-- Skill inventory: one row per skill category with its technology list.
CREATE TABLE IF NOT EXISTS data_skills (
    skill_category   VARCHAR(50) NOT NULL,
    -- PostgreSQL-style array type; the original ARRAY<VARCHAR> is
    -- BigQuery/Hive syntax and does not parse in engines that accept the
    -- ARRAY['...'] literals this table is populated with.
    technologies     VARCHAR[]   NOT NULL,
    experience_level VARCHAR(20) NOT NULL
);
-- Explicit column list so the INSERT keeps working if columns are
-- reordered or new ones are added to data_skills.
INSERT INTO data_skills (skill_category, technologies, experience_level) VALUES
    ('AWS Data Services', ARRAY['Athena', 'Redshift', 'Glue', 'Kinesis', 'Firehose'], 'Production'),
    ('SQL Databases', ARRAY['PostgreSQL', 'MySQL', 'Aurora', 'DynamoDB'], 'Expert'),
    ('ETL Pipelines', ARRAY['Data Transformation', 'Incremental Loading', 'Data Quality'], 'Advanced'),
    ('Analytics', ARRAY['Time-Series', 'Aggregations', 'Window Functions'], 'Production');
-- AWS data services used at production level, with their use cases.
SELECT
    service,
    use_case,
    capabilities
FROM aws_data_services
WHERE experience = 'Production';
-- How many distinct SQL optimization techniques are covered.
SELECT
    COUNT(*) AS optimization_techniques
FROM sql_skills
WHERE category IN (
    'Partition Pruning',
    'Index Optimization',
    'Query Rewriting',
    'Join Strategies',
    'Window Functions',
    'CTEs & Subqueries'
);
-- ETL pipeline patterns and how each one is implemented.
-- Same rows and column names as before; a VALUES row constructor replaces
-- the UNION ALL chain of single-row SELECTs.
CREATE VIEW pipeline_expertise AS
SELECT
    v.pattern,
    v.description
FROM (
    VALUES
        ('Incremental Loading', 'MERGE/UPSERT operations, change data capture'),
        ('Data Transformation', 'Cleaning, enrichment, feature engineering'),
        ('Streaming Processing', 'Real-time aggregation, windowing, event processing'),
        ('Data Quality', 'Validation, anomaly detection, schema enforcement')
) AS v (pattern, description);
-- Analytics techniques, enumerated as a single-column relation.
-- Column named via the CTE alias list; the final SELECT names it
-- explicitly instead of using SELECT *.
WITH analytics_capabilities (capability) AS (
    SELECT 'Time-Series Analysis'
    UNION ALL SELECT 'Rolling Aggregations'
    UNION ALL SELECT 'Percentile Calculations'
    UNION ALL SELECT 'Cohort Analysis'
    UNION ALL SELECT 'Statistical Features'
)
SELECT capability FROM analytics_capabilities;
-- Technologies grouped into one array per database type.
SELECT
    database_type,
    -- Order inside the aggregate: without it, array element order is
    -- nondeterministic and can change between runs or plans.
    ARRAY_AGG(technology ORDER BY technology) AS technologies
FROM database_experience
GROUP BY database_type
-- Pin the result-set order too, for deterministic output.
ORDER BY database_type;
-- Summary statistics (mean, spread, median) over engineered ML features.
SELECT
    AVG(statistical_feature)    AS mean,
    STDDEV(statistical_feature) AS stddev,
    -- PERCENTILE_CONT interpolates, so this is the true continuous median.
    PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY statistical_feature) AS median
FROM ml_feature_engineering
WHERE feature_type IN (
    'Aggregations',
    'Time-Based Features',
    'Rolling Windows',
    'Cross-Table Joins'
);
-- Data quality practices and how each one is implemented.
CREATE TABLE IF NOT EXISTS data_quality_practices (
    -- The practice name is the natural key; enforce uniqueness here.
    practice       VARCHAR(100) NOT NULL PRIMARY KEY,
    implementation TEXT         NOT NULL
);
-- Explicit column list so the INSERT keeps working if the table's
-- column order changes or new columns are added.
INSERT INTO data_quality_practices (practice, implementation) VALUES
    ('Schema Validation', 'Enforcing data types, constraints, referential integrity'),
    ('Anomaly Detection', 'IQR method, statistical outliers, threshold monitoring'),
    ('Data Lineage', 'Tracking transformations, source to destination mapping'),
    ('Monitoring', 'Pipeline health checks, freshness metrics, error alerts');