| { | |
| "dag_id": "ecommerce_etl_pipeline", | |
| "description": "Daily ETL pipeline for e-commerce data warehouse", | |
| "schedule_interval": "0 2 * * *", | |
| "start_date": "2025-01-01", | |
| "catchup": false, | |
| "tags": ["etl", "ecommerce", "daily"], | |
| "default_args": { | |
| "owner": "data_engineering", | |
| "retries": 3, | |
| "retry_delay_minutes": 5, | |
| "email_on_failure": true | |
| }, | |
| "tasks": [ | |
| { | |
| "task_id": "extract_customers", | |
| "operator": "PythonOperator", | |
| "description": "Extract customer data from source database", | |
| "upstream_dependencies": [], | |
| "downstream_dependencies": ["transform_customers"], | |
| "source": "postgres://source_db/customers", | |
| "target": "s3://data-lake/raw/customers/" | |
| }, | |
| { | |
| "task_id": "extract_orders", | |
| "operator": "PythonOperator", | |
| "description": "Extract orders data from source database", | |
| "upstream_dependencies": [], | |
| "downstream_dependencies": ["transform_orders"], | |
| "source": "postgres://source_db/orders", | |
| "target": "s3://data-lake/raw/orders/" | |
| }, | |
| { | |
| "task_id": "extract_products", | |
| "operator": "PythonOperator", | |
| "description": "Extract products data from source database", | |
| "upstream_dependencies": [], | |
| "downstream_dependencies": ["transform_products"], | |
| "source": "postgres://source_db/products", | |
| "target": "s3://data-lake/raw/products/" | |
| }, | |
| { | |
| "task_id": "extract_order_items", | |
| "operator": "PythonOperator", | |
| "description": "Extract order items data from source database", | |
| "upstream_dependencies": [], | |
| "downstream_dependencies": ["transform_order_items"], | |
| "source": "postgres://source_db/order_items", | |
| "target": "s3://data-lake/raw/order_items/" | |
| }, | |
| { | |
| "task_id": "transform_customers", | |
| "operator": "SparkSubmitOperator", | |
| "description": "Clean and transform customer data", | |
| "upstream_dependencies": ["extract_customers"], | |
| "downstream_dependencies": ["load_dim_customers"], | |
| "source": "s3://data-lake/raw/customers/", | |
| "target": "s3://data-lake/transformed/customers/" | |
| }, | |
| { | |
| "task_id": "transform_orders", | |
| "operator": "SparkSubmitOperator", | |
| "description": "Clean and transform orders data", | |
| "upstream_dependencies": ["extract_orders"], | |
| "downstream_dependencies": ["load_fct_orders"], | |
| "source": "s3://data-lake/raw/orders/", | |
| "target": "s3://data-lake/transformed/orders/" | |
| }, | |
| { | |
| "task_id": "transform_products", | |
| "operator": "SparkSubmitOperator", | |
| "description": "Clean and transform products data", | |
| "upstream_dependencies": ["extract_products"], | |
| "downstream_dependencies": ["load_dim_products"], | |
| "source": "s3://data-lake/raw/products/", | |
| "target": "s3://data-lake/transformed/products/" | |
| }, | |
| { | |
| "task_id": "transform_order_items", | |
| "operator": "SparkSubmitOperator", | |
| "description": "Clean and transform order items data", | |
| "upstream_dependencies": ["extract_order_items"], | |
| "downstream_dependencies": ["load_fct_orders"], | |
| "source": "s3://data-lake/raw/order_items/", | |
| "target": "s3://data-lake/transformed/order_items/" | |
| }, | |
| { | |
| "task_id": "load_dim_customers", | |
| "operator": "SnowflakeOperator", | |
| "description": "Load customer dimension to Snowflake", | |
| "upstream_dependencies": ["transform_customers"], | |
| "downstream_dependencies": ["build_customer_metrics"], | |
| "source": "s3://data-lake/transformed/customers/", | |
| "target": "snowflake://warehouse/analytics.dim_customers" | |
| }, | |
| { | |
| "task_id": "load_dim_products", | |
| "operator": "SnowflakeOperator", | |
| "description": "Load product dimension to Snowflake", | |
| "upstream_dependencies": ["transform_products"], | |
| "downstream_dependencies": ["build_sales_report"], | |
| "source": "s3://data-lake/transformed/products/", | |
| "target": "snowflake://warehouse/analytics.dim_products" | |
| }, | |
| { | |
| "task_id": "load_fct_orders", | |
| "operator": "SnowflakeOperator", | |
| "description": "Load orders fact table (from transformed orders and order items) to Snowflake", | |
| "upstream_dependencies": ["transform_orders", "transform_order_items"], | |
| "downstream_dependencies": ["build_customer_metrics", "build_sales_report"], | |
| "source": ["s3://data-lake/transformed/orders/", "s3://data-lake/transformed/order_items/"], | |
| "target": "snowflake://warehouse/analytics.fct_orders" | |
| }, | |
| { | |
| "task_id": "build_customer_metrics", | |
| "operator": "SnowflakeOperator", | |
| "description": "Calculate customer lifetime value and other customer metrics", | |
| "upstream_dependencies": ["load_dim_customers", "load_fct_orders"], | |
| "downstream_dependencies": ["publish_to_bi"], | |
| "source": ["analytics.dim_customers", "analytics.fct_orders"], | |
| "target": "snowflake://warehouse/analytics.rpt_customer_metrics" | |
| }, | |
| { | |
| "task_id": "build_sales_report", | |
| "operator": "SnowflakeOperator", | |
| "description": "Build daily sales report", | |
| "upstream_dependencies": ["load_dim_products", "load_fct_orders"], | |
| "downstream_dependencies": ["publish_to_bi"], | |
| "source": ["analytics.dim_products", "analytics.fct_orders"], | |
| "target": "snowflake://warehouse/analytics.rpt_daily_sales" | |
| }, | |
| { | |
| "task_id": "publish_to_bi", | |
| "operator": "PythonOperator", | |
| "description": "Publish customer metrics and daily sales reports to the Tableau e-commerce dashboard", | |
| "upstream_dependencies": ["build_customer_metrics", "build_sales_report"], | |
| "downstream_dependencies": ["notify_stakeholders"], | |
| "source": ["analytics.rpt_customer_metrics", "analytics.rpt_daily_sales"], | |
| "target": "tableau://server/ecommerce_dashboard" | |
| }, | |
| { | |
| "task_id": "notify_stakeholders", | |
| "operator": "EmailOperator", | |
| "description": "Send pipeline completion notification email to stakeholders", | |
| "upstream_dependencies": ["publish_to_bi"], | |
| "downstream_dependencies": [] | |
| } | |
| ], | |
| "notes": "Sample Airflow DAG definition representing a complete daily ETL pipeline with extract, transform, load, reporting, BI publishing, and stakeholder notification stages." | |
| } | |