-- Databricks notebook source
-- DBTITLE 1,Partition optimization
-- Disabled DDL, kept for reference: definition of the market-sales target table.
-- NOTE(review): this DDL spells PACK_COD / CORP_COD / AUDIT_COD while the queries
-- in this notebook select PACK_CODE / CORP_CODE / AUDIT_CODE; confirm the live
-- column names before adding an explicit column list to the INSERT statements.
-- CREATE OR REPLACE TABLE DM.DM_TF_EXT_UNIONALL_MARKET_SALES (
--   MARKET STRING,
--   KEY_COMPETITOR STRING,
--   CLASS STRING,
--   YYYYMM STRING,
--   PACK_COD STRING,
--   CORP_COD STRING,
--   AUDIT_COD STRING,
--   PLATFORM_TYPE STRING,
--   STORE_NAME STRING,
--   STORE_TYPE STRING,
--   REGION_TYPE STRING,
--   DATA_SOURCE STRING,
--   PACK_FLAG STRING,
--   PROD_FLAG STRING,
--   DTP_FLAG STRING,
--   CMPS_FLAG STRING,
--   NEW_CODE STRING COMMENT '主数据关联CODE',  -- master-data join code
--   INST_CODE STRING COMMENT '内部机构编码',  -- internal institution code
--   AIA_HP_FLAG STRING,
--   DEPT_NAME STRING COMMENT '科室名称',  -- department name
--   H_LEVEL STRING,
--   REIMBURSE STRING COMMENT '报销情况',  -- reimbursement status
--   REIMBURSE_TYPE STRING COMMENT '报销类型',  -- reimbursement type
--   PRESCRIPTION_TYPE STRING COMMENT '处方来源',  -- prescription source
--   PRESCRIPTION DECIMAL(35,10),
--   PRESCRIPTION_LY DECIMAL(35,10),
--   VA STRING,
--   UT STRING,
--   CU STRING,
--   PT STRING,
--   SALES_UNIT_CAL DECIMAL(35,6),
--   SALES_UNIT_CAL_LY DECIMAL(35,6),
--   SALES_VALUE_CAL DECIMAL(35,6),
--   SALES_VALUE_CAL_LY DECIMAL(35,6),
--   CONUTING_UNIT DECIMAL(35,6),
--   CONUTING_UNIT_LY DECIMAL(35,6),
--   PDOT DECIMAL(35,6),
--   PDOT_LY DECIMAL(35,6),
--   ETL_INSERT_DT TIMESTAMP,
--   ETL_UPDATE_DT TIMESTAMP
-- )
-- USING delta
-- PARTITIONED BY (DATA_SOURCE)
-- -- LOCATION 'abfss://master@azcdatalakeprd.dfs.core.chinacloudapi.cn/DM/dm_tf_ext_unionall_market_sales';
-- -- The location above is the production environment; the one below is the test environment.
-- LOCATION 'abfss://master@retaildlstoragetest.dfs.core.chinacloudapi.cn/DM/dm_tf_ext_unionall_market_sales';

-- COMMAND ----------

-- DBTITLE 1,Performance optimization
-- Disabled maintenance statements, kept for reference.
-- -- Z-Order cluster on the most frequently used filter and JOIN columns.
-- OPTIMIZE DM.DM_TF_EXT_UNIONALL_SALES
-- ZORDER BY (DATA_SOURCE, PACK_CODE, YYYYMM);
-- -- Collect statistics to help the optimizer choose a better execution plan.
-- ANALYZE TABLE DM.DM_TF_EXT_UNIONALL_SALES COMPUTE STATISTICS FOR ALL COLUMNS;
-- ANALYZE TABLE DM.dm_td_ext_unionall_market_pack_mapping COMPUTE STATISTICS FOR ALL COLUMNS;

-- COMMAND ----------

-- DBTITLE 1,Test data run
-- Disabled test helper: clears the two data sources that the disabled test INSERT
-- in the next cell re-inserts.
-- delete from DM.DM_TF_EXT_UNIONALL_MARKET_SALES where DATA_SOURCE IN ('Retail(Quarterly)', 'EC(Monthly)')

-- COMMAND ----------

-- DBTITLE 1,Test data run
-- Disabled test variant of the market_sales cell: INSERT INTO (append) instead of
-- INSERT OVERWRITE, restricted to 'Retail(Quarterly)' and 'EC(Monthly)'.
-- INSERT INTO TABLE
--   DM.DM_TF_EXT_UNIONALL_MARKET_SALES
-- WITH
--   t1_filtered AS (
--     SELECT
--       *
--     FROM
--       DM.DM_TF_EXT_UNIONALL_SALES
--     WHERE
--       DATA_SOURCE IN (
--         -- 'IQVIA-CHPA(Monthly)',
--         -- 'AIA(Monthly)'
--         -- 'CHC(Quarterly)',
--         -- 'IQVIA-COUNTY(Quarterly)',
--         -- 'THC(Quarterly)'
--         'Retail(Quarterly)',
--         -- 'DTP(Quarterly)',
--         'EC(Monthly)'
--         -- 'XH Data(Quarterly)'
--       ) -- predicate pushdown: filter first
--   ),
--   mapping_cleaned AS (
--     SELECT
--       *
--     FROM
--       (
--         SELECT
--           PACK_CODE,
--           DATASOURCE,
--           MARKET,
--           KEY_COMPETITOR,
--           CLASS,
--           MARKET_RATIO,
--           Value_brand_Ratio,
--           Unit_brand_Ratio,
--           CountingUnit_brand_Ratio,
--           PDOT_brand_Ratio,
--           GREATEST (
--             starttime,
--             Value_brand_Ratio_start,
--             Unit_brand_start,
--             CountingUnit_brand_start,
--             PDOT_brand_start
--           ) AS eff_start,
--           LEAST (
--             endtime,
--             Value_brand_Ratio_end,
--             Unit_brand_end,
--             CountingUnit_brand_end,
--             PDOT_brand_end
--           ) AS eff_end
--         FROM
--           DM.dm_td_ext_unionall_market_pack_mapping
--       ) tmp
--     WHERE
--       eff_start <= eff_end -- filter in the outer subquery
--   ),
--   joined AS (
--     SELECT
--       /*+ BROADCAST(t2, t3) */
--       t1.*,
--       -- current-year ratios
--       t2.MARKET,
--       t2.KEY_COMPETITOR,
--       t2.CLASS,
--       t2.MARKET_RATIO AS cy_market_ratio,
--       t2.Value_brand_Ratio AS cy_value_ratio,
--       t2.Unit_brand_Ratio AS cy_unit_ratio,
--       t2.CountingUnit_brand_Ratio AS cy_cu_ratio,
--       t2.PDOT_brand_Ratio AS cy_pdot_ratio,
--       -- last-year ratios
--       t3.MARKET_RATIO AS ly_market_ratio,
--       t3.Value_brand_Ratio AS ly_value_ratio,
--       t3.Unit_brand_Ratio AS ly_unit_ratio,
--       t3.CountingUnit_brand_Ratio AS ly_cu_ratio,
--       t3.PDOT_brand_Ratio AS ly_pdot_ratio
--     FROM
--       t1_filtered t1
--       LEFT JOIN mapping_cleaned t2 ON t1.PACK_CODE = t2.PACK_CODE
--       AND t1.DATA_SOURCE = t2.DATASOURCE
--       AND t1.YYYYMM >= t2.eff_start
--       AND t1.YYYYMM <= t2.eff_end
--       LEFT JOIN mapping_cleaned t3 ON t1.PACK_CODE = t3.PACK_CODE
--       AND t1.DATA_SOURCE = t3.DATASOURCE
--       AND (t1.YYYYMM - 100) >= t3.eff_start
--       AND (t1.YYYYMM - 100) <= t3.eff_end
--       AND t2.MARKET = t3.MARKET -- keep the original logic
--   )
-- SELECT
--   NVL(MARKET, 'ALL Market') AS MARKET,
--   NVL(KEY_COMPETITOR, 'OTHERS') AS KEY_COMPETITOR,
--   CASE
--     WHEN trim(lower(CLASS)) = 'others' THEN 'OTHERS'
--     WHEN CLASS IS NULL THEN 'OTHERS'
--     ELSE CLASS
--   END AS CLASS,
--   YYYYMM,
--   PACK_CODE,
--   CORP_CODE,
--   AUDIT_CODE,
--   PLATFORM_TYPE,
--   STORE_NAME,
--   STORE_TYPE,
--   REGION_TYPE,
--   DATA_SOURCE,
--   PACK_FLAG,
--   PROD_FLAG,
--   DTP_FLAG,
--   CMPS_FLAG,
--   NEW_CODE,
--   INST_CODE,
--   NULL AS AIA_HP_FLAG,
--   DEPT_NAME,
--   H_LEVEL,
--   REIMBURSE,
--   REIMBURSE_TYPE,
--   PRESCRIPTION_SOURCE AS PRESCRIPTION_TYPE,
--   PRESCRIPTION,
--   PRESCRIPTION_LY,
--   -- precompute the shared factors to avoid repeated computation
--   NVL(NVL(cy_market_ratio, 1) * cy_value_ratio, 1) AS VA,
--   NVL(NVL(cy_market_ratio, 1) * cy_unit_ratio, 1) AS UT,
--   NVL(NVL(cy_market_ratio, 1) * cy_cu_ratio, 1) AS CU,
--   NVL(NVL(cy_market_ratio, 1) * cy_pdot_ratio, 1) AS PT,
--   CAST(
--     SALES_UNIT_CAL * NVL(cy_unit_ratio, 1) * NVL(cy_market_ratio, 1) AS DECIMAL(35, 6)
--   ) AS SALES_UNIT_CAL,
--   CAST(
--     SALES_UNIT_CAL_LY * NVL(ly_unit_ratio, 1) * NVL(ly_market_ratio, 1) AS DECIMAL(35, 6)
--   ) AS SALES_UNIT_CAL_LY,
--   CAST(
--     SALES_VALUE_CAL * NVL(cy_value_ratio, 1) * NVL(cy_market_ratio, 1) AS DECIMAL(35, 6)
--   ) AS SALES_VALUE_CAL,
--   CAST(
--     SALES_VALUE_CAL_LY * NVL(ly_value_ratio, 1) * NVL(ly_market_ratio, 1) AS DECIMAL(35, 6)
--   ) AS SALES_VALUE_CAL_LY,
--   CAST(
--     CONUTING_UNIT * NVL(cy_cu_ratio, 1) * NVL(cy_market_ratio, 1) AS DECIMAL(35, 6)
--   ) AS CONUTING_UNIT,
--   CAST(
--     CONUTING_UNIT_LY * NVL(ly_cu_ratio, 1) * NVL(ly_market_ratio, 1) AS DECIMAL(35, 6)
--   ) AS CONUTING_UNIT_LY,
--   CAST(
--     CONUTING_UNIT * NVL(cy_pdot_ratio, 1) * NVL(cy_market_ratio, 1) AS DECIMAL(35, 6)
--   ) AS PDOT,
--   CAST(
--     CONUTING_UNIT_LY * NVL(ly_pdot_ratio, 1) * NVL(ly_market_ratio, 1) AS DECIMAL(35, 6)
--   ) AS PDOT_LY,
--   FROM_UTC_TIMESTAMP(CURRENT_TIMESTAMP(), 'UTC+8') AS ETL_INSERT_DT,
--   FROM_UTC_TIMESTAMP(CURRENT_TIMESTAMP(), 'UTC+8') AS ETL_UPDATE_DT
-- FROM
--   joined;

-- COMMAND ----------

-- DBTITLE 1,market_sales
-- Rebuilds DM.DM_TF_EXT_UNIONALL_MARKET_SALES from DM.DM_TF_EXT_UNIONALL_SALES:
-- each sales row is left-joined to the pack-to-market mapping (once for the row's
-- own period, once for the period 100 lower) and its measures are scaled by the
-- mapped ratios.
-- The INSERT has no column list, so the SELECT output is matched to the target
-- table by position; keep the SELECT list in the target's column order.
INSERT OVERWRITE TABLE DM.DM_TF_EXT_UNIONALL_MARKET_SALES
WITH
-- Sales rows limited to the data sources handled by this load.
t1_filtered AS (
    SELECT
        *
    FROM DM.DM_TF_EXT_UNIONALL_SALES
    WHERE DATA_SOURCE IN (
        'IQVIA-CHPA(Monthly)',
        'AIA(Monthly)',
        'CHC(Quarterly)',
        'IQVIA-COUNTY(Quarterly)',
        'THC(Quarterly)',
        'Retail(Quarterly)',
        'DTP(Quarterly)',
        'EC(Monthly)',
        'XH Data(Quarterly)'
    ) -- predicate pushdown: filter first
),
-- Mapping rows with a single effective window: eff_start is the latest of the
-- five start columns and eff_end the earliest of the five end columns. Rows whose
-- window is empty (eff_start > eff_end) are dropped.
mapping_cleaned AS (
    SELECT
        *
    FROM (
        SELECT
            PACK_CODE,
            DATASOURCE,
            MARKET,
            KEY_COMPETITOR,
            CLASS,
            MARKET_RATIO,
            Value_brand_Ratio,
            Unit_brand_Ratio,
            CountingUnit_brand_Ratio,
            PDOT_brand_Ratio,
            GREATEST(
                starttime,
                Value_brand_Ratio_start,
                Unit_brand_start,
                CountingUnit_brand_start,
                PDOT_brand_start
            ) AS eff_start,
            LEAST(
                endtime,
                Value_brand_Ratio_end,
                Unit_brand_end,
                CountingUnit_brand_end,
                PDOT_brand_end
            ) AS eff_end
        FROM DM.dm_td_ext_unionall_market_pack_mapping
    ) tmp
    -- Filtered in the outer query because eff_start / eff_end are aliases
    -- defined in the inner SELECT.
    WHERE eff_start <= eff_end
),
-- Sales rows with current-period (t2) and prior-period (t3) mapping ratios attached.
joined AS (
    SELECT /*+ BROADCAST(t2, t3) */
        t1.*,
        -- current-year ratios
        t2.MARKET,
        t2.KEY_COMPETITOR,
        t2.CLASS,
        t2.MARKET_RATIO AS cy_market_ratio,
        t2.Value_brand_Ratio AS cy_value_ratio,
        t2.Unit_brand_Ratio AS cy_unit_ratio,
        t2.CountingUnit_brand_Ratio AS cy_cu_ratio,
        t2.PDOT_brand_Ratio AS cy_pdot_ratio,
        -- last-year ratios
        t3.MARKET_RATIO AS ly_market_ratio,
        t3.Value_brand_Ratio AS ly_value_ratio,
        t3.Unit_brand_Ratio AS ly_unit_ratio,
        t3.CountingUnit_brand_Ratio AS ly_cu_ratio,
        t3.PDOT_brand_Ratio AS ly_pdot_ratio
    FROM t1_filtered t1
    LEFT JOIN mapping_cleaned t2
        ON t1.PACK_CODE = t2.PACK_CODE
        AND t1.DATA_SOURCE = t2.DATASOURCE
        AND t1.YYYYMM >= t2.eff_start
        AND t1.YYYYMM <= t2.eff_end
    -- NOTE(review): YYYYMM - 100 relies on an implicit numeric cast if YYYYMM is a
    -- string (the disabled DDL in this notebook declares the target's YYYYMM as
    -- STRING); presumably it means "same month, one year earlier" - confirm.
    -- Because the ON clause also requires t2.MARKET = t3.MARKET, a row with no t2
    -- match never gets a t3 match either.
    LEFT JOIN mapping_cleaned t3
        ON t1.PACK_CODE = t3.PACK_CODE
        AND t1.DATA_SOURCE = t3.DATASOURCE
        AND (t1.YYYYMM - 100) >= t3.eff_start
        AND (t1.YYYYMM - 100) <= t3.eff_end
        AND t2.MARKET = t3.MARKET -- keep the original logic
)
SELECT
    -- Unmapped rows fall back to the catch-all market / competitor / class labels.
    NVL(MARKET, 'ALL Market') AS MARKET,
    NVL(KEY_COMPETITOR, 'OTHERS') AS KEY_COMPETITOR,
    CASE
        WHEN TRIM(LOWER(CLASS)) = 'others' THEN 'OTHERS'
        WHEN CLASS IS NULL THEN 'OTHERS'
        ELSE CLASS
    END AS CLASS,
    YYYYMM,
    PACK_CODE,
    CORP_CODE,
    AUDIT_CODE,
    PLATFORM_TYPE,
    STORE_NAME,
    STORE_TYPE,
    REGION_TYPE,
    DATA_SOURCE,
    PACK_FLAG,
    PROD_FLAG,
    DTP_FLAG,
    CMPS_FLAG,
    NEW_CODE,
    INST_CODE,
    NULL AS AIA_HP_FLAG,
    DEPT_NAME,
    H_LEVEL,
    REIMBURSE,
    REIMBURSE_TYPE,
    PRESCRIPTION_SOURCE AS PRESCRIPTION_TYPE,
    PRESCRIPTION,
    PRESCRIPTION_LY,
    -- Combined current-year factors (market ratio x brand ratio), 1 when the
    -- brand ratio is NULL.
    -- NOTE(review): with a NULL brand ratio these report 1 even when
    -- cy_market_ratio is not NULL, while the *_CAL measures below still multiply
    -- by cy_market_ratio - confirm the asymmetry is intended.
    NVL(NVL(cy_market_ratio, 1) * cy_value_ratio, 1) AS VA,
    NVL(NVL(cy_market_ratio, 1) * cy_unit_ratio, 1) AS UT,
    NVL(NVL(cy_market_ratio, 1) * cy_cu_ratio, 1) AS CU,
    NVL(NVL(cy_market_ratio, 1) * cy_pdot_ratio, 1) AS PT,
    -- Measures scaled by their ratio and the market ratio; a missing ratio counts as 1.
    CAST(
        SALES_UNIT_CAL * NVL(cy_unit_ratio, 1) * NVL(cy_market_ratio, 1) AS DECIMAL(35, 6)
    ) AS SALES_UNIT_CAL,
    CAST(
        SALES_UNIT_CAL_LY * NVL(ly_unit_ratio, 1) * NVL(ly_market_ratio, 1) AS DECIMAL(35, 6)
    ) AS SALES_UNIT_CAL_LY,
    CAST(
        SALES_VALUE_CAL * NVL(cy_value_ratio, 1) * NVL(cy_market_ratio, 1) AS DECIMAL(35, 6)
    ) AS SALES_VALUE_CAL,
    CAST(
        SALES_VALUE_CAL_LY * NVL(ly_value_ratio, 1) * NVL(ly_market_ratio, 1) AS DECIMAL(35, 6)
    ) AS SALES_VALUE_CAL_LY,
    CAST(
        CONUTING_UNIT * NVL(cy_cu_ratio, 1) * NVL(cy_market_ratio, 1) AS DECIMAL(35, 6)
    ) AS CONUTING_UNIT,
    CAST(
        CONUTING_UNIT_LY * NVL(ly_cu_ratio, 1) * NVL(ly_market_ratio, 1) AS DECIMAL(35, 6)
    ) AS CONUTING_UNIT_LY,
    -- PDOT / PDOT_LY are computed from CONUTING_UNIT(_LY) times the PDOT ratio.
    -- NOTE(review): no PDOT source column is read here - confirm that is intended.
    CAST(
        CONUTING_UNIT * NVL(cy_pdot_ratio, 1) * NVL(cy_market_ratio, 1) AS DECIMAL(35, 6)
    ) AS PDOT,
    CAST(
        CONUTING_UNIT_LY * NVL(ly_pdot_ratio, 1) * NVL(ly_market_ratio, 1) AS DECIMAL(35, 6)
    ) AS PDOT_LY,
    -- Load timestamps: the current timestamp converted from UTC to zone 'UTC+8'.
    FROM_UTC_TIMESTAMP(CURRENT_TIMESTAMP(), 'UTC+8') AS ETL_INSERT_DT,
    FROM_UTC_TIMESTAMP(CURRENT_TIMESTAMP(), 'UTC+8') AS ETL_UPDATE_DT
FROM joined;

-- COMMAND ----------

-- DBTITLE 1,market_sales_cht
-- Rebuilds DM.DM_TF_EXT_THC_MARKET_SALES_CHT from DM.DM_TF_EXT_THC_SALES using the
-- THC-specific pack-to-market mapping. The pipeline has the same shape as the
-- market_sales cell: filter, build effective mapping windows, join current and
-- prior period ratios, scale the measures.
-- The INSERT has no column list, so the SELECT output is matched to the target
-- table by position; keep the SELECT list in the target's column order.
INSERT OVERWRITE TABLE DM.DM_TF_EXT_THC_MARKET_SALES_CHT
WITH
-- THC sales rows whose AUDIT_CODE is not 'CHT'. The <> comparison also drops rows
-- with a NULL AUDIT_CODE.
-- NOTE(review): the target table name ends in _CHT while 'CHT' rows are excluded
-- here - confirm the filter direction is intended.
t1_filtered AS (
    SELECT
        *
    FROM DM.DM_TF_EXT_THC_SALES
    WHERE DATA_SOURCE = 'THC(Quarterly)'
        AND AUDIT_CODE <> 'CHT' -- predicate pushdown: filter first
),
-- THC mapping rows with a single effective window: eff_start is the latest of the
-- five start columns and eff_end the earliest of the five end columns. Rows whose
-- window is empty are dropped. DATASOURCE is supplied as a literal so the join
-- below can keep the same shape as the market_sales cell.
mapping_cleaned AS (
    SELECT
        *
    FROM (
        SELECT
            PACK_CODE,
            'THC(Quarterly)' AS DATASOURCE,
            MARKET,
            KEY_COMPETITOR,
            CLASS,
            MARKET_RATIO,
            Value_brand_Ratio,
            Unit_brand_Ratio,
            CountingUnit_brand_Ratio,
            PDOT_brand_Ratio,
            GREATEST(
                starttime,
                Value_brand_Ratio_start,
                Unit_brand_start,
                CountingUnit_brand_start,
                PDOT_brand_start
            ) AS eff_start,
            LEAST(
                endtime,
                Value_brand_Ratio_end,
                Unit_brand_end,
                CountingUnit_brand_end,
                PDOT_brand_end
            ) AS eff_end
        FROM DM.dm_td_ext_THC_market_pack_mapping
    ) tmp
    -- Filtered in the outer query because eff_start / eff_end are aliases
    -- defined in the inner SELECT.
    WHERE eff_start <= eff_end
),
-- Sales rows with current-period (t2) and prior-period (t3) mapping ratios attached.
joined AS (
    SELECT /*+ BROADCAST(t2, t3) */
        t1.*,
        -- current-year ratios
        t2.MARKET,
        t2.KEY_COMPETITOR,
        t2.CLASS,
        t2.MARKET_RATIO AS cy_market_ratio,
        t2.Value_brand_Ratio AS cy_value_ratio,
        t2.Unit_brand_Ratio AS cy_unit_ratio,
        t2.CountingUnit_brand_Ratio AS cy_cu_ratio,
        t2.PDOT_brand_Ratio AS cy_pdot_ratio,
        -- last-year ratios
        t3.MARKET_RATIO AS ly_market_ratio,
        t3.Value_brand_Ratio AS ly_value_ratio,
        t3.Unit_brand_Ratio AS ly_unit_ratio,
        t3.CountingUnit_brand_Ratio AS ly_cu_ratio,
        t3.PDOT_brand_Ratio AS ly_pdot_ratio
    FROM t1_filtered t1
    LEFT JOIN mapping_cleaned t2
        ON t1.PACK_CODE = t2.PACK_CODE
        AND t1.DATA_SOURCE = t2.DATASOURCE
        AND t1.YYYYMM >= t2.eff_start
        AND t1.YYYYMM <= t2.eff_end
    -- NOTE(review): YYYYMM - 100 relies on an implicit numeric cast if YYYYMM is a
    -- string; presumably it means "same month, one year earlier" - confirm.
    -- Because the ON clause also requires t2.MARKET = t3.MARKET, a row with no t2
    -- match never gets a t3 match either.
    LEFT JOIN mapping_cleaned t3
        ON t1.PACK_CODE = t3.PACK_CODE
        AND t1.DATA_SOURCE = t3.DATASOURCE
        AND (t1.YYYYMM - 100) >= t3.eff_start
        AND (t1.YYYYMM - 100) <= t3.eff_end
        AND t2.MARKET = t3.MARKET -- keep the original logic
)
SELECT
    -- Unmapped rows fall back to the catch-all market / competitor / class labels.
    NVL(MARKET, 'ALL Market') AS MARKET,
    NVL(KEY_COMPETITOR, 'OTHERS') AS KEY_COMPETITOR,
    CASE
        WHEN TRIM(LOWER(CLASS)) = 'others' THEN 'OTHERS'
        WHEN CLASS IS NULL THEN 'OTHERS'
        ELSE CLASS
    END AS CLASS,
    YYYYMM,
    PACK_CODE,
    CORP_CODE,
    AUDIT_CODE,
    PLATFORM_TYPE,
    STORE_NAME,
    STORE_TYPE,
    REGION_TYPE,
    DATA_SOURCE,
    PACK_FLAG,
    PROD_FLAG,
    DTP_FLAG,
    CMPS_FLAG,
    NEW_CODE,
    INST_CODE,
    NULL AS AIA_HP_FLAG,
    DEPT_NAME,
    H_LEVEL,
    REIMBURSE,
    REIMBURSE_TYPE,
    PRESCRIPTION_SOURCE AS PRESCRIPTION_TYPE,
    PRESCRIPTION,
    PRESCRIPTION_LY,
    -- Combined current-year factors (market ratio x brand ratio), 1 when the
    -- brand ratio is NULL.
    -- NOTE(review): with a NULL brand ratio these report 1 even when
    -- cy_market_ratio is not NULL, while the *_CAL measures below still multiply
    -- by cy_market_ratio - confirm the asymmetry is intended.
    NVL(NVL(cy_market_ratio, 1) * cy_value_ratio, 1) AS VA,
    NVL(NVL(cy_market_ratio, 1) * cy_unit_ratio, 1) AS UT,
    NVL(NVL(cy_market_ratio, 1) * cy_cu_ratio, 1) AS CU,
    NVL(NVL(cy_market_ratio, 1) * cy_pdot_ratio, 1) AS PT,
    -- Measures scaled by their ratio and the market ratio; a missing ratio counts as 1.
    CAST(
        SALES_UNIT_CAL * NVL(cy_unit_ratio, 1) * NVL(cy_market_ratio, 1) AS DECIMAL(35, 6)
    ) AS SALES_UNIT_CAL,
    CAST(
        SALES_UNIT_CAL_LY * NVL(ly_unit_ratio, 1) * NVL(ly_market_ratio, 1) AS DECIMAL(35, 6)
    ) AS SALES_UNIT_CAL_LY,
    CAST(
        SALES_VALUE_CAL * NVL(cy_value_ratio, 1) * NVL(cy_market_ratio, 1) AS DECIMAL(35, 6)
    ) AS SALES_VALUE_CAL,
    CAST(
        SALES_VALUE_CAL_LY * NVL(ly_value_ratio, 1) * NVL(ly_market_ratio, 1) AS DECIMAL(35, 6)
    ) AS SALES_VALUE_CAL_LY,
    CAST(
        CONUTING_UNIT * NVL(cy_cu_ratio, 1) * NVL(cy_market_ratio, 1) AS DECIMAL(35, 6)
    ) AS CONUTING_UNIT,
    CAST(
        CONUTING_UNIT_LY * NVL(ly_cu_ratio, 1) * NVL(ly_market_ratio, 1) AS DECIMAL(35, 6)
    ) AS CONUTING_UNIT_LY,
    -- PDOT / PDOT_LY are computed from CONUTING_UNIT(_LY) times the PDOT ratio.
    -- NOTE(review): no PDOT source column is read here - confirm that is intended.
    CAST(
        CONUTING_UNIT * NVL(cy_pdot_ratio, 1) * NVL(cy_market_ratio, 1) AS DECIMAL(35, 6)
    ) AS PDOT,
    CAST(
        CONUTING_UNIT_LY * NVL(ly_pdot_ratio, 1) * NVL(ly_market_ratio, 1) AS DECIMAL(35, 6)
    ) AS PDOT_LY,
    -- Load timestamps: the current timestamp converted from UTC to zone 'UTC+8'.
    FROM_UTC_TIMESTAMP(CURRENT_TIMESTAMP(), 'UTC+8') AS ETL_INSERT_DT,
    FROM_UTC_TIMESTAMP(CURRENT_TIMESTAMP(), 'UTC+8') AS ETL_UPDATE_DT
FROM joined;