update20260427

This commit is contained in:
2026-04-27 11:04:09 +08:00
commit 3e14e78af3
173 changed files with 62579 additions and 0 deletions

View File

@@ -0,0 +1,524 @@
-- Databricks notebook source
-- DBTITLE 1,分区优化
-- CREATE OR REPLACE TABLE DM.DM_TF_EXT_UNIONALL_MARKET_SALES (
-- MARKET STRING,
-- KEY_COMPETITOR STRING,
-- CLASS STRING,
-- YYYYMM STRING,
-- PACK_COD STRING,
-- CORP_COD STRING,
-- AUDIT_COD STRING,
-- PLATFORM_TYPE STRING,
-- STORE_NAME STRING,
-- STORE_TYPE STRING,
-- REGION_TYPE STRING,
-- DATA_SOURCE STRING,
-- PACK_FLAG STRING,
-- PROD_FLAG STRING,
-- DTP_FLAG STRING,
-- CMPS_FLAG STRING,
-- NEW_CODE STRING COMMENT '主数据关联CODE',
-- INST_CODE STRING COMMENT '内部机构编码',
-- AIA_HP_FLAG STRING,
-- DEPT_NAME STRING COMMENT '科室名称',
-- H_LEVEL STRING,
-- REIMBURSE STRING COMMENT '报销情况',
-- REIMBURSE_TYPE STRING COMMENT '报销类型',
-- PRESCRIPTION_TYPE STRING COMMENT '处方来源',
-- PRESCRIPTION DECIMAL(35,10),
-- PRESCRIPTION_LY DECIMAL(35,10),
-- VA STRING,
-- UT STRING,
-- CU STRING,
-- PT STRING,
-- SALES_UNIT_CAL DECIMAL(35,6),
-- SALES_UNIT_CAL_LY DECIMAL(35,6),
-- SALES_VALUE_CAL DECIMAL(35,6),
-- SALES_VALUE_CAL_LY DECIMAL(35,6),
-- CONUTING_UNIT DECIMAL(35,6),
-- CONUTING_UNIT_LY DECIMAL(35,6),
-- PDOT DECIMAL(35,6),
-- PDOT_LY DECIMAL(35,6),
-- ETL_INSERT_DT TIMESTAMP,
-- ETL_UPDATE_DT TIMESTAMP
-- )
-- USING delta
-- PARTITIONED BY (DATA_SOURCE)
-- -- LOCATION 'abfss://master@azcdatalakeprd.dfs.core.chinacloudapi.cn/DM/dm_tf_ext_unionall_market_sales';
-- -- 上面是生产环境location下面是测试环境location
-- LOCATION 'abfss://master@retaildlstoragetest.dfs.core.chinacloudapi.cn/DM/dm_tf_ext_unionall_market_sales';
-- COMMAND ----------
-- DBTITLE 1,性能优化
-- -- 按最常用的过滤和 JOIN 字段做 Z-Order 聚簇
-- OPTIMIZE DM.DM_TF_EXT_UNIONALL_SALES
-- ZORDER BY (DATA_SOURCE, PACK_CODE, YYYYMM);
-- -- 收集统计信息,帮助优化器选择更好的执行计划
-- ANALYZE TABLE DM.DM_TF_EXT_UNIONALL_SALES COMPUTE STATISTICS FOR ALL COLUMNS;
-- ANALYZE TABLE DM.dm_td_ext_unionall_market_pack_mapping COMPUTE STATISTICS FOR ALL COLUMNS;
-- COMMAND ----------
-- DBTITLE 1,测试跑数据
-- delete from DM.DM_TF_EXT_UNIONALL_MARKET_SALES where DATA_SOURCE IN ('Retail(Quarterly)', 'EC(Monthly)')
-- COMMAND ----------
-- DBTITLE 1,测试跑数据
-- INSERT INTO TABLE
-- DM.DM_TF_EXT_UNIONALL_MARKET_SALES
-- WITH
-- t1_filtered AS (
-- SELECT
-- *
-- FROM
-- DM.DM_TF_EXT_UNIONALL_SALES
-- WHERE
-- DATA_SOURCE IN (
-- -- 'IQVIA-CHPA(Monthly)',
-- -- 'AIA(Monthly)'
-- -- 'CHC(Quarterly)',
-- -- 'IQVIA-COUNTY(Quarterly)',
-- -- 'THC(Quarterly)'
-- 'Retail(Quarterly)',
-- -- 'DTP(Quarterly)',
-- 'EC(Monthly)'
-- -- 'XH Data(Quarterly)'
-- ) -- 谓词下推,最先过滤
-- ),
-- mapping_cleaned AS (
-- SELECT
-- *
-- FROM
-- (
-- SELECT
-- PACK_CODE,
-- DATASOURCE,
-- MARKET,
-- KEY_COMPETITOR,
-- CLASS,
-- MARKET_RATIO,
-- Value_brand_Ratio,
-- Unit_brand_Ratio,
-- CountingUnit_brand_Ratio,
-- PDOT_brand_Ratio,
-- GREATEST (
-- starttime,
-- Value_brand_Ratio_start,
-- Unit_brand_start,
-- CountingUnit_brand_start,
-- PDOT_brand_start
-- ) AS eff_start,
-- LEAST (
-- endtime,
-- Value_brand_Ratio_end,
-- Unit_brand_end,
-- CountingUnit_brand_end,
-- PDOT_brand_end
-- ) AS eff_end
-- FROM
-- DM.dm_td_ext_unionall_market_pack_mapping
-- ) tmp
-- WHERE
-- eff_start <= eff_end -- 在外层子查询中过滤
-- ),
-- joined AS (
-- SELECT
-- /*+ BROADCAST(t2, t3) */
-- t1.*,
-- -- 当年 ratio
-- t2.MARKET,
-- t2.KEY_COMPETITOR,
-- t2.CLASS,
-- t2.MARKET_RATIO AS cy_market_ratio,
-- t2.Value_brand_Ratio AS cy_value_ratio,
-- t2.Unit_brand_Ratio AS cy_unit_ratio,
-- t2.CountingUnit_brand_Ratio AS cy_cu_ratio,
-- t2.PDOT_brand_Ratio AS cy_pdot_ratio,
-- -- 去年 ratio
-- t3.MARKET_RATIO AS ly_market_ratio,
-- t3.Value_brand_Ratio AS ly_value_ratio,
-- t3.Unit_brand_Ratio AS ly_unit_ratio,
-- t3.CountingUnit_brand_Ratio AS ly_cu_ratio,
-- t3.PDOT_brand_Ratio AS ly_pdot_ratio
-- FROM
-- t1_filtered t1
-- LEFT JOIN mapping_cleaned t2 ON t1.PACK_CODE = t2.PACK_CODE
-- AND t1.DATA_SOURCE = t2.DATASOURCE
-- AND t1.YYYYMM >= t2.eff_start
-- AND t1.YYYYMM <= t2.eff_end
-- LEFT JOIN mapping_cleaned t3 ON t1.PACK_CODE = t3.PACK_CODE
-- AND t1.DATA_SOURCE = t3.DATASOURCE
-- AND (t1.YYYYMM - 100) >= t3.eff_start
-- AND (t1.YYYYMM - 100) <= t3.eff_end
-- AND t2.MARKET = t3.MARKET -- 保持原逻辑
-- )
-- SELECT
-- NVL(MARKET, 'ALL Market') AS MARKET,
-- NVL(KEY_COMPETITOR, 'OTHERS') AS KEY_COMPETITOR,
-- CASE
-- WHEN trim(lower(CLASS)) = 'others' THEN 'OTHERS'
-- WHEN CLASS IS NULL THEN 'OTHERS'
-- ELSE CLASS
-- END AS CLASS,
-- YYYYMM,
-- PACK_CODE,
-- CORP_CODE,
-- AUDIT_CODE,
-- PLATFORM_TYPE,
-- STORE_NAME,
-- STORE_TYPE,
-- REGION_TYPE,
-- DATA_SOURCE,
-- PACK_FLAG,
-- PROD_FLAG,
-- DTP_FLAG,
-- CMPS_FLAG,
-- NEW_CODE,
-- INST_CODE,
-- NULL AS AIA_HP_FLAG,
-- DEPT_NAME,
-- H_LEVEL,
-- REIMBURSE,
-- REIMBURSE_TYPE,
-- PRESCRIPTION_SOURCE AS PRESCRIPTION_TYPE,
-- PRESCRIPTION,
-- PRESCRIPTION_LY,
-- -- 预计算公用因子,避免重复计算
-- NVL(NVL(cy_market_ratio, 1) * cy_value_ratio, 1) AS VA,
-- NVL(NVL(cy_market_ratio, 1) * cy_unit_ratio, 1) AS UT,
-- NVL(NVL(cy_market_ratio, 1) * cy_cu_ratio, 1) AS CU,
-- NVL(NVL(cy_market_ratio, 1) * cy_pdot_ratio, 1) AS PT,
-- CAST(
-- SALES_UNIT_CAL * NVL(cy_unit_ratio, 1) * NVL(cy_market_ratio, 1) AS DECIMAL(35, 6)
-- ) AS SALES_UNIT_CAL,
-- CAST(
-- SALES_UNIT_CAL_LY * NVL(ly_unit_ratio, 1) * NVL(ly_market_ratio, 1) AS DECIMAL(35, 6)
-- ) AS SALES_UNIT_CAL_LY,
-- CAST(
-- SALES_VALUE_CAL * NVL(cy_value_ratio, 1) * NVL(cy_market_ratio, 1) AS DECIMAL(35, 6)
-- ) AS SALES_VALUE_CAL,
-- CAST(
-- SALES_VALUE_CAL_LY * NVL(ly_value_ratio, 1) * NVL(ly_market_ratio, 1) AS DECIMAL(35, 6)
-- ) AS SALES_VALUE_CAL_LY,
-- CAST(
-- CONUTING_UNIT * NVL(cy_cu_ratio, 1) * NVL(cy_market_ratio, 1) AS DECIMAL(35, 6)
-- ) AS CONUTING_UNIT,
-- CAST(
-- CONUTING_UNIT_LY * NVL(ly_cu_ratio, 1) * NVL(ly_market_ratio, 1) AS DECIMAL(35, 6)
-- ) AS CONUTING_UNIT_LY,
-- CAST(
-- CONUTING_UNIT * NVL(cy_pdot_ratio, 1) * NVL(cy_market_ratio, 1) AS DECIMAL(35, 6)
-- ) AS PDOT,
-- CAST(
-- CONUTING_UNIT_LY * NVL(ly_pdot_ratio, 1) * NVL(ly_market_ratio, 1) AS DECIMAL(35, 6)
-- ) AS PDOT_LY,
-- FROM_UTC_TIMESTAMP(CURRENT_TIMESTAMP(), 'UTC+8') AS ETL_INSERT_DT,
-- FROM_UTC_TIMESTAMP(CURRENT_TIMESTAMP(), 'UTC+8') AS ETL_UPDATE_DT
-- FROM
-- joined;
-- COMMAND ----------
INSERT OVERWRITE TABLE
DM.DM_TF_EXT_UNIONALL_MARKET_SALES
WITH
t1_filtered AS (
SELECT
*
FROM
DM.DM_TF_EXT_UNIONALL_SALES
WHERE
DATA_SOURCE IN (
'IQVIA-CHPA(Monthly)',
'AIA(Monthly)',
'CHC(Quarterly)',
'IQVIA-COUNTY(Quarterly)',
'THC(Quarterly)',
'Retail(Quarterly)',
'DTP(Quarterly)',
'EC(Monthly)',
'XH Data(Quarterly)'
) -- 谓词下推,最先过滤
),
mapping_cleaned AS (
SELECT
*
FROM
(
SELECT
PACK_CODE,
DATASOURCE,
MARKET,
KEY_COMPETITOR,
CLASS,
MARKET_RATIO,
Value_brand_Ratio,
Unit_brand_Ratio,
CountingUnit_brand_Ratio,
PDOT_brand_Ratio,
GREATEST (
starttime,
Value_brand_Ratio_start,
Unit_brand_start,
CountingUnit_brand_start,
PDOT_brand_start
) AS eff_start,
LEAST (
endtime,
Value_brand_Ratio_end,
Unit_brand_end,
CountingUnit_brand_end,
PDOT_brand_end
) AS eff_end
FROM
DM.dm_td_ext_unionall_market_pack_mapping
) tmp
WHERE
eff_start <= eff_end -- 在外层子查询中过滤
),
joined AS (
SELECT
/*+ BROADCAST(t2, t3) */
t1.*,
-- 当年 ratio
t2.MARKET,
t2.KEY_COMPETITOR,
t2.CLASS,
t2.MARKET_RATIO AS cy_market_ratio,
t2.Value_brand_Ratio AS cy_value_ratio,
t2.Unit_brand_Ratio AS cy_unit_ratio,
t2.CountingUnit_brand_Ratio AS cy_cu_ratio,
t2.PDOT_brand_Ratio AS cy_pdot_ratio,
-- 去年 ratio
t3.MARKET_RATIO AS ly_market_ratio,
t3.Value_brand_Ratio AS ly_value_ratio,
t3.Unit_brand_Ratio AS ly_unit_ratio,
t3.CountingUnit_brand_Ratio AS ly_cu_ratio,
t3.PDOT_brand_Ratio AS ly_pdot_ratio
FROM
t1_filtered t1
LEFT JOIN mapping_cleaned t2 ON t1.PACK_CODE = t2.PACK_CODE
AND t1.DATA_SOURCE = t2.DATASOURCE
AND t1.YYYYMM >= t2.eff_start
AND t1.YYYYMM <= t2.eff_end
LEFT JOIN mapping_cleaned t3 ON t1.PACK_CODE = t3.PACK_CODE
AND t1.DATA_SOURCE = t3.DATASOURCE
AND (t1.YYYYMM - 100) >= t3.eff_start
AND (t1.YYYYMM - 100) <= t3.eff_end
AND t2.MARKET = t3.MARKET -- 保持原逻辑
)
SELECT
NVL(MARKET, 'ALL Market') AS MARKET,
NVL(KEY_COMPETITOR, 'OTHERS') AS KEY_COMPETITOR,
CASE
WHEN trim(lower(CLASS)) = 'others' THEN 'OTHERS'
WHEN CLASS IS NULL THEN 'OTHERS'
ELSE CLASS
END AS CLASS,
YYYYMM,
PACK_CODE,
CORP_CODE,
AUDIT_CODE,
PLATFORM_TYPE,
STORE_NAME,
STORE_TYPE,
REGION_TYPE,
DATA_SOURCE,
PACK_FLAG,
PROD_FLAG,
DTP_FLAG,
CMPS_FLAG,
NEW_CODE,
INST_CODE,
NULL AS AIA_HP_FLAG,
DEPT_NAME,
H_LEVEL,
REIMBURSE,
REIMBURSE_TYPE,
PRESCRIPTION_SOURCE AS PRESCRIPTION_TYPE,
PRESCRIPTION,
PRESCRIPTION_LY,
-- 预计算公用因子,避免重复计算
NVL(NVL(cy_market_ratio, 1) * cy_value_ratio, 1) AS VA,
NVL(NVL(cy_market_ratio, 1) * cy_unit_ratio, 1) AS UT,
NVL(NVL(cy_market_ratio, 1) * cy_cu_ratio, 1) AS CU,
NVL(NVL(cy_market_ratio, 1) * cy_pdot_ratio, 1) AS PT,
CAST(
SALES_UNIT_CAL * NVL(cy_unit_ratio, 1) * NVL(cy_market_ratio, 1) AS DECIMAL(35, 6)
) AS SALES_UNIT_CAL,
CAST(
SALES_UNIT_CAL_LY * NVL(ly_unit_ratio, 1) * NVL(ly_market_ratio, 1) AS DECIMAL(35, 6)
) AS SALES_UNIT_CAL_LY,
CAST(
SALES_VALUE_CAL * NVL(cy_value_ratio, 1) * NVL(cy_market_ratio, 1) AS DECIMAL(35, 6)
) AS SALES_VALUE_CAL,
CAST(
SALES_VALUE_CAL_LY * NVL(ly_value_ratio, 1) * NVL(ly_market_ratio, 1) AS DECIMAL(35, 6)
) AS SALES_VALUE_CAL_LY,
CAST(
CONUTING_UNIT * NVL(cy_cu_ratio, 1) * NVL(cy_market_ratio, 1) AS DECIMAL(35, 6)
) AS CONUTING_UNIT,
CAST(
CONUTING_UNIT_LY * NVL(ly_cu_ratio, 1) * NVL(ly_market_ratio, 1) AS DECIMAL(35, 6)
) AS CONUTING_UNIT_LY,
CAST(
CONUTING_UNIT * NVL(cy_pdot_ratio, 1) * NVL(cy_market_ratio, 1) AS DECIMAL(35, 6)
) AS PDOT,
CAST(
CONUTING_UNIT_LY * NVL(ly_pdot_ratio, 1) * NVL(ly_market_ratio, 1) AS DECIMAL(35, 6)
) AS PDOT_LY,
FROM_UTC_TIMESTAMP(CURRENT_TIMESTAMP(), 'UTC+8') AS ETL_INSERT_DT,
FROM_UTC_TIMESTAMP(CURRENT_TIMESTAMP(), 'UTC+8') AS ETL_UPDATE_DT
FROM
joined;
-- COMMAND ----------
-- DBTITLE 1,maket_sales_cht
INSERT OVERWRITE TABLE
DM.DM_TF_EXT_THC_MARKET_SALES_CHT
WITH
t1_filtered AS (
SELECT
*
FROM
DM.DM_TF_EXT_THC_SALES
WHERE DATA_SOURCE = 'THC(Quarterly)'
and AUDIT_CODE <> 'CHT'
-- 谓词下推,最先过滤
),
mapping_cleaned AS (
SELECT
*
FROM
(
SELECT
PACK_CODE,
'THC(Quarterly)' as DATASOURCE,
MARKET,
KEY_COMPETITOR,
CLASS,
MARKET_RATIO,
Value_brand_Ratio,
Unit_brand_Ratio,
CountingUnit_brand_Ratio,
PDOT_brand_Ratio,
GREATEST (
starttime,
Value_brand_Ratio_start,
Unit_brand_start,
CountingUnit_brand_start,
PDOT_brand_start
) AS eff_start,
LEAST (
endtime,
Value_brand_Ratio_end,
Unit_brand_end,
CountingUnit_brand_end,
PDOT_brand_end
) AS eff_end
FROM
DM.dm_td_ext_THC_market_pack_mapping
) tmp
WHERE
eff_start <= eff_end -- 在外层子查询中过滤
),
joined AS (
SELECT
/*+ BROADCAST(t2, t3) */
t1.*,
-- 当年 ratio
t2.MARKET,
t2.KEY_COMPETITOR,
t2.CLASS,
t2.MARKET_RATIO AS cy_market_ratio,
t2.Value_brand_Ratio AS cy_value_ratio,
t2.Unit_brand_Ratio AS cy_unit_ratio,
t2.CountingUnit_brand_Ratio AS cy_cu_ratio,
t2.PDOT_brand_Ratio AS cy_pdot_ratio,
-- 去年 ratio
t3.MARKET_RATIO AS ly_market_ratio,
t3.Value_brand_Ratio AS ly_value_ratio,
t3.Unit_brand_Ratio AS ly_unit_ratio,
t3.CountingUnit_brand_Ratio AS ly_cu_ratio,
t3.PDOT_brand_Ratio AS ly_pdot_ratio
FROM
t1_filtered t1
LEFT JOIN mapping_cleaned t2 ON t1.PACK_CODE = t2.PACK_CODE
AND t1.DATA_SOURCE = t2.DATASOURCE
AND t1.YYYYMM >= t2.eff_start
AND t1.YYYYMM <= t2.eff_end
LEFT JOIN mapping_cleaned t3 ON t1.PACK_CODE = t3.PACK_CODE
AND t1.DATA_SOURCE = t3.DATASOURCE
AND (t1.YYYYMM - 100) >= t3.eff_start
AND (t1.YYYYMM - 100) <= t3.eff_end
AND t2.MARKET = t3.MARKET -- 保持原逻辑
)
SELECT
NVL(MARKET, 'ALL Market') AS MARKET,
NVL(KEY_COMPETITOR, 'OTHERS') AS KEY_COMPETITOR,
CASE
WHEN trim(lower(CLASS)) = 'others' THEN 'OTHERS'
WHEN CLASS IS NULL THEN 'OTHERS'
ELSE CLASS
END AS CLASS,
YYYYMM,
PACK_CODE,
CORP_CODE,
AUDIT_CODE,
PLATFORM_TYPE,
STORE_NAME,
STORE_TYPE,
REGION_TYPE,
DATA_SOURCE,
PACK_FLAG,
PROD_FLAG,
DTP_FLAG,
CMPS_FLAG,
NEW_CODE,
INST_CODE,
NULL AS AIA_HP_FLAG,
DEPT_NAME,
H_LEVEL,
REIMBURSE,
REIMBURSE_TYPE,
PRESCRIPTION_SOURCE AS PRESCRIPTION_TYPE,
PRESCRIPTION,
PRESCRIPTION_LY,
-- 预计算公用因子,避免重复计算
NVL(NVL(cy_market_ratio, 1) * cy_value_ratio, 1) AS VA,
NVL(NVL(cy_market_ratio, 1) * cy_unit_ratio, 1) AS UT,
NVL(NVL(cy_market_ratio, 1) * cy_cu_ratio, 1) AS CU,
NVL(NVL(cy_market_ratio, 1) * cy_pdot_ratio, 1) AS PT,
CAST(
SALES_UNIT_CAL * NVL(cy_unit_ratio, 1) * NVL(cy_market_ratio, 1) AS DECIMAL(35, 6)
) AS SALES_UNIT_CAL,
CAST(
SALES_UNIT_CAL_LY * NVL(ly_unit_ratio, 1) * NVL(ly_market_ratio, 1) AS DECIMAL(35, 6)
) AS SALES_UNIT_CAL_LY,
CAST(
SALES_VALUE_CAL * NVL(cy_value_ratio, 1) * NVL(cy_market_ratio, 1) AS DECIMAL(35, 6)
) AS SALES_VALUE_CAL,
CAST(
SALES_VALUE_CAL_LY * NVL(ly_value_ratio, 1) * NVL(ly_market_ratio, 1) AS DECIMAL(35, 6)
) AS SALES_VALUE_CAL_LY,
CAST(
CONUTING_UNIT * NVL(cy_cu_ratio, 1) * NVL(cy_market_ratio, 1) AS DECIMAL(35, 6)
) AS CONUTING_UNIT,
CAST(
CONUTING_UNIT_LY * NVL(ly_cu_ratio, 1) * NVL(ly_market_ratio, 1) AS DECIMAL(35, 6)
) AS CONUTING_UNIT_LY,
CAST(
CONUTING_UNIT * NVL(cy_pdot_ratio, 1) * NVL(cy_market_ratio, 1) AS DECIMAL(35, 6)
) AS PDOT,
CAST(
CONUTING_UNIT_LY * NVL(ly_pdot_ratio, 1) * NVL(ly_market_ratio, 1) AS DECIMAL(35, 6)
) AS PDOT_LY,
FROM_UTC_TIMESTAMP(CURRENT_TIMESTAMP(), 'UTC+8') AS ETL_INSERT_DT,
FROM_UTC_TIMESTAMP(CURRENT_TIMESTAMP(), 'UTC+8') AS ETL_UPDATE_DT
FROM
joined;