Files
MarketAnalysis-ETL/UNIONALL/DM_TF_EXT_UNIONALL_MARKET_SALES.sql

526 lines
16 KiB
SQL
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
-- Databricks notebook source
-- DBTITLE 1,分区优化
-- CREATE OR REPLACE TABLE DM.DM_TF_EXT_UNIONALL_MARKET_SALES (
-- MARKET STRING,
-- KEY_COMPETITOR STRING,
-- CLASS STRING,
-- YYYYMM STRING,
-- PACK_COD STRING,
-- CORP_COD STRING,
-- AUDIT_COD STRING,
-- PLATFORM_TYPE STRING,
-- STORE_NAME STRING,
-- STORE_TYPE STRING,
-- REGION_TYPE STRING,
-- DATA_SOURCE STRING,
-- PACK_FLAG STRING,
-- PROD_FLAG STRING,
-- DTP_FLAG STRING,
-- CMPS_FLAG STRING,
-- NEW_CODE STRING COMMENT '主数据关联CODE',
-- INST_CODE STRING COMMENT '内部机构编码',
-- AIA_HP_FLAG STRING,
-- DEPT_NAME STRING COMMENT '科室名称',
-- H_LEVEL STRING,
-- REIMBURSE STRING COMMENT '报销情况',
-- REIMBURSE_TYPE STRING COMMENT '报销类型',
-- PRESCRIPTION_TYPE STRING COMMENT '处方来源',
-- PRESCRIPTION DECIMAL(35,10),
-- PRESCRIPTION_LY DECIMAL(35,10),
-- VA STRING,
-- UT STRING,
-- CU STRING,
-- PT STRING,
-- SALES_UNIT_CAL DECIMAL(35,6),
-- SALES_UNIT_CAL_LY DECIMAL(35,6),
-- SALES_VALUE_CAL DECIMAL(35,6),
-- SALES_VALUE_CAL_LY DECIMAL(35,6),
-- CONUTING_UNIT DECIMAL(35,6),
-- CONUTING_UNIT_LY DECIMAL(35,6),
-- PDOT DECIMAL(35,6),
-- PDOT_LY DECIMAL(35,6),
-- ETL_INSERT_DT TIMESTAMP,
-- ETL_UPDATE_DT TIMESTAMP
-- )
-- USING delta
-- PARTITIONED BY (DATA_SOURCE)
-- -- LOCATION 'abfss://master@azcdatalakeprd.dfs.core.chinacloudapi.cn/DM/dm_tf_ext_unionall_market_sales';
-- -- 上面是生产环境location下面是测试环境location
-- LOCATION 'abfss://master@retaildlstoragetest.dfs.core.chinacloudapi.cn/DM/dm_tf_ext_unionall_market_sales';
-- COMMAND ----------
-- DBTITLE 1,性能优化
-- -- 按最常用的过滤和 JOIN 字段做 Z-Order 聚簇
-- OPTIMIZE DM.DM_TF_EXT_UNIONALL_SALES
-- ZORDER BY (DATA_SOURCE, PACK_CODE, YYYYMM);
-- -- 收集统计信息,帮助优化器选择更好的执行计划
-- ANALYZE TABLE DM.DM_TF_EXT_UNIONALL_SALES COMPUTE STATISTICS FOR ALL COLUMNS;
-- ANALYZE TABLE DM.dm_td_ext_unionall_market_pack_mapping COMPUTE STATISTICS FOR ALL COLUMNS;
-- COMMAND ----------
-- DBTITLE 1,测试跑数据
-- delete from DM.DM_TF_EXT_UNIONALL_MARKET_SALES where DATA_SOURCE IN ('Retail(Quarterly)', 'EC(Monthly)')
-- COMMAND ----------
-- DBTITLE 1,测试跑数据
-- INSERT INTO TABLE
-- DM.DM_TF_EXT_UNIONALL_MARKET_SALES
-- WITH
-- t1_filtered AS (
-- SELECT
-- *
-- FROM
-- DM.DM_TF_EXT_UNIONALL_SALES
-- WHERE
-- DATA_SOURCE IN (
-- -- 'IQVIA-CHPA(Monthly)',
-- -- 'AIA(Monthly)'
-- -- 'CHC(Quarterly)',
-- -- 'IQVIA-COUNTY(Quarterly)',
-- -- 'THC(Quarterly)'
-- 'Retail(Quarterly)',
-- -- 'DTP(Quarterly)',
-- 'EC(Monthly)'
-- -- 'XH Data(Quarterly)'
-- ) -- 谓词下推,最先过滤
-- ),
-- mapping_cleaned AS (
-- SELECT
-- *
-- FROM
-- (
-- SELECT
-- PACK_CODE,
-- DATASOURCE,
-- MARKET,
-- KEY_COMPETITOR,
-- CLASS,
-- MARKET_RATIO,
-- Value_brand_Ratio,
-- Unit_brand_Ratio,
-- CountingUnit_brand_Ratio,
-- PDOT_brand_Ratio,
-- GREATEST (
-- starttime,
-- Value_brand_Ratio_start,
-- Unit_brand_start,
-- CountingUnit_brand_start,
-- PDOT_brand_start
-- ) AS eff_start,
-- LEAST (
-- endtime,
-- Value_brand_Ratio_end,
-- Unit_brand_end,
-- CountingUnit_brand_end,
-- PDOT_brand_end
-- ) AS eff_end
-- FROM
-- DM.dm_td_ext_unionall_market_pack_mapping
-- ) tmp
-- WHERE
-- eff_start <= eff_end -- 在外层子查询中过滤
-- ),
-- joined AS (
-- SELECT
-- /*+ BROADCAST(t2, t3) */
-- t1.*,
-- -- 当年 ratio
-- t2.MARKET,
-- t2.KEY_COMPETITOR,
-- t2.CLASS,
-- t2.MARKET_RATIO AS cy_market_ratio,
-- t2.Value_brand_Ratio AS cy_value_ratio,
-- t2.Unit_brand_Ratio AS cy_unit_ratio,
-- t2.CountingUnit_brand_Ratio AS cy_cu_ratio,
-- t2.PDOT_brand_Ratio AS cy_pdot_ratio,
-- -- 去年 ratio
-- t3.MARKET_RATIO AS ly_market_ratio,
-- t3.Value_brand_Ratio AS ly_value_ratio,
-- t3.Unit_brand_Ratio AS ly_unit_ratio,
-- t3.CountingUnit_brand_Ratio AS ly_cu_ratio,
-- t3.PDOT_brand_Ratio AS ly_pdot_ratio
-- FROM
-- t1_filtered t1
-- LEFT JOIN mapping_cleaned t2 ON t1.PACK_CODE = t2.PACK_CODE
-- AND t1.DATA_SOURCE = t2.DATASOURCE
-- AND t1.YYYYMM >= t2.eff_start
-- AND t1.YYYYMM <= t2.eff_end
-- LEFT JOIN mapping_cleaned t3 ON t1.PACK_CODE = t3.PACK_CODE
-- AND t1.DATA_SOURCE = t3.DATASOURCE
-- AND (t1.YYYYMM - 100) >= t3.eff_start
-- AND (t1.YYYYMM - 100) <= t3.eff_end
-- AND t2.MARKET = t3.MARKET -- 保持原逻辑
-- )
-- SELECT
-- NVL(MARKET, 'ALL Market') AS MARKET,
-- NVL(KEY_COMPETITOR, 'OTHERS') AS KEY_COMPETITOR,
-- CASE
-- WHEN trim(lower(CLASS)) = 'others' THEN 'OTHERS'
-- WHEN CLASS IS NULL THEN 'OTHERS'
-- ELSE CLASS
-- END AS CLASS,
-- YYYYMM,
-- PACK_CODE,
-- CORP_CODE,
-- AUDIT_CODE,
-- PLATFORM_TYPE,
-- STORE_NAME,
-- STORE_TYPE,
-- REGION_TYPE,
-- DATA_SOURCE,
-- PACK_FLAG,
-- PROD_FLAG,
-- DTP_FLAG,
-- CMPS_FLAG,
-- NEW_CODE,
-- INST_CODE,
-- NULL AS AIA_HP_FLAG,
-- DEPT_NAME,
-- H_LEVEL,
-- REIMBURSE,
-- REIMBURSE_TYPE,
-- PRESCRIPTION_SOURCE AS PRESCRIPTION_TYPE,
-- PRESCRIPTION,
-- PRESCRIPTION_LY,
-- -- 预计算公用因子,避免重复计算
-- NVL(NVL(cy_market_ratio, 1) * cy_value_ratio, 1) AS VA,
-- NVL(NVL(cy_market_ratio, 1) * cy_unit_ratio, 1) AS UT,
-- NVL(NVL(cy_market_ratio, 1) * cy_cu_ratio, 1) AS CU,
-- NVL(NVL(cy_market_ratio, 1) * cy_pdot_ratio, 1) AS PT,
-- CAST(
-- SALES_UNIT_CAL * NVL(cy_unit_ratio, 1) * NVL(cy_market_ratio, 1) AS DECIMAL(35, 6)
-- ) AS SALES_UNIT_CAL,
-- CAST(
-- SALES_UNIT_CAL_LY * NVL(ly_unit_ratio, 1) * NVL(ly_market_ratio, 1) AS DECIMAL(35, 6)
-- ) AS SALES_UNIT_CAL_LY,
-- CAST(
-- SALES_VALUE_CAL * NVL(cy_value_ratio, 1) * NVL(cy_market_ratio, 1) AS DECIMAL(35, 6)
-- ) AS SALES_VALUE_CAL,
-- CAST(
-- SALES_VALUE_CAL_LY * NVL(ly_value_ratio, 1) * NVL(ly_market_ratio, 1) AS DECIMAL(35, 6)
-- ) AS SALES_VALUE_CAL_LY,
-- CAST(
-- CONUTING_UNIT * NVL(cy_cu_ratio, 1) * NVL(cy_market_ratio, 1) AS DECIMAL(35, 6)
-- ) AS CONUTING_UNIT,
-- CAST(
-- CONUTING_UNIT_LY * NVL(ly_cu_ratio, 1) * NVL(ly_market_ratio, 1) AS DECIMAL(35, 6)
-- ) AS CONUTING_UNIT_LY,
-- CAST(
-- CONUTING_UNIT * NVL(cy_pdot_ratio, 1) * NVL(cy_market_ratio, 1) AS DECIMAL(35, 6)
-- ) AS PDOT,
-- CAST(
-- CONUTING_UNIT_LY * NVL(ly_pdot_ratio, 1) * NVL(ly_market_ratio, 1) AS DECIMAL(35, 6)
-- ) AS PDOT_LY,
-- FROM_UTC_TIMESTAMP(CURRENT_TIMESTAMP(), 'UTC+8') AS ETL_INSERT_DT,
-- FROM_UTC_TIMESTAMP(CURRENT_TIMESTAMP(), 'UTC+8') AS ETL_UPDATE_DT
-- FROM
-- joined;
-- COMMAND ----------
-- DBTITLE 1,market_sales
-- Full rebuild (INSERT OVERWRITE) of DM.DM_TF_EXT_UNIONALL_MARKET_SALES.
-- Sales rows from DM.DM_TF_EXT_UNIONALL_SALES are joined to the market/pack
-- mapping twice (current-year window and last-year window) and the measure
-- columns are scaled by the mapping ratios.
-- The INSERT has no column list, so the final SELECT is matched to the target
-- table by position: keep the column order below aligned with the table.
INSERT OVERWRITE TABLE DM.DM_TF_EXT_UNIONALL_MARKET_SALES
WITH
    -- Sales rows limited to the data sources handled by this load; filtering
    -- first keeps the join input small.
    src_sales AS (
        SELECT *
        FROM DM.DM_TF_EXT_UNIONALL_SALES
        WHERE DATA_SOURCE IN (
            'IQVIA-CHPA(Monthly)',
            'AIA(Monthly)',
            'CHC(Quarterly)',
            'IQVIA-COUNTY(Quarterly)',
            'THC(Quarterly)',
            'Retail(Quarterly)',
            'DTP(Quarterly)',
            'EC(Monthly)',
            'XH Data(Quarterly)'
        )
    ),
    -- One row per mapping entry with a single effective window: the latest of
    -- the start columns and the earliest of the end columns.
    mapping_windows AS (
        SELECT
            PACK_CODE,
            DATASOURCE,
            MARKET,
            KEY_COMPETITOR,
            CLASS,
            MARKET_RATIO,
            Value_brand_Ratio,
            Unit_brand_Ratio,
            CountingUnit_brand_Ratio,
            PDOT_brand_Ratio,
            GREATEST(
                starttime,
                Value_brand_Ratio_start,
                Unit_brand_start,
                CountingUnit_brand_start,
                PDOT_brand_start
            ) AS eff_start,
            LEAST(
                endtime,
                Value_brand_Ratio_end,
                Unit_brand_end,
                CountingUnit_brand_end,
                PDOT_brand_end
            ) AS eff_end
        FROM DM.dm_td_ext_unionall_market_pack_mapping
    ),
    -- Keep only mapping entries whose effective window is not inverted.
    valid_mapping AS (
        SELECT *
        FROM mapping_windows
        WHERE eff_start <= eff_end
    ),
    -- Sales rows decorated with current-year (cy_*) and last-year (ly_*) ratios.
    sales_with_ratios AS (
        SELECT /*+ BROADCAST(cy_map, ly_map) */
            sales.*,
            -- Market attributes and current-year ratios.
            cy_map.MARKET,
            cy_map.KEY_COMPETITOR,
            cy_map.CLASS,
            cy_map.MARKET_RATIO             AS cy_market_ratio,
            cy_map.Value_brand_Ratio        AS cy_value_ratio,
            cy_map.Unit_brand_Ratio         AS cy_unit_ratio,
            cy_map.CountingUnit_brand_Ratio AS cy_cu_ratio,
            cy_map.PDOT_brand_Ratio         AS cy_pdot_ratio,
            -- Last-year ratios.
            ly_map.MARKET_RATIO             AS ly_market_ratio,
            ly_map.Value_brand_Ratio        AS ly_value_ratio,
            ly_map.Unit_brand_Ratio         AS ly_unit_ratio,
            ly_map.CountingUnit_brand_Ratio AS ly_cu_ratio,
            ly_map.PDOT_brand_Ratio         AS ly_pdot_ratio
        FROM src_sales AS sales
        -- Current year: same pack and data source, YYYYMM inside the window.
        LEFT JOIN valid_mapping AS cy_map
            ON  sales.PACK_CODE = cy_map.PACK_CODE
            AND sales.DATA_SOURCE = cy_map.DATASOURCE
            AND sales.YYYYMM BETWEEN cy_map.eff_start AND cy_map.eff_end
        -- Last year: YYYYMM - 100 inside the window (presumably the same month
        -- one year earlier). The MARKET equality ties the last-year entry to
        -- the current-year one, so a row whose cy_map.MARKET is NULL gets no
        -- last-year ratios.
        -- NOTE(review): if YYYYMM is stored as a string, the subtraction relies
        -- on implicit numeric casting - confirm.
        LEFT JOIN valid_mapping AS ly_map
            ON  sales.PACK_CODE = ly_map.PACK_CODE
            AND sales.DATA_SOURCE = ly_map.DATASOURCE
            AND (sales.YYYYMM - 100) BETWEEN ly_map.eff_start AND ly_map.eff_end
            AND cy_map.MARKET = ly_map.MARKET
    )
SELECT
    -- Unmapped rows get default market attributes.
    COALESCE(MARKET, 'ALL Market') AS MARKET,
    COALESCE(KEY_COMPETITOR, 'OTHERS') AS KEY_COMPETITOR,
    -- A missing CLASS and any spelling of "others" both become 'OTHERS'.
    CASE
        WHEN CLASS IS NULL OR TRIM(LOWER(CLASS)) = 'others' THEN 'OTHERS'
        ELSE CLASS
    END AS CLASS,
    YYYYMM,
    PACK_CODE,
    CORP_CODE,
    AUDIT_CODE,
    PLATFORM_TYPE,
    STORE_NAME,
    STORE_TYPE,
    REGION_TYPE,
    DATA_SOURCE,
    PACK_FLAG,
    PROD_FLAG,
    DTP_FLAG,
    CMPS_FLAG,
    NEW_CODE,
    INST_CODE,
    NULL AS AIA_HP_FLAG,
    DEPT_NAME,
    H_LEVEL,
    REIMBURSE,
    REIMBURSE_TYPE,
    PRESCRIPTION_SOURCE AS PRESCRIPTION_TYPE,
    PRESCRIPTION,
    PRESCRIPTION_LY,
    -- Combined current-year factors (market ratio x brand ratio), default 1.
    -- NOTE(review): when the market ratio is set but the brand ratio is NULL
    -- these evaluate to 1, while the matching measure below is still scaled by
    -- the market ratio - confirm this difference is intended.
    COALESCE(COALESCE(cy_market_ratio, 1) * cy_value_ratio, 1) AS VA,
    COALESCE(COALESCE(cy_market_ratio, 1) * cy_unit_ratio, 1) AS UT,
    COALESCE(COALESCE(cy_market_ratio, 1) * cy_cu_ratio, 1) AS CU,
    COALESCE(COALESCE(cy_market_ratio, 1) * cy_pdot_ratio, 1) AS PT,
    -- Scaled measures: a missing ratio counts as 1.
    CAST(SALES_UNIT_CAL * COALESCE(cy_unit_ratio, 1) * COALESCE(cy_market_ratio, 1) AS DECIMAL(35, 6)) AS SALES_UNIT_CAL,
    CAST(SALES_UNIT_CAL_LY * COALESCE(ly_unit_ratio, 1) * COALESCE(ly_market_ratio, 1) AS DECIMAL(35, 6)) AS SALES_UNIT_CAL_LY,
    CAST(SALES_VALUE_CAL * COALESCE(cy_value_ratio, 1) * COALESCE(cy_market_ratio, 1) AS DECIMAL(35, 6)) AS SALES_VALUE_CAL,
    CAST(SALES_VALUE_CAL_LY * COALESCE(ly_value_ratio, 1) * COALESCE(ly_market_ratio, 1) AS DECIMAL(35, 6)) AS SALES_VALUE_CAL_LY,
    CAST(CONUTING_UNIT * COALESCE(cy_cu_ratio, 1) * COALESCE(cy_market_ratio, 1) AS DECIMAL(35, 6)) AS CONUTING_UNIT,
    CAST(CONUTING_UNIT_LY * COALESCE(ly_cu_ratio, 1) * COALESCE(ly_market_ratio, 1) AS DECIMAL(35, 6)) AS CONUTING_UNIT_LY,
    -- PDOT is derived from the counting-unit columns scaled by the PDOT ratio.
    CAST(CONUTING_UNIT * COALESCE(cy_pdot_ratio, 1) * COALESCE(cy_market_ratio, 1) AS DECIMAL(35, 6)) AS PDOT,
    CAST(CONUTING_UNIT_LY * COALESCE(ly_pdot_ratio, 1) * COALESCE(ly_market_ratio, 1) AS DECIMAL(35, 6)) AS PDOT_LY,
    -- Load timestamps: current timestamp shifted with FROM_UTC_TIMESTAMP to 'UTC+8'.
    FROM_UTC_TIMESTAMP(CURRENT_TIMESTAMP(), 'UTC+8') AS ETL_INSERT_DT,
    FROM_UTC_TIMESTAMP(CURRENT_TIMESTAMP(), 'UTC+8') AS ETL_UPDATE_DT
FROM sales_with_ratios;
-- COMMAND ----------
-- DBTITLE 1,market_sales_cht
-- Full rebuild (INSERT OVERWRITE) of DM.DM_TF_EXT_THC_MARKET_SALES_CHT.
-- THC sales rows from DM.DM_TF_EXT_THC_SALES are joined to the THC market/pack
-- mapping twice (current-year window and last-year window) and the measure
-- columns are scaled by the mapping ratios.
-- The INSERT has no column list, so the final SELECT is matched to the target
-- table by position: keep the column order below aligned with the table.
INSERT OVERWRITE TABLE DM.DM_TF_EXT_THC_MARKET_SALES_CHT
WITH
    -- THC sales rows whose audit code is not 'CHT'; filtering first keeps the
    -- join input small.
    -- NOTE(review): rows with a NULL AUDIT_CODE are dropped as well, because
    -- the <> comparison yields NULL. Excluding 'CHT' also looks surprising for
    -- a target table named *_CHT - confirm both are intended.
    src_sales AS (
        SELECT *
        FROM DM.DM_TF_EXT_THC_SALES
        WHERE DATA_SOURCE = 'THC(Quarterly)'
          AND AUDIT_CODE <> 'CHT'
    ),
    -- One row per mapping entry with a single effective window: the latest of
    -- the start columns and the earliest of the end columns. DATASOURCE is
    -- supplied as a literal so the join below can compare it to DATA_SOURCE.
    mapping_windows AS (
        SELECT
            PACK_CODE,
            'THC(Quarterly)' AS DATASOURCE,
            MARKET,
            KEY_COMPETITOR,
            CLASS,
            MARKET_RATIO,
            Value_brand_Ratio,
            Unit_brand_Ratio,
            CountingUnit_brand_Ratio,
            PDOT_brand_Ratio,
            GREATEST(
                starttime,
                Value_brand_Ratio_start,
                Unit_brand_start,
                CountingUnit_brand_start,
                PDOT_brand_start
            ) AS eff_start,
            LEAST(
                endtime,
                Value_brand_Ratio_end,
                Unit_brand_end,
                CountingUnit_brand_end,
                PDOT_brand_end
            ) AS eff_end
        FROM DM.dm_td_ext_THC_market_pack_mapping
    ),
    -- Keep only mapping entries whose effective window is not inverted.
    valid_mapping AS (
        SELECT *
        FROM mapping_windows
        WHERE eff_start <= eff_end
    ),
    -- Sales rows decorated with current-year (cy_*) and last-year (ly_*) ratios.
    sales_with_ratios AS (
        SELECT /*+ BROADCAST(cy_map, ly_map) */
            sales.*,
            -- Market attributes and current-year ratios.
            cy_map.MARKET,
            cy_map.KEY_COMPETITOR,
            cy_map.CLASS,
            cy_map.MARKET_RATIO             AS cy_market_ratio,
            cy_map.Value_brand_Ratio        AS cy_value_ratio,
            cy_map.Unit_brand_Ratio         AS cy_unit_ratio,
            cy_map.CountingUnit_brand_Ratio AS cy_cu_ratio,
            cy_map.PDOT_brand_Ratio         AS cy_pdot_ratio,
            -- Last-year ratios.
            ly_map.MARKET_RATIO             AS ly_market_ratio,
            ly_map.Value_brand_Ratio        AS ly_value_ratio,
            ly_map.Unit_brand_Ratio         AS ly_unit_ratio,
            ly_map.CountingUnit_brand_Ratio AS ly_cu_ratio,
            ly_map.PDOT_brand_Ratio         AS ly_pdot_ratio
        FROM src_sales AS sales
        -- Current year: same pack and data source, YYYYMM inside the window.
        LEFT JOIN valid_mapping AS cy_map
            ON  sales.PACK_CODE = cy_map.PACK_CODE
            AND sales.DATA_SOURCE = cy_map.DATASOURCE
            AND sales.YYYYMM BETWEEN cy_map.eff_start AND cy_map.eff_end
        -- Last year: YYYYMM - 100 inside the window (presumably the same month
        -- one year earlier). The MARKET equality ties the last-year entry to
        -- the current-year one, so a row whose cy_map.MARKET is NULL gets no
        -- last-year ratios.
        -- NOTE(review): if YYYYMM is stored as a string, the subtraction relies
        -- on implicit numeric casting - confirm.
        LEFT JOIN valid_mapping AS ly_map
            ON  sales.PACK_CODE = ly_map.PACK_CODE
            AND sales.DATA_SOURCE = ly_map.DATASOURCE
            AND (sales.YYYYMM - 100) BETWEEN ly_map.eff_start AND ly_map.eff_end
            AND cy_map.MARKET = ly_map.MARKET
    )
SELECT
    -- Unmapped rows get default market attributes.
    COALESCE(MARKET, 'ALL Market') AS MARKET,
    COALESCE(KEY_COMPETITOR, 'OTHERS') AS KEY_COMPETITOR,
    -- A missing CLASS and any spelling of "others" both become 'OTHERS'.
    CASE
        WHEN CLASS IS NULL OR TRIM(LOWER(CLASS)) = 'others' THEN 'OTHERS'
        ELSE CLASS
    END AS CLASS,
    YYYYMM,
    PACK_CODE,
    CORP_CODE,
    AUDIT_CODE,
    PLATFORM_TYPE,
    STORE_NAME,
    STORE_TYPE,
    REGION_TYPE,
    DATA_SOURCE,
    PACK_FLAG,
    PROD_FLAG,
    DTP_FLAG,
    CMPS_FLAG,
    NEW_CODE,
    INST_CODE,
    NULL AS AIA_HP_FLAG,
    DEPT_NAME,
    H_LEVEL,
    REIMBURSE,
    REIMBURSE_TYPE,
    PRESCRIPTION_SOURCE AS PRESCRIPTION_TYPE,
    PRESCRIPTION,
    PRESCRIPTION_LY,
    -- Combined current-year factors (market ratio x brand ratio), default 1.
    -- NOTE(review): when the market ratio is set but the brand ratio is NULL
    -- these evaluate to 1, while the matching measure below is still scaled by
    -- the market ratio - confirm this difference is intended.
    COALESCE(COALESCE(cy_market_ratio, 1) * cy_value_ratio, 1) AS VA,
    COALESCE(COALESCE(cy_market_ratio, 1) * cy_unit_ratio, 1) AS UT,
    COALESCE(COALESCE(cy_market_ratio, 1) * cy_cu_ratio, 1) AS CU,
    COALESCE(COALESCE(cy_market_ratio, 1) * cy_pdot_ratio, 1) AS PT,
    -- Scaled measures: a missing ratio counts as 1.
    CAST(SALES_UNIT_CAL * COALESCE(cy_unit_ratio, 1) * COALESCE(cy_market_ratio, 1) AS DECIMAL(35, 6)) AS SALES_UNIT_CAL,
    CAST(SALES_UNIT_CAL_LY * COALESCE(ly_unit_ratio, 1) * COALESCE(ly_market_ratio, 1) AS DECIMAL(35, 6)) AS SALES_UNIT_CAL_LY,
    CAST(SALES_VALUE_CAL * COALESCE(cy_value_ratio, 1) * COALESCE(cy_market_ratio, 1) AS DECIMAL(35, 6)) AS SALES_VALUE_CAL,
    CAST(SALES_VALUE_CAL_LY * COALESCE(ly_value_ratio, 1) * COALESCE(ly_market_ratio, 1) AS DECIMAL(35, 6)) AS SALES_VALUE_CAL_LY,
    CAST(CONUTING_UNIT * COALESCE(cy_cu_ratio, 1) * COALESCE(cy_market_ratio, 1) AS DECIMAL(35, 6)) AS CONUTING_UNIT,
    CAST(CONUTING_UNIT_LY * COALESCE(ly_cu_ratio, 1) * COALESCE(ly_market_ratio, 1) AS DECIMAL(35, 6)) AS CONUTING_UNIT_LY,
    -- PDOT is derived from the counting-unit columns scaled by the PDOT ratio.
    CAST(CONUTING_UNIT * COALESCE(cy_pdot_ratio, 1) * COALESCE(cy_market_ratio, 1) AS DECIMAL(35, 6)) AS PDOT,
    CAST(CONUTING_UNIT_LY * COALESCE(ly_pdot_ratio, 1) * COALESCE(ly_market_ratio, 1) AS DECIMAL(35, 6)) AS PDOT_LY,
    -- Load timestamps: current timestamp shifted with FROM_UTC_TIMESTAMP to 'UTC+8'.
    FROM_UTC_TIMESTAMP(CURRENT_TIMESTAMP(), 'UTC+8') AS ETL_INSERT_DT,
    FROM_UTC_TIMESTAMP(CURRENT_TIMESTAMP(), 'UTC+8') AS ETL_UPDATE_DT
FROM sales_with_ratios;