-- Databricks notebook source
-- 2026-01-30 temporary workaround (original note: fixes NULL values seen in the
-- test environment). Session-scoped view exposing the de-duplicated
-- (pack_id, pack_code) pairs of dwd.dwd_ims_td_pack; it is consumed further down
-- when dws.dws_ims_td_pack_ym is rebuilt.
CREATE OR REPLACE TEMPORARY VIEW dwd_ims_td_pack AS
SELECT DISTINCT
    pack_id,
    pack_code
FROM dwd.dwd_ims_td_pack

-- COMMAND ----------

-- Provincial fact rows from dwd.dwd_gnd_pharbers_prov_fact with the period and
-- measure columns cast to numeric types.
CREATE OR REPLACE TEMPORARY VIEW FACT_CHPA_SALES_TEMP_WITH_PREVIOUS AS
SELECT
    CAST(YM AS INT)                       AS YM,
    CAST(YEAR AS INT)                     AS YEAR,
    -- MONTH is whatever remains of YM once the YEAR text has been stripped out.
    CAST(REPLACE(YM, YEAR, '') AS INT)    AS MONTH,
    CAST(VALUE AS DECIMAL(38,10))         AS VALUE,
    CAST(COUNTINGUNIT AS DECIMAL(38,10))  AS COUNTINGUNIT,
    CAST(TOTALUNIT AS DECIMAL(38,10))     AS TOTALUNIT,
    PROVINCE_C,
    PHCD,
    conversion_ratio
FROM dwd.dwd_gnd_pharbers_prov_fact

-- COMMAND ----------

-- Province lookup: province_code plus province_name with its administrative
-- suffix appended. FACT_CHPA_SALES joins PROVINCE_C against this expanded name.
-- Only rows whose geo_key equals province_code are kept (presumably the
-- province-level rows of dm.dm_td_geography -- confirm); the three excluded
-- names are filtered on the raw, un-suffixed value.
CREATE OR REPLACE TEMPORARY VIEW DIM_PROVINCE AS
SELECT
    t2.province_code,
    CASE
        WHEN t2.province_name IN ('上海','北京','天津','重庆') THEN CONCAT(t2.province_name,'市')
        WHEN t2.province_name IN ('内蒙古','西藏')             THEN CONCAT(t2.province_name,'自治区')
        WHEN t2.province_name = '宁夏'                         THEN CONCAT(t2.province_name,'回族自治区')
        WHEN t2.province_name = '广西'                         THEN CONCAT(t2.province_name,'壮族自治区')
        WHEN t2.province_name = '新疆'                         THEN CONCAT(t2.province_name,'维吾尔自治区')
        ELSE CONCAT(t2.province_name,'省')
    END AS province_name
FROM dm.dm_td_geography t2
WHERE t2.geo_key = t2.province_code
  AND t2.province_name NOT IN ('台湾','澳门','香港')

-- COMMAND ----------

-- One row per PACK_COD carrying the maximum IQVIA_PACK_CODE and the maximum
-- countingunit found for it in dwd.dwd_gnd_dept_pack_property.
CREATE OR REPLACE TEMPORARY VIEW DIM_CHPA_PACK_INFO AS
SELECT
    PACK_COD,
    MAX(IQVIA_PACK_CODE) AS IQVIA_PACK_CODE,
    MAX(countingunit)    AS COUNTINGUNIT_RATIO
FROM dwd.dwd_gnd_dept_pack_property
GROUP BY PACK_COD

-- COMMAND ----------

-- Provincial sales in the MTH00xx / MTH00xxLY layout. Every source row is
-- emitted twice:
--   1. under its own YM, filling MTH00LC / MTH00CN / MTH00UN;
--   2. under the same month one year later, filling the *LY columns.
-- MTH00CN multiplies TOTALUNIT by the pack-level COUNTINGUNIT_RATIO when one
-- exists, otherwise by the fact row's own conversion_ratio.
-- Both branches take MONTH (INT) from the upstream view so the column has one
-- type and one format across the UNION ALL; the shifted YM is computed with
-- plain integer arithmetic instead of adding a REPLACE() string to a number.
CREATE OR REPLACE TEMPORARY VIEW FACT_CHPA_SALES AS
SELECT
    T1.YM,
    T1.YEAR,
    T1.MONTH,
    T3.IQVIA_PACK_CODE AS PACK_CODE,
    T2.PROVINCE_CODE   AS AUDIT_COD,
    T1.VALUE           AS MTH00LC,
    0                  AS MTH00LCLY,
    CASE
        WHEN T3.COUNTINGUNIT_RATIO IS NULL THEN T1.TOTALUNIT * T1.conversion_ratio
        ELSE T1.TOTALUNIT * T3.COUNTINGUNIT_RATIO
    END                AS MTH00CN,
    0                  AS MTH00CNLY,
    T1.TOTALUNIT       AS MTH00UN,
    0                  AS MTH00UNLY
FROM FACT_CHPA_SALES_TEMP_WITH_PREVIOUS AS T1
LEFT JOIN DIM_PROVINCE AS T2
    ON T1.PROVINCE_C = T2.PROVINCE_NAME
LEFT JOIN DIM_CHPA_PACK_INFO AS T3
    ON T1.PHCD = T3.PACK_COD

UNION ALL

SELECT
    (T1.YEAR + 1) * 100 + T1.MONTH AS YM,
    T1.YEAR + 1        AS YEAR,
    T1.MONTH,
    T3.IQVIA_PACK_CODE AS PACK_CODE,
    T2.PROVINCE_CODE   AS AUDIT_COD,
    0                  AS MTH00LC,
    T1.VALUE           AS MTH00LCLY,
    0                  AS MTH00CN,
    CASE
        WHEN T3.COUNTINGUNIT_RATIO IS NULL THEN T1.TOTALUNIT * T1.conversion_ratio
        ELSE T1.TOTALUNIT * T3.COUNTINGUNIT_RATIO
    END                AS MTH00CNLY,
    0                  AS MTH00UN,
    T1.TOTALUNIT       AS MTH00UNLY
FROM FACT_CHPA_SALES_TEMP_WITH_PREVIOUS AS T1
LEFT JOIN DIM_PROVINCE AS T2
    ON T1.PROVINCE_C = T2.PROVINCE_NAME
LEFT JOIN DIM_CHPA_PACK_INFO AS T3
    ON T1.PHCD = T3.PACK_COD

-- COMMAND ----------

-- 2026-01-19 (original note): no dependency on dwd.dwd_ims_td_pack /
-- dwd.dwd_ims_tf_fact_sales -- exact meaning of the note to be confirmed.
-- Delete the dim-pack rows for the most recent five years, then write the
-- latest dim pack back in for those same periods.
DELETE FROM dws.dws_ims_td_pack_ym
WHERE ym + 500 > (
    SELECT MAX(year * 100 + month)
    FROM dwd.dwd_ims_tf_fact_sales
);

-- Every YM of the last five years paired with every (pack_id, pack_code) of the
-- de-duplicated temp view (the direct join to dwd.dwd_ims_td_pack was replaced
-- by that view). This is a deliberate Cartesian product, so it is written as
-- CROSS JOIN: the earlier FULL JOIN without an ON clause produced NULL-padded
-- rows whenever one side was empty, and rows with a NULL ym can never be
-- removed by the DELETE above.
-- The last two columns are load timestamps shifted to UTC+8; the target column
-- names are not visible here, so the insert stays positional.
INSERT INTO dws.dws_ims_td_pack_ym
SELECT DISTINCT
    ym,
    pack_id,
    pack_code,
    from_utc_timestamp(current_timestamp(), 'UTC+8'),
    from_utc_timestamp(current_timestamp(), 'UTC+8')
FROM (
    SELECT DISTINCT
        year * 100 + month AS ym
    FROM dwd.dwd_ims_tf_fact_sales
    WHERE year * 100 + month + 500 > (
        SELECT MAX(year * 100 + month)
        FROM dwd.dwd_ims_tf_fact_sales
    )
) AS recent_ym
CROSS JOIN dwd_ims_td_pack
;

-- COMMAND ----------

-- IMS raw data preprocessing
-- FACT SALES processing
--
-- Rebuilds tmp.tmp_ims_tf_fact_sales from two sources:
--   * national rows: dwd.dwd_ims_tf_fact_sales restricted to audit code 'CHT';
--   * provincial rows: the FACT_CHPA_SALES view (added 2026-01-20).
-- Each grouped SELECT already lists all of its grouping keys, so no DISTINCT is
-- needed on top of the GROUP BY.
INSERT OVERWRITE TABLE tmp.tmp_ims_tf_fact_sales
SELECT
    FACT.YM,
    AUDIT.Audit_Code AS AUDIT_COD,
    FACT.Pack_Code   AS PACK_COD,
    SUM(MTH00LC)     AS MTH00LC,
    SUM(MTH00LCLY)   AS MTH00LCLY,
    SUM(MTH00CN)     AS MTH00CN,
    SUM(MTH00CNLY)   AS MTH00CNLY,
    SUM(MTH00UN)     AS MTH00UN,
    SUM(MTH00UNLY)   AS MTH00UNLY
FROM (
    -- Current-period measures under the row's own YM.
    SELECT
        t1.Year * 100 + t1.Month AS YM,
        t1.Year,
        t1.Month,
        t2.pack_code,
        t1.Audit_ID,
        t1.SalesValue_LC AS MTH00LC,
        0                AS MTH00LCLY,
        t1.Counting_Unit AS MTH00CN,
        0                AS MTH00CNLY,
        t1.Sales_Unit    AS MTH00UN,
        0                AS MTH00UNLY
    FROM dwd.dwd_ims_tf_fact_sales t1
    -- Supplies the historical PACK ID <-> PACK CODE mapping: keyed by YM, the
    -- latest dim pack table combined with the most recent five years.
    LEFT JOIN dws.dws_ims_td_pack_ym t2
        ON t1.Pack_ID = t2.pack_id
       AND t1.Year * 100 + t1.Month = t2.ym

    UNION ALL

    -- The same rows re-posted one year later as the *LY (prior-year) measures.
    SELECT
        (t1.Year + 1) * 100 + t1.Month AS YM,
        t1.Year + 1      AS Year,
        t1.Month,
        t2.pack_code,
        t1.Audit_ID,
        0                AS MTH00LC,
        t1.SalesValue_LC AS MTH00LCLY,
        0                AS MTH00CN,
        t1.Counting_Unit AS MTH00CNLY,
        0                AS MTH00UN,
        t1.Sales_Unit    AS MTH00UNLY
    FROM dwd.dwd_ims_tf_fact_sales t1
    -- Same pack mapping join as above; the lookup uses the row's original YM.
    LEFT JOIN dws.dws_ims_td_pack_ym t2
        ON t1.Pack_ID = t2.pack_id
       AND t1.Year * 100 + t1.Month = t2.ym
) FACT
-- INNER JOIN: the Audit_Code = 'CHT' filter below discards unmatched rows
-- anyway, so an outer join here would only hide that fact.
INNER JOIN dwd.dwd_ims_td_audit AUDIT
    ON FACT.Audit_ID = AUDIT.Audit_ID
-- Drop shifted rows that land beyond the latest month actually loaded.
WHERE FACT.YM <= (
        SELECT MAX(Year * 100 + Month)
        FROM dwd.dwd_ims_tf_fact_sales
    )
  -- 2026-03-20 chenwu: keep CHPA data only from 202201 onward.
  AND FACT.YM >= 202201
  AND AUDIT.Audit_Code = 'CHT'
GROUP BY
    FACT.YM,
    AUDIT.Audit_Code,
    FACT.Pack_Code

-- Added 2026-01-20.
-- Previously national and provincial data were kept together; now the national
-- data is the query above and the provincial data is new, kept separately below.
UNION ALL

SELECT
    CAST(YM AS INT) AS YM,
    AUDIT_COD,
    Pack_Code       AS PACK_COD,
    SUM(MTH00LC)    AS MTH00LC,
    SUM(MTH00LCLY)  AS MTH00LCLY,
    SUM(MTH00CN)    AS MTH00CN,
    SUM(MTH00CNLY)  AS MTH00CNLY,
    SUM(MTH00UN)    AS MTH00UN,
    SUM(MTH00UNLY)  AS MTH00UNLY
FROM FACT_CHPA_SALES
-- Compare numerically: the raw YM column is cast before MAX so the cut-off does
-- not depend on string ordering of the source values.
WHERE YM <= (
        SELECT MAX(CAST(YM AS INT))
        FROM dwd.dwd_gnd_pharbers_prov_fact
    )
GROUP BY
    YM,
    AUDIT_COD,
    Pack_Code
;