-- Databricks notebook source
-- Reference DDL of the target table (kept commented out):
-- create table dm.dm_ext_thc_sales
-- (
--   yyyymm STRING,
--   audit_cod STRING,
--   pack_cod STRING,
--   corp_cod STRING,
--   sales_value_cal decimal(38,10),
--   sales_unit_cal decimal(38,10),
--   counting_unit decimal(38,10),
--   sales_value_cal_ly decimal(38,10),
--   sales_unit_cal_ly decimal(38,10),
--   counting_unit_ly decimal(38,10)
-- )
-- using delta

-- COMMAND ----------

--------------------------------------------------------------------------
-- Modified on : 2024-11-06
-- Modified by : Fanxujia
-- Change notes:
--   THC originally contained national data only; data for 14 provinces
--   has now been added.
--   National data is delivered every half year, whereas provincial data
--   is delivered every quarter.
--   The current requirement is to load the quarterly data into the model,
--   while the national data goes into its own fact table that PBI shows
--   separately.
--   The fact table built here contains both national and provincial rows;
--   the consumer filters with a WHERE condition when writing to the model.
--------------------------------------------------------------------------
-- Purpose: rebuild dm.dm_ext_thc_sales from the THC raw data, aggregated by
-- period / audit_cod / pack_cod / corp_cod, with last-year (LY) measures
-- placed on the same row as the current-period measures.

-- Step 1: normalise the raw period. A value ending in Q1..Q4 is mapped to the
-- last month of that quarter (yyyy03 / yyyy06 / yyyy09 / yyyy12); any other
-- value is passed through unchanged.
with thc_rawdata1 as (
    select
        case
            when right(yq, 2) = 'Q1' then concat(left(yq, 4), '03')
            when right(yq, 2) = 'Q2' then concat(left(yq, 4), '06')
            when right(yq, 2) = 'Q3' then concat(left(yq, 4), '09')
            when right(yq, 2) = 'Q4' then concat(left(yq, 4), '12')
            else yq
        end as yyyymm,
        city_c,
        pack_cod,
        prod_cod,
        value,
        totalunit,
        -- Not used downstream: thc_rawdata2 recomputes countingunit from the
        -- pack property table.
        countingunit
    from dwd.dwd_gnd_ext_thc_rawdata
)

-- Step 2: company code per pack in the CHPA channel.
-- CORP_DES is deliberately NOT selected: it is never used downstream, and
-- keeping it inside the DISTINCT would yield several rows for one
-- (PACK_COD, CORP_COD) whenever the description text differs, which would
-- multiply rows in the joins below and double-count sales.
-- NOTE(review): a PACK_COD mapped to more than one CORP_COD would still fan
-- out the join in thc_corp -- confirm PACK_COD is unique in this dimension.
, chpa_corp as (
    select distinct
        PACK_COD,
        CORP_COD
    from dm.dm_ims_td_pack_property
)

-- Step 3: from the THC channel pack property take the matching
-- iqvia_pack_code and countingunit; corp_cod prefers the CHPA channel value
-- and falls back to the THC one when CHPA has no match.
, thc_corp as (
    select
        t1.pack_cod,
        t1.iqvia_pack_code,
        t1.countingunit,
        coalesce(t2.CORP_COD, t1.CORP_COD) as CORP_COD
    from dwd.dwd_gnd_ext_thc_pack_property t1
    left join chpa_corp t2
        on t1.iqvia_pack_code = t2.PACK_COD
)

-- Step 4: derive audit_cod for the THC channel.
--   * national rows (province_c = '全国') are set to 'CHT'
--   * provincial rows use geo_key from the shared geography dimension,
--     falling back to province_e when the dimension has no match
, thc_geo as (
    select
        t1.city_c,
        t1.city_e,
        t1.province_c,
        t1.province_e,
        case
            when t1.province_c = '全国' then 'CHT'
            when t2.geo_key is null then t1.province_e
            else t2.geo_key
        end as AUDIT_COD
    from dwd.dwd_gnd_ext_thc_geo t1
    left join dm.dm_td_geography t2
        -- THC is province-level at its finest grain, so join on province.
        -- Only dimension rows whose geo_key equals their province_code are
        -- eligible (presumably the province-level rows -- confirm).
        on t1.province_c = t2.province_name
        and t2.geo_key = t2.province_code
)

-- Step 5: combine raw data with corp and audit information.
-- pack_cod becomes the IQVIA pack code; countingunit is totalunit multiplied
-- by the per-pack countingunit from the pack property table.
, thc_rawdata2 as (
    select
        t1.yyyymm,
        t1.city_c,
        t1.pack_cod as pack_cod_old,
        t2.iqvia_pack_code as pack_cod,
        t2.CORP_COD,
        t3.AUDIT_COD,
        t1.prod_cod,
        t1.value,
        t1.totalunit,
        t1.totalunit * t2.countingunit as countingunit
    from thc_rawdata1 t1
    left join thc_corp t2
        on t1.pack_cod = t2.pack_cod
    left join thc_geo t3
        on t1.city_c = t3.city_c
)

-- Step 6: latest period present in the data; needed to cap the LY rows.
, max_thc_rawdata as (
    select
        max(cast(yyyymm as int)) as max_ym
    from thc_rawdata2
)

-- Step 7: build LY measures. Every row is emitted twice:
--   * as-is, carrying current-period measures and zero LY measures
--   * shifted forward by one year (+100 on yyyymm), carrying its measures as
--     LY and zero current measures; shifted rows beyond the latest available
--     period are dropped
, thc_rawdata3 as (
    select
        yyyymm,
        AUDIT_COD,
        pack_cod,
        CORP_COD,
        value,
        totalunit,
        countingunit,
        0 as value_ly,
        0 as totalunit_ly,
        0 as countingunit_ly
    from thc_rawdata2

    union all

    select
        -- Cast back to string so both UNION branches agree on STRING. Without
        -- it the column type depends on implicit coercion, while the filter on
        -- '201801' and the STRING target column expect a string.
        cast(cast(t1.yyyymm as int) + 100 as string) as yyyymm,
        t1.AUDIT_COD,
        t1.pack_cod,
        t1.CORP_COD,
        0 as value,
        0 as totalunit,
        0 as countingunit,
        t1.value as value_ly,
        t1.totalunit as totalunit_ly,
        t1.countingunit as countingunit_ly
    from thc_rawdata2 t1
    cross join max_thc_rawdata t2
    where cast(t1.yyyymm as int) + 100 <= t2.max_ym
)

-- Step 8: aggregate by period, audit_cod, pack_cod and corp_cod.
, thc_rawdata4 as (
    select
        yyyymm,
        AUDIT_COD,
        pack_cod,
        CORP_COD,
        sum(value) as SALES_VALUE_CAL,
        sum(totalunit) as SALES_UNIT_CAL,
        sum(countingunit) as COUNTING_UNIT,
        sum(value_ly) as SALES_VALUE_CAL_LY,
        sum(totalunit_ly) as SALES_UNIT_CAL_LY,
        sum(countingunit_ly) as COUNTING_UNIT_LY
    from thc_rawdata3
    group by
        yyyymm,
        AUDIT_COD,
        pack_cod,
        CORP_COD
)

-- Step 9: fully replace the target table with the aggregated result.
insert overwrite table dm.dm_ext_thc_sales (
    yyyymm,
    AUDIT_COD,
    pack_cod,
    CORP_COD,
    SALES_VALUE_CAL,
    SALES_UNIT_CAL,
    COUNTING_UNIT,
    SALES_VALUE_CAL_LY,
    SALES_UNIT_CAL_LY,
    COUNTING_UNIT_LY
)
select
    yyyymm,
    AUDIT_COD,
    pack_cod,
    CORP_COD,
    SALES_VALUE_CAL,
    SALES_UNIT_CAL,
    COUNTING_UNIT,
    SALES_VALUE_CAL_LY,
    SALES_UNIT_CAL_LY,
    COUNTING_UNIT_LY
from thc_rawdata4
-- Keep periods from 2018 onward (string comparison on yyyymm).
where yyyymm >= '201801'