-- Databricks notebook source
-- Reference DDL for the target table DM.DM_TF_EXT_THC_SALES (kept commented out; not executed by this notebook).
-- NOTE: the column names CONUTING_UNIT / CONUTING_UNIT_LY are misspelled in the existing table schema;
--       the INSERT further below must keep using these exact names.
--
-- CREATE OR REPLACE TABLE DM.DM_TF_EXT_THC_SALES (
--   YYYYMM STRING,
--   PACK_CODE STRING,
--   CORP_CODE STRING,
--   AUDIT_CODE STRING,
--   PLATFORM_TYPE STRING,
--   STORE_NAME STRING,
--   STORE_TYPE STRING,
--   REGION_TYPE STRING,
--   PACK_FLAG INT,
--   PROD_FLAG INT,
--   DTP_FLAG INT,
--   SALES_UNIT_CAL DECIMAL(38,10),
--   SALES_UNIT_CAL_LY DECIMAL(38,10),
--   SALES_VALUE_CAL DECIMAL(38,10),
--   SALES_VALUE_CAL_LY DECIMAL(38,10),
--   CONUTING_UNIT DECIMAL(38,10),
--   CONUTING_UNIT_LY DECIMAL(38,10),
--   DATA_SOURCE STRING,
--   INST_CODE STRING COMMENT '内部机构编码',                 -- (internal institution code)
--   CMPS_FLAG STRING COMMENT '分子式标签',                   -- (molecular formula flag)
--   DEPT_NAME STRING COMMENT '科室名称',                     -- (department name)
--   PRESCRIPTION DECIMAL(38,10) COMMENT '处方张数',          -- (number of prescriptions)
--   PRESCRIPTION_LY DECIMAL(38,10) COMMENT '去年同期处方张数', -- (number of prescriptions, same period last year)
--   NEW_CODE STRING COMMENT '主数据关联CODE',                -- (master-data join code)
--   AREA STRING COMMENT '城市',                              -- (city)
--   H_LEVEL STRING COMMENT '医院类型',                       -- (hospital type)
--   REIMBURSE STRING COMMENT '报销情况',                     -- (reimbursement status)
--   REIMBURSE_TYPE STRING COMMENT '报销类型',                -- (reimbursement type)
--   PRESCRIPTION_SOURCE STRING COMMENT '处方来源',           -- (prescription source)
--   ETL_INSERT_DT TIMESTAMP,
--   ETL_UPDATE_DT TIMESTAMP)
-- USING delta
-- -- LOCATION 'abfss://master@azcdatalakeprd.dfs.core.chinacloudapi.cn/DM/dm_tf_ext_thc_sales';
-- -- The LOCATION above is the production environment; the LOCATION below is the test environment.
-- LOCATION 'abfss://master@retaildlstoragetest.dfs.core.chinacloudapi.cn/DM/dm_tf_ext_thc_sales';

-- COMMAND ----------

--------------------------------------------------------------------------
-- Modified on : 20241106
-- Modified by : Fanxujia
-- Change notes:
--   THC originally contained national data only; data for 14 provinces has now been added.
--   National data is delivered every half year, provincial data every quarter.
--   The current requirement is to load the quarterly data into the model, while the national
--   data goes into its own fact table so that PBI can display it separately.
--   The fact table produced here contains both national and provincial rows; a WHERE
--   condition selects the required rows when the data is written into the model.
---------------------------------------------------------------------------

-- COMMAND ----------

-- Raw data: normalise the period column.
-- A value ending in Q1..Q4 becomes <first 4 chars> + the quarter-end month (03/06/09/12);
-- any other value is passed through unchanged.
CREATE OR REPLACE TEMPORARY VIEW thc_rawdata1 AS
SELECT
    CASE RIGHT(yq, 2)
        WHEN 'Q1' THEN CONCAT(LEFT(yq, 4), '03')
        WHEN 'Q2' THEN CONCAT(LEFT(yq, 4), '06')
        WHEN 'Q3' THEN CONCAT(LEFT(yq, 4), '09')
        WHEN 'Q4' THEN CONCAT(LEFT(yq, 4), '12')
        ELSE yq
    END AS yyyymm,
    city_c,
    pack_cod,
    prod_cod,
    value,
    totalunit,
    countingunit
FROM dwd.dwd_gnd_ext_thc_rawdata

-- COMMAND ----------

-- Corporation code and name per pack for the CHPA channel.
CREATE OR REPLACE TEMPORARY VIEW chpa_corp AS
SELECT DISTINCT
    PACK_COD,
    CORP_COD,
    CORP_DES
FROM dm.dm_ims_td_pack_property

-- COMMAND ----------

-- Take iqvia_pack_code and countingunit from the THC channel's pack property table.
-- For the corporation code/name, the CHPA channel value takes priority; the THC value is the fallback.
CREATE OR REPLACE TEMPORARY VIEW thc_corp AS
SELECT
    thc.pack_cod,
    thc.iqvia_pack_code,
    thc.countingunit,
    COALESCE(chpa.CORP_COD, thc.CORP_COD) AS CORP_COD,
    COALESCE(chpa.CORP_DES, thc.CORP_DES) AS CORP_DES
FROM dwd.dwd_gnd_dept_pack_property AS thc
LEFT JOIN chpa_corp AS chpa
    ON thc.iqvia_pack_code = chpa.PACK_COD

-- COMMAND ----------

-- Derive the audit_cod for the THC channel:
--   * national rows (province_c = '全国') are set to 'CHT';
--   * provincial rows use the province code from the shared geography dimension;
--   * when no geography row matches, province_e is used instead.
CREATE OR REPLACE TEMPORARY VIEW thc_geo AS
SELECT
    geo.city_c,
    geo.city_e,
    geo.province_c,
    geo.province_e,
    CASE
        WHEN geo.province_c = '全国' THEN 'CHT'
        WHEN dim.geo_key IS NULL THEN geo.province_e
        ELSE dim.geo_key
    END AS AUDIT_COD
FROM dwd.dwd_gnd_ext_thc_geo AS geo
LEFT JOIN dm.dm_td_geography AS dim
    -- THC's finest granularity is province, so join on province and keep only
    -- the dimension rows whose geo_key equals their own province_code.
    ON geo.province_c = dim.province_name
    AND dim.geo_key = dim.province_code

-- COMMAND ----------

-- Combine the raw data with the corporation and audit information prepared above.
-- countingunit is recomputed as totalunit * the pack-level countingunit.
CREATE OR REPLACE TEMPORARY VIEW thc_rawdata2 AS
SELECT
    raw.yyyymm,
    raw.city_c,
    raw.pack_cod AS pack_cod_old,
    corp.iqvia_pack_code AS pack_cod,
    corp.CORP_COD,
    geo.AUDIT_COD,
    raw.prod_cod,
    raw.value,
    raw.totalunit,
    raw.totalunit * corp.countingunit AS countingunit
FROM thc_rawdata1 AS raw
LEFT JOIN thc_corp AS corp
    ON raw.pack_cod = corp.pack_cod
LEFT JOIN thc_geo AS geo
    ON raw.city_c = geo.city_c

-- COMMAND ----------

-- Latest period present in the data; needed as the upper bound when computing last-year (LY) rows.
CREATE OR REPLACE TEMPORARY VIEW max_thc_rawdata AS
SELECT
    MAX(CAST(yyyymm AS INT)) AS max_ym
FROM thc_rawdata2

-- COMMAND ----------

-- Compute last-year (LY) measures.
-- Branch 1: current-period rows with the LY measures set to 0.
-- Branch 2: the same rows shifted forward by one year (yyyymm + 100) with their measures
--           placed in the *_ly columns; shifted periods beyond the latest period are dropped.
CREATE OR REPLACE TEMPORARY VIEW thc_rawdata3 AS
SELECT
    yyyymm,
    AUDIT_COD,
    pack_cod,
    CORP_COD,
    value,
    totalunit,
    countingunit,
    0 AS value_ly,
    0 AS totalunit_ly,
    0 AS countingunit_ly
FROM thc_rawdata2

UNION ALL

SELECT
    -- Cast the shifted period back to STRING so both UNION ALL branches expose the same
    -- type for yyyymm instead of relying on implicit STRING/INT coercion.
    CAST(CAST(prev.yyyymm AS INT) + 100 AS STRING) AS yyyymm,
    prev.AUDIT_COD,
    prev.pack_cod,
    prev.CORP_COD,
    0 AS value,
    0 AS totalunit,
    0 AS countingunit,
    prev.value AS value_ly,
    prev.totalunit AS totalunit_ly,
    prev.countingunit AS countingunit_ly
FROM thc_rawdata2 AS prev
CROSS JOIN max_thc_rawdata AS mx
WHERE CAST(prev.yyyymm AS INT) + 100 <= mx.max_ym

-- COMMAND ----------

-- Aggregate by period, audit_cod, pack_cod and CORP_COD.
CREATE OR REPLACE TEMPORARY VIEW thc_rawdata4 AS
SELECT
    yyyymm,
    AUDIT_COD,
    pack_cod,
    CORP_COD,
    SUM(value) AS SALES_VALUE_CAL,
    SUM(totalunit) AS SALES_UNIT_CAL,
    SUM(countingunit) AS COUNTING_UNIT,
    SUM(value_ly) AS SALES_VALUE_CAL_LY,
    SUM(totalunit_ly) AS SALES_UNIT_CAL_LY,
    SUM(countingunit_ly) AS COUNTING_UNIT_LY
FROM thc_rawdata3
GROUP BY
    yyyymm,
    AUDIT_COD,
    pack_cod,
    CORP_COD

-- COMMAND ----------

-- Shape the aggregated THC data into the column layout of DM.DM_TF_EXT_THC_SALES.
-- Columns that do not apply to THC are filled with constants ('' / NULL / fixed flags).
CREATE OR REPLACE TEMPORARY VIEW THC_SALES AS
SELECT
    A.YYYYMM,
    A.PACK_COD AS PACK_CODE,
    A.AUDIT_COD AS AUDIT_CODE,
    'THC(Quarterly)' AS DATA_SOURCE,
    -------------------------------------
    SALES_UNIT_CAL,
    SALES_UNIT_CAL_LY,
    SALES_VALUE_CAL,
    SALES_VALUE_CAL_LY,
    COUNTING_UNIT,
    COUNTING_UNIT_LY,
    NULL AS prescription,
    NULL AS prescription_ly,
    -------------------------------------
    -- Retail data-hiding logic flags ----
    1 AS PACK_FLAG,
    1 AS PROD_FLAG,
    0 AS DTP_FLAG,
    NULL AS cmps_flag,
    -------------------------------------
    -- EC data tags ----------------------
    '' AS PLATFORM_TYPE,
    '' AS STORE_NAME,
    '' AS STORE_TYPE,
    -------------------------------------
    -- COUNTY data tags ------------------
    '' AS REGION_TYPE,
    -------------------------------------
    -- AIA data tags ---------------------
    NULL AS inst_code,  -- can be removed; not used by the reports
    -------------------------------------
    -- XIE HE data tags ------------------
    '' AS dept_name,
    '' AS new_code,
    '' AS area,
    '' AS h_level,
    '' AS reimburse,
    '' AS reimburse_type,
    '' AS prescription_source,
    -------------------------------------
    '' AS CORP_CODE  -- can be removed; not used by the reports
    -------------------------------------
FROM thc_rawdata4 AS A
-- The filter below was moved downstream to unionall_sales.
-- WHERE
--   AUDIT_COD = 'CHT'

-- COMMAND ----------

-- Overwrite the model-facing fact table.
-- Empty/NULL PACK_CODE falls back to 'PACK_CODE_' + DATA_SOURCE; empty/NULL AUDIT_CODE falls back to 'ROC'.
-- COUNTING_UNIT / COUNTING_UNIT_LY are written to the (misspelled) CONUTING_* target columns.
INSERT OVERWRITE TABLE DM.DM_TF_EXT_THC_SALES (
    YYYYMM,
    PACK_CODE,
    AUDIT_CODE,
    DATA_SOURCE,
    SALES_UNIT_CAL,
    SALES_UNIT_CAL_LY,
    SALES_VALUE_CAL,
    SALES_VALUE_CAL_LY,
    CONUTING_UNIT,
    CONUTING_UNIT_LY,
    PRESCRIPTION,
    PRESCRIPTION_LY,
    PACK_FLAG,
    PROD_FLAG,
    DTP_FLAG,
    CMPS_FLAG,
    PLATFORM_TYPE,
    STORE_NAME,
    STORE_TYPE,
    REGION_TYPE,
    INST_CODE,
    DEPT_NAME,
    NEW_CODE,
    AREA,
    H_LEVEL,
    REIMBURSE,
    REIMBURSE_TYPE,
    PRESCRIPTION_SOURCE,
    CORP_CODE,
    ETL_INSERT_DT,
    ETL_UPDATE_DT
)
SELECT
    YYYYMM,
    COALESCE(NULLIF(PACK_CODE, ''), CONCAT('PACK_CODE_', DATA_SOURCE)) AS PACK_CODE,
    COALESCE(NULLIF(AUDIT_CODE, ''), 'ROC') AS AUDIT_CODE,
    DATA_SOURCE,
    SALES_UNIT_CAL,
    SALES_UNIT_CAL_LY,
    SALES_VALUE_CAL,
    SALES_VALUE_CAL_LY,
    COUNTING_UNIT,
    COUNTING_UNIT_LY,
    PRESCRIPTION,
    PRESCRIPTION_LY,
    PACK_FLAG,
    PROD_FLAG,
    DTP_FLAG,
    CMPS_FLAG,
    PLATFORM_TYPE,
    STORE_NAME,
    STORE_TYPE,
    REGION_TYPE,
    INST_CODE,
    DEPT_NAME,
    NEW_CODE,
    AREA,
    H_LEVEL,
    REIMBURSE,
    REIMBURSE_TYPE,
    PRESCRIPTION_SOURCE,
    CORP_CODE,
    FROM_UTC_TIMESTAMP(CURRENT_TIMESTAMP(), 'UTC+8') AS ETL_INSERT_DT,
    FROM_UTC_TIMESTAMP(CURRENT_TIMESTAMP(), 'UTC+8') AS ETL_UPDATE_DT
FROM THC_SALES
;

-- COMMAND ----------

-- Overwrite the standalone THC fact table with the aggregated data.
INSERT OVERWRITE TABLE dm.dm_ext_thc_sales (
    yyyymm,
    AUDIT_COD,
    pack_cod,
    CORP_COD,
    SALES_VALUE_CAL,
    SALES_UNIT_CAL,
    COUNTING_UNIT,
    SALES_VALUE_CAL_LY,
    SALES_UNIT_CAL_LY,
    COUNTING_UNIT_LY
)
SELECT
    yyyymm,
    AUDIT_COD,
    pack_cod,
    CORP_COD,
    SALES_VALUE_CAL,
    SALES_UNIT_CAL,
    COUNTING_UNIT,
    SALES_VALUE_CAL_LY,
    SALES_UNIT_CAL_LY,
    COUNTING_UNIT_LY
FROM thc_rawdata4
-- Keep only data from 2018 onwards.
WHERE yyyymm >= '201801'