update20260427

This commit is contained in:
2026-04-27 11:04:09 +08:00
commit 3e14e78af3
173 changed files with 62579 additions and 0 deletions

617
Retail/01 load_tmp_data.py Normal file
View File

@@ -0,0 +1,617 @@
# Databricks notebook source
### constant
# Therapeutic-area (TA) labels. The SQL cells of this notebook write the same
# literals into the level_ta position of tmp.tmp_retail_pack_rawdata.
LEVEL_TA_CV = 'CV'
LEVEL_TA_RE = 'RE'
# Backward-compatible alias: this constant was originally published under the
# misspelled name below; keep it so existing references keep working.
LEVLE_TA_RE = LEVEL_TA_RE
LEVEL_TA_RE2 = 'RE2'
LEVEL_TA_GI = 'GI'
LEVEL_TA_DM = 'DM'
LEVEL_TA_RD = 'RD'
# Market labels (runtime values; several are Chinese and must stay byte-identical).
LEVEL_MARKET_HTN = '高血压用药'  # hypertension drugs
LEVEL_MARKET_STATIN_XZK = '他汀类+血脂康'  # statins + Xuezhikang
LEVEL_MARKET_BRILINTA = 'Brilinta Market'
# NOTE(review): the name says COPD but the value is the pediatric cough/asthma
# label; this matches the 'RE2' mapping used by the 1.2.4 cell for 2025.
LEVEL_MARKET_COPD = '小儿咳喘'
LEVEL_MARKET_AAGSA_PPI_ORAL = '慢性胃炎、胃溃疡'  # chronic gastritis, gastric ulcer
# NOTE(review): value is the COPD label; the atomizer cell (1.2.6) writes it under 'RE'.
LEVEL_MARKET_ATOMIZER = '慢性阻塞性肺疾病'
LEVEL_MARKET_NIAD = 'NIAD'
LEVEL_MARKET_RD = 'RD Market'
# COMMAND ----------
############################################################START##############################################################
### STEP-1: load rawdata to tmp table
# COMMAND ----------
# MAGIC %md
# MAGIC ## STEP-1: load rawdata to tmp table
# COMMAND ----------
# MAGIC %sql
# MAGIC -------------------------------------------------------------------------------------
# MAGIC -- STEP-1: load rawdata to tmp table
# MAGIC -- 1.1 load brand data to tmp.tmp_retail_brand_rawdata
# MAGIC -- Replaces the whole target table (insert overwrite) with the rows of
# MAGIC -- dwd.dwd_gnd_ext_zk_brand where ranked_by = 'value'.
# MAGIC -- No column list is given, so the select list is matched to the target by position.
# MAGIC -------------------------------------------------------------------------------------
# MAGIC insert overwrite table tmp.tmp_retail_brand_rawdata
# MAGIC
# MAGIC select
# MAGIC type,
# MAGIC ta,
# MAGIC market,
# MAGIC zk_brand_category,
# MAGIC zk_common_name,
# MAGIC zk_manu_des,
# MAGIC rc_name_en,
# MAGIC province_city,
# MAGIC quarter,
# MAGIC ytd,
# MAGIC -- empty-string placeholder for the 11th target column (its name is not visible here)
# MAGIC '',
# MAGIC -- both measures are multiplied by 1,000,000; presumably the source is in millions - TODO confirm
# MAGIC sales_value * 1000000,
# MAGIC sales_volume * 1000000
# MAGIC from dwd.dwd_gnd_ext_zk_brand
# MAGIC where ranked_by = 'value'
# COMMAND ----------
# MAGIC %sql
# MAGIC -------------------------------------------------------------------------------------
# MAGIC -- STEP-1: load rawdata to tmp table
# MAGIC -- 1.2 load pack data to tmp.tmp_retail_pack_rawdata
# MAGIC -- 1.2.1 load from dwd.dwd_gnd_ext_retail_htn (pack-CV-高血压-化学药-全国.xlsx)
# MAGIC -- First pack load: insert overwrite clears the target; the later 1.2.x cells append.
# MAGIC -- No column list is given, so the select list is matched to the target by position.
# MAGIC -------------------------------------------------------------------------------------
# MAGIC insert overwrite table tmp.tmp_retail_pack_rawdata
# MAGIC
# MAGIC select
# MAGIC -- product_id = zk_product_id left-padded with '0' to 7 characters when it is shorter than 7
# MAGIC case when length(zk_product_id) < 7 then right(concat('0000000',zk_product_id),7) else zk_product_id end as product_id,
# MAGIC zk_product_id,
# MAGIC zk_region,
# MAGIC zk_rx_otc,
# MAGIC zk_medicine_type,
# MAGIC zk_medicine_tier1,
# MAGIC zk_medicine_tier2,
# MAGIC zk_medicine_tier3,
# MAGIC zk_medicine_tier4,
# MAGIC zk_common_name,
# MAGIC zk_dosage_form,
# MAGIC zk_user_type,
# MAGIC zk_category_name,
# MAGIC zk_product_name,
# MAGIC zk_brand_name,
# MAGIC zk_manu_des,
# MAGIC zk_corp_des,
# MAGIC zk_pack_des,
# MAGIC month,
# MAGIC quarter,
# MAGIC -- remove ',' characters from price / sales_unit / sales_value
# MAGIC replace(price,',','' ),
# MAGIC replace(sales_unit,',','' ),
# MAGIC replace(sales_value,',','' ),
# MAGIC digital_spread_rate,
# MAGIC weighted_spread_rate,
# MAGIC counting_unit,
# MAGIC -- the 13 trailing constants are matched by position; the 1.2.5 cell aliases the same list with names
# MAGIC null,
# MAGIC null,
# MAGIC null,
# MAGIC null,
# MAGIC null,
# MAGIC 'CV',
# MAGIC '高血压用药',
# MAGIC null,
# MAGIC null,
# MAGIC null,
# MAGIC null,
# MAGIC 0,
# MAGIC null
# MAGIC from dwd.dwd_gnd_ext_retail_htn
# MAGIC --------------------------------- *hard_code* ---------------------------------------
# MAGIC -- Data for 多达一, 天一宁, others and 氨氯地平阿托伐他汀钙 is duplicated between the
# MAGIC -- hypertension source and the statin + Xuezhikang source.
# MAGIC -- Only one copy is needed; prefer the one that has a split ratio.
# MAGIC -- The corresponding product_ids are: '-356','5258049','5852881','9167744','9526959','9167556','9279325','8533952'
# MAGIC -- Alternatively, filter with zk_common_name <> '氨氯地平阿托伐他汀钙' (the form used below).
# MAGIC where zk_common_name <> '氨氯地平阿托伐他汀钙'
# MAGIC --------------------------------- *hard_code* ---------------------------------------
# COMMAND ----------
# MAGIC %sql
# MAGIC -------------------------------------------------------------------------------------
# MAGIC -- STEP-1: load rawdata to tmp table
# MAGIC -- 1.2 load pack data to tmp.tmp_retail_pack_rawdata
# MAGIC -- 1.2.2 load from dwd.dwd_gnd_ext_retail_statin_xzk (pack-CV-他汀类+血脂康-全国.xlsx)
# MAGIC -- Appends every source row, tagged 'CV' / '他汀类+血脂康'.
# MAGIC -- No column list is given, so the select list is matched to the target by position.
# MAGIC -------------------------------------------------------------------------------------
# MAGIC insert into table tmp.tmp_retail_pack_rawdata
# MAGIC
# MAGIC select
# MAGIC -- product_id = zk_product_id left-padded with '0' to 7 characters when it is shorter than 7
# MAGIC case when length(zk_product_id) < 7 then right(concat('0000000',zk_product_id),7) else zk_product_id end as product_id,
# MAGIC zk_product_id,
# MAGIC zk_region,
# MAGIC zk_rx_otc,
# MAGIC zk_medicine_type,
# MAGIC zk_medicine_tier1,
# MAGIC zk_medicine_tier2,
# MAGIC zk_medicine_tier3,
# MAGIC zk_medicine_tier4,
# MAGIC zk_common_name,
# MAGIC zk_dosage_form,
# MAGIC zk_user_type,
# MAGIC zk_category_name,
# MAGIC zk_product_name,
# MAGIC zk_brand_name,
# MAGIC zk_manu_des,
# MAGIC zk_corp_des,
# MAGIC zk_pack_des,
# MAGIC month,
# MAGIC quarter,
# MAGIC -- remove ',' characters from price / sales_unit / sales_value
# MAGIC replace(price,',','' ),
# MAGIC replace(sales_unit,',','' ),
# MAGIC replace(sales_value,',','' ),
# MAGIC digital_spread_rate,
# MAGIC weighted_spread_rate,
# MAGIC counting_unit,
# MAGIC -- the 13 trailing constants are matched by position; the 1.2.5 cell aliases the same list with names
# MAGIC null,
# MAGIC null,
# MAGIC null,
# MAGIC null,
# MAGIC null,
# MAGIC 'CV',
# MAGIC '他汀类+血脂康',
# MAGIC null,
# MAGIC null,
# MAGIC null,
# MAGIC null,
# MAGIC 0,
# MAGIC null
# MAGIC from dwd.dwd_gnd_ext_retail_statin_xzk
# COMMAND ----------
# MAGIC %sql
# MAGIC -------------------------------------------------------------------------------------
# MAGIC -- STEP-1: load rawdata to tmp table
# MAGIC -- 1.2 load pack data to tmp.tmp_retail_pack_rawdata
# MAGIC -- 1.2.3 load from dwd.dwd_gnd_ext_retail_nataional_oap (pack-CV-抗血栓2通用名-全国.xlsx)
# MAGIC -- Appends every source row, tagged 'CV' / 'Brilinta Market'.
# MAGIC -- No column list is given, so the select list is matched to the target by position.
# MAGIC -------------------------------------------------------------------------------------
# MAGIC insert into table tmp.tmp_retail_pack_rawdata
# MAGIC
# MAGIC select
# MAGIC -- product_id = zk_product_id left-padded with '0' to 7 characters when it is shorter than 7
# MAGIC case when length(zk_product_id) < 7 then right(concat('0000000',zk_product_id),7) else zk_product_id end as product_id,
# MAGIC zk_product_id,
# MAGIC zk_region,
# MAGIC zk_rx_otc,
# MAGIC zk_medicine_type,
# MAGIC zk_medicine_tier1,
# MAGIC zk_medicine_tier2,
# MAGIC zk_medicine_tier3,
# MAGIC zk_medicine_tier4,
# MAGIC zk_common_name,
# MAGIC zk_dosage_form,
# MAGIC zk_user_type,
# MAGIC zk_category_name,
# MAGIC zk_product_name,
# MAGIC zk_brand_name,
# MAGIC zk_manu_des,
# MAGIC zk_corp_des,
# MAGIC zk_pack_des,
# MAGIC month,
# MAGIC quarter,
# MAGIC -- remove ',' characters from price / sales_unit / sales_value
# MAGIC replace(price,',','' ),
# MAGIC replace(sales_unit,',','' ),
# MAGIC replace(sales_value,',','' ),
# MAGIC digital_spread_rate,
# MAGIC weighted_spread_rate,
# MAGIC counting_unit,
# MAGIC -- the 13 trailing constants are matched by position; the 1.2.5 cell aliases the same list with names
# MAGIC null,
# MAGIC null,
# MAGIC null,
# MAGIC null,
# MAGIC null,
# MAGIC 'CV',
# MAGIC 'Brilinta Market',
# MAGIC null,
# MAGIC null,
# MAGIC null,
# MAGIC null,
# MAGIC 0,
# MAGIC null
# MAGIC from dwd.dwd_gnd_ext_retail_nataional_oap
# COMMAND ----------
# MAGIC %sql
# MAGIC -------------------------------------------------------------------------------------
# MAGIC -- STEP-1: load rawdata to tmp table
# MAGIC -- 1.2 load pack data to tmp.tmp_retail_pack_rawdata
# MAGIC -- 1.2.4 load from dwd.dwd_gnd_ext_retail_anti_asthma_copd (pack-RE-慢阻肺-全国.xlsx)
# MAGIC -- Appends every source row. The TA / market tags were 'RE' / '慢性阻塞性肺疾病' for 2024
# MAGIC -- (now commented out) and are 'RE2' / '小儿咳喘' for 2025.
# MAGIC -- No column list is given, so the select list is matched to the target by position.
# MAGIC -------------------------------------------------------------------------------------
# MAGIC insert into table tmp.tmp_retail_pack_rawdata
# MAGIC
# MAGIC select
# MAGIC -- product_id = zk_product_id left-padded with '0' to 7 characters when it is shorter than 7
# MAGIC case when length(zk_product_id) < 7 then right(concat('0000000',zk_product_id),7) else zk_product_id end as product_id,
# MAGIC zk_product_id,
# MAGIC zk_region,
# MAGIC zk_rx_otc,
# MAGIC zk_medicine_type,
# MAGIC zk_medicine_tier1,
# MAGIC zk_medicine_tier2,
# MAGIC zk_medicine_tier3,
# MAGIC zk_medicine_tier4,
# MAGIC zk_common_name,
# MAGIC zk_dosage_form,
# MAGIC zk_user_type,
# MAGIC zk_category_name,
# MAGIC zk_product_name,
# MAGIC zk_brand_name,
# MAGIC zk_manu_des,
# MAGIC zk_corp_des,
# MAGIC zk_pack_des,
# MAGIC month,
# MAGIC quarter,
# MAGIC -- remove ',' characters from price / sales_unit / sales_value
# MAGIC replace(price,',','' ),
# MAGIC replace(sales_unit,',','' ),
# MAGIC replace(sales_value,',','' ),
# MAGIC digital_spread_rate,
# MAGIC weighted_spread_rate,
# MAGIC counting_unit,
# MAGIC -- the 13 trailing constants are matched by position; the 1.2.5 cell aliases the same list with names
# MAGIC null,
# MAGIC null,
# MAGIC null,
# MAGIC null,
# MAGIC null,
# MAGIC --------------2024-----------
# MAGIC --'RE',
# MAGIC --'慢性阻塞性肺疾病',
# MAGIC --------------2025-----------
# MAGIC 'RE2',
# MAGIC '小儿咳喘',
# MAGIC -----------------------------
# MAGIC null,
# MAGIC null,
# MAGIC null,
# MAGIC null,
# MAGIC 0,
# MAGIC null
# MAGIC from dwd.dwd_gnd_ext_retail_anti_asthma_copd
# COMMAND ----------
# MAGIC %sql
# MAGIC -------------------------------------------------------------------------------------
# MAGIC -- STEP-1: load rawdata to tmp table
# MAGIC -- 1.2 load pack data to tmp.tmp_retail_pack_rawdata
# MAGIC -- 1.2.5 load from dwd.dwd_gnd_ext_retail_aagsa_ppi_oral (pack-GI-慢性胃炎胃溃疡-全国.xlsx)
# MAGIC -- Appends every source row, tagged 'GI' / '慢性胃炎、胃溃疡'.
# MAGIC -- No column list is given, so the select list is matched to the target by position;
# MAGIC -- the aliases on the trailing constants are labels for the reader only.
# MAGIC -------------------------------------------------------------------------------------
# MAGIC insert into table tmp.tmp_retail_pack_rawdata
# MAGIC
# MAGIC select
# MAGIC -- product_id = zk_product_id left-padded with '0' to 7 characters when it is shorter than 7
# MAGIC case when length(zk_product_id) < 7 then right(concat('0000000',zk_product_id),7) else zk_product_id end as product_id,
# MAGIC zk_product_id,
# MAGIC zk_region,
# MAGIC zk_rx_otc,
# MAGIC zk_medicine_type,
# MAGIC zk_medicine_tier1,
# MAGIC zk_medicine_tier2,
# MAGIC zk_medicine_tier3,
# MAGIC zk_medicine_tier4,
# MAGIC zk_common_name,
# MAGIC zk_dosage_form,
# MAGIC zk_user_type,
# MAGIC zk_category_name,
# MAGIC zk_product_name,
# MAGIC zk_brand_name,
# MAGIC zk_manu_des,
# MAGIC zk_corp_des,
# MAGIC zk_pack_des,
# MAGIC month,
# MAGIC quarter,
# MAGIC -- remove ',' characters from price / sales_unit / sales_value
# MAGIC replace(price,',','' ),
# MAGIC replace(sales_unit,',','' ),
# MAGIC replace(sales_value,',','' ),
# MAGIC digital_spread_rate,
# MAGIC weighted_spread_rate,
# MAGIC counting_unit,
# MAGIC null as pack_code,
# MAGIC null as molecule_code,
# MAGIC null as molecule_desc,
# MAGIC null as product_code,
# MAGIC null as product_desc,
# MAGIC 'GI' as level_ta,
# MAGIC '慢性胃炎、胃溃疡' as level_market,
# MAGIC null as level_molecule,
# MAGIC null as level_brand,
# MAGIC null as ratio_val,
# MAGIC null as ratio_vol,
# MAGIC 0 as data_flag,
# MAGIC null as brand_flag
# MAGIC from dwd.dwd_gnd_ext_retail_aagsa_ppi_oral
# COMMAND ----------
# MAGIC %sql
# MAGIC -------------------------------------------------------------------------------------
# MAGIC -- STEP-1: load rawdata to tmp table
# MAGIC -- 1.2 load pack data to tmp.tmp_retail_pack_rawdata
# MAGIC -- 1.2.6 load from dwd.dwd_gnd_ext_retail_atomizer (pack-雾化器-全国&县域数据.xlsx)
# MAGIC -- Appends every source row (the region filter below is disabled), tagged 'RE' / '慢性阻塞性肺疾病'.
# MAGIC -- No column list is given, so the select list is matched to the target by position.
# MAGIC -------------------------------------------------------------------------------------
# MAGIC insert into table tmp.tmp_retail_pack_rawdata
# MAGIC
# MAGIC select
# MAGIC -- product_id = zk_product_id left-padded with '0' to 7 characters when it is shorter than 7
# MAGIC case when length(zk_product_id) < 7 then right(concat('0000000',zk_product_id),7) else zk_product_id end as product_id,
# MAGIC zk_product_id,
# MAGIC zk_region,
# MAGIC zk_rx_otc,
# MAGIC zk_medicine_type,
# MAGIC zk_medicine_tier1,
# MAGIC zk_medicine_tier2,
# MAGIC zk_medicine_tier3,
# MAGIC zk_medicine_tier4,
# MAGIC zk_common_name,
# MAGIC zk_dosage_form,
# MAGIC zk_user_type,
# MAGIC zk_category_name,
# MAGIC zk_product_name,
# MAGIC zk_brand_name,
# MAGIC zk_manu_des,
# MAGIC zk_corp_des,
# MAGIC zk_pack_des,
# MAGIC month,
# MAGIC quarter,
# MAGIC -- remove ',' characters from price / sales_unit / sales_value
# MAGIC replace(price,',','' ),
# MAGIC replace(sales_unit,',','' ),
# MAGIC replace(sales_value,',','' ),
# MAGIC digital_spread_rate,
# MAGIC weighted_spread_rate,
# MAGIC counting_unit,
# MAGIC -- the 13 trailing constants are matched by position; the 1.2.5 cell aliases the same list with names
# MAGIC null,
# MAGIC null,
# MAGIC null,
# MAGIC null,
# MAGIC null,
# MAGIC 'RE',
# MAGIC '慢性阻塞性肺疾病',
# MAGIC null,
# MAGIC null,
# MAGIC null,
# MAGIC null,
# MAGIC 0,
# MAGIC null
# MAGIC from dwd.dwd_gnd_ext_retail_atomizer
# MAGIC ------------------------------ *hard_code* ------------------------------------------
# MAGIC -- In 2024, extra 24-province "atomizer" data was purchased for the RE-COPD PACK data,
# MAGIC -- so only the 24-province detail was loaded here; the national figures had already been
# MAGIC -- loaded from "pack-GI-慢性胃炎胃溃疡-全国.xlsx".
# MAGIC -- NOTE(review): the GI file name above is kept from the original note; it looks like a
# MAGIC -- copy-paste slip for the RE/COPD national file - confirm.
# MAGIC -- For 2025 all rows are loaded, so the filter below stays disabled.
# MAGIC -- where zk_region <> '全国'
# MAGIC ------------------------------ *hard_code* ------------------------------------------
# COMMAND ----------
# MAGIC %sql
# MAGIC -------------------------------------------------------------------------------------
# MAGIC -- STEP-1: load rawdata to tmp table
# MAGIC -- 1.2 load pack data to tmp.tmp_retail_pack_rawdata
# MAGIC -- 1.2.7 load from dwd.dwd_gnd_ext_retail_nataional_niad (pack-DM-口服降糖化学药.xlsx)
# MAGIC -- Appends every source row, tagged 'DM' / 'NIAD'.
# MAGIC -- No column list is given, so the select list is matched to the target by position.
# MAGIC -------------------------------------------------------------------------------------
# MAGIC insert into table tmp.tmp_retail_pack_rawdata
# MAGIC
# MAGIC select
# MAGIC -- product_id = zk_product_id left-padded with '0' to 7 characters when it is shorter than 7
# MAGIC case when length(zk_product_id) < 7 then right(concat('0000000',zk_product_id),7) else zk_product_id end as product_id,
# MAGIC zk_product_id,
# MAGIC zk_region,
# MAGIC zk_rx_otc,
# MAGIC zk_medicine_type,
# MAGIC zk_medicine_tier1,
# MAGIC zk_medicine_tier2,
# MAGIC zk_medicine_tier3,
# MAGIC zk_medicine_tier4,
# MAGIC zk_common_name,
# MAGIC zk_dosage_form,
# MAGIC zk_user_type,
# MAGIC zk_category_name,
# MAGIC zk_product_name,
# MAGIC zk_brand_name,
# MAGIC zk_manu_des,
# MAGIC zk_corp_des,
# MAGIC zk_pack_des,
# MAGIC month,
# MAGIC quarter,
# MAGIC -- remove ',' characters from price / sales_unit / sales_value
# MAGIC replace(price,',','' ),
# MAGIC replace(sales_unit,',','' ),
# MAGIC replace(sales_value,',','' ),
# MAGIC digital_spread_rate,
# MAGIC weighted_spread_rate,
# MAGIC counting_unit,
# MAGIC -- the 13 trailing constants are matched by position; the 1.2.5 cell aliases the same list with names
# MAGIC null,
# MAGIC null,
# MAGIC null,
# MAGIC null,
# MAGIC null,
# MAGIC 'DM',
# MAGIC 'NIAD',
# MAGIC null,
# MAGIC null,
# MAGIC null,
# MAGIC null,
# MAGIC 0,
# MAGIC null
# MAGIC from dwd.dwd_gnd_ext_retail_nataional_niad
# COMMAND ----------
# MAGIC %sql
# MAGIC -------------------------------------------------------------------------------------
# MAGIC -- STEP-1: load rawdata to tmp table
# MAGIC -- 1.2 load pack data to tmp.tmp_retail_pack_rawdata
# MAGIC -- 1.2.8 load from dwd.dwd_gnd_ext_retail_nataional_rd (pack-RD-肾科-全国.xlsx)
# MAGIC -- Appends source rows tagged 'RD' / 'RD Market', skipping every product whose padded
# MAGIC -- product_id is listed in tmp_exclusion.
# MAGIC -- No column list is given, so the select list is matched to the target by position.
# MAGIC -------------------------------------------------------------------------------------
# MAGIC insert into table tmp.tmp_retail_pack_rawdata
# MAGIC
# MAGIC ------------------------------ *hard_code* ------------------------------------------
# MAGIC -- Exclude diuretic data: product_ids in the pack property table whose atc2_cod is
# MAGIC -- 'c03' (compared case-insensitively via lower()).
# MAGIC with tmp_exclusion as (
# MAGIC select distinct product_id
# MAGIC from dwd.dwd_gnd_ext_retail_pack_property
# MAGIC where lower(atc2_cod) = 'c03'
# MAGIC )
# MAGIC ------------------------------ *hard_code* ------------------------------------------
# MAGIC
# MAGIC select
# MAGIC -- product_id = zk_product_id left-padded with '0' to 7 characters when it is shorter than 7
# MAGIC case when length(zk_product_id) < 7 then right(concat('0000000',zk_product_id),7) else zk_product_id end as product_id,
# MAGIC zk_product_id,
# MAGIC zk_region,
# MAGIC zk_rx_otc,
# MAGIC zk_medicine_type,
# MAGIC zk_medicine_tier1,
# MAGIC zk_medicine_tier2,
# MAGIC zk_medicine_tier3,
# MAGIC zk_medicine_tier4,
# MAGIC zk_common_name,
# MAGIC zk_dosage_form,
# MAGIC zk_user_type,
# MAGIC zk_category_name,
# MAGIC zk_product_name,
# MAGIC zk_brand_name,
# MAGIC zk_manu_des,
# MAGIC zk_corp_des,
# MAGIC zk_pack_des,
# MAGIC month,
# MAGIC quarter,
# MAGIC -- remove ',' characters from price / sales_unit / sales_value
# MAGIC replace(price,',','' ),
# MAGIC replace(sales_unit,',','' ),
# MAGIC replace(sales_value,',','' ),
# MAGIC digital_spread_rate,
# MAGIC weighted_spread_rate,
# MAGIC counting_unit,
# MAGIC -- the 13 trailing constants are matched by position; the 1.2.5 cell aliases the same list with names
# MAGIC null,
# MAGIC null,
# MAGIC null,
# MAGIC null,
# MAGIC null,
# MAGIC 'RD',
# MAGIC 'RD Market',
# MAGIC null,
# MAGIC null,
# MAGIC null,
# MAGIC null,
# MAGIC 0,
# MAGIC null
# MAGIC from dwd.dwd_gnd_ext_retail_nataional_rd a
# MAGIC -- anti-join: the padding expression is repeated here so it compares against the padded form
# MAGIC where not exists (
# MAGIC select * from tmp_exclusion b
# MAGIC where case when length(zk_product_id) < 7 then right(concat('0000000',zk_product_id),7) else zk_product_id end = b.product_id
# MAGIC )
# MAGIC
# COMMAND ----------
# MAGIC %sql
# MAGIC -------------------------------------------------------------------------------------
# MAGIC -- STEP-1: load rawdata to tmp table
# MAGIC -- 1.2 load pack data to tmp.tmp_retail_pack_rawdata
# MAGIC -- 1.2.9 load from dwd.dwd_gnd_ext_retail_metoprolol_tartrat (pack-CV-酒石酸美托洛尔.xlsx)
# MAGIC -- Metoprolol tartrate PACK data was purchased in 2024 but not in 2025.
# MAGIC -- The insert below is commented out, so this cell only runs the select and writes nothing.
# MAGIC -------------------------------------------------------------------------------------
# MAGIC --insert into table tmp.tmp_retail_pack_rawdata
# MAGIC
# MAGIC select
# MAGIC -- product_id = zk_product_id left-padded with '0' to 7 characters when it is shorter than 7
# MAGIC case when length(zk_product_id) < 7 then right(concat('0000000',zk_product_id),7) else zk_product_id end as product_id,
# MAGIC zk_product_id,
# MAGIC zk_region,
# MAGIC zk_rx_otc,
# MAGIC zk_medicine_type,
# MAGIC zk_medicine_tier1,
# MAGIC zk_medicine_tier2,
# MAGIC zk_medicine_tier3,
# MAGIC zk_medicine_tier4,
# MAGIC zk_common_name,
# MAGIC zk_dosage_form,
# MAGIC zk_user_type,
# MAGIC zk_category_name,
# MAGIC zk_product_name,
# MAGIC zk_brand_name,
# MAGIC zk_manu_des,
# MAGIC zk_corp_des,
# MAGIC zk_pack_des,
# MAGIC month,
# MAGIC quarter,
# MAGIC -- remove ',' characters from price / sales_unit / sales_value
# MAGIC replace(price,',','' ),
# MAGIC replace(sales_unit,',','' ),
# MAGIC replace(sales_value,',','' ),
# MAGIC digital_spread_rate,
# MAGIC weighted_spread_rate,
# MAGIC counting_unit,
# MAGIC -- the 13 trailing constants follow the same positional layout that the 1.2.5 cell aliases with names
# MAGIC null,
# MAGIC null,
# MAGIC null,
# MAGIC null,
# MAGIC null,
# MAGIC 'CV',
# MAGIC '高血压用药',
# MAGIC null,
# MAGIC null,
# MAGIC null,
# MAGIC null,
# MAGIC 0,
# MAGIC null
# MAGIC from dwd.dwd_gnd_ext_retail_metoprolol_tartrat a
# MAGIC ------------------------------ *hard_code* ------------------------------------------
# MAGIC -- In 2024, extra 24-province "metoprolol tartrate" data was purchased for the CV-hypertension PACK data,
# MAGIC -- so only the 24-province detail is taken here; the national figures were already loaded from "pack-CV-高血压-化学药-全国.xlsx".
# MAGIC where zk_region <> '全国'
# MAGIC ------------------------------ *hard_code* ------------------------------------------
# COMMAND ----------
# %sql
# 20260302 chenwu: this cell is disabled; the pediatric cough/asthma (小儿咳喘) template is not used for now. Fasenra Market data is uploaded via DTP RAW DATA.
# -------------------------------------------------------------------------------------
# -- STEP-1: load rawdata to tmp table
# -- 1.2 load brand data to tmp.tmp_retail_pack_rawdata
# -- 1.2.10 load from dwd.dwd_gnd_ext_retail_asthma (pack - manually supplemented)
# -- manually supplemented pediatric cough/asthma (小儿咳喘) data; this data does not need to be split
# -------------------------------------------------------------------------------------
# insert into table tmp.tmp_retail_pack_rawdata
# select
# case when length(zk_product_id) < 7 then right(concat('0000000',zk_product_id),7) else zk_product_id end as product_id,
# zk_product_id,
# zk_region,
# zk_rx_otc,
# zk_medicine_type,
# zk_medicine_tier1,
# zk_medicine_tier2,
# zk_medicine_tier3,
# zk_medicine_tier4,
# zk_common_name,
# zk_dosage_form,
# zk_user_type,
# zk_category_name,
# zk_product_name,
# zk_brand_name,
# zk_manu_des,
# zk_corp_des,
# zk_pack_des,
# month,
# quarter,
# price,
# sales_unit,
# sales_value,
# digital_spread_rate,
# weighted_spread_rate,
# counting_unit,
# null,
# null,
# null,
# null,
# null,
# 'RE_NO_SPLIT',
# '小儿咳喘',
# null,
# null,
# null,
# null,
# 0,
# null
# from dwd.dwd_gnd_ext_retail_asthma
# COMMAND ----------
############################################################END##############################################################

View File

@@ -0,0 +1,672 @@
# Databricks notebook source
### constant
# --- level / type labels -----------------------------------------------------
DATA_TYPE_MARKET = "MARKET"
DATA_TYPE_MOLECULE = "MOLECULE"
DATA_TYPE_BRAND = "BRAND"
DATA_TYPE_MULTI_MARKET = "MULTI_MARKET"
DATA_TYPE_MULTI_MOLECULE = "MULTI_MOLECULE"
DATA_TYPE_MULTI_BRAND = "MULTI_BRAND"
DATA_TYPE_DUPLICATE = "DUPLICATE"
DATA_TYPE_HEDGE_TA = "HEDGE_TA"
DATA_TYPE_HEDGE_MARKET = "HEDGE_MARKET"
DATA_TYPE_HEDGE_MOLECULE = "HEDGE_MOLECULE"
DATA_TYPE_HEDGE_BRAND = "HEDGE_BRAND"

# --- data_flag values (the trailing note gives the final pack_flag) ------------
DATA_FLAG_RAW = 0    # final pack_flag = 1
DATA_FLAG_SUB = 1    # final pack_flag = 2
DATA_FLAG_ADD = 2    # final pack_flag = 2
DATA_FLAG_RATIO = 3  # final pack_flag = 2
DATA_FLAG_AVG = 4    # final pack_flag = 2; not in use in the current version
DATA_FLAG_HEDGE = 5  # final pack_flag = 2

# --- brand_flag values (the trailing note gives the final brand_flag) ----------
BRAND_FLAG_NATIVE = 1     # final brand_flag = 1
BRAND_FLAG_CALCULATE = 2  # final brand_flag = 2
BRAND_FLAG_NONE = 3       # final brand_flag = 2

# --- placeholder labels for remainder ("other") rows ---------------------------
DATA_OTHER_TA = "OTHER_TA"
DATA_OTHER_MARKET = "OTHER_MARKET"
DATA_OTHER_MOLECULE = "OTHER_MOLECULE"
DATA_OTHER_BRAND = "OTHER_BRAND"

# --- area labels ---------------------------------------------------------------
DATA_AREA_TYPE_ROC = "ROC"
DATA_LABEL_AREA_ALL = "全国"
# COMMAND ----------
############################################################START##############################################################
### STEP-1: load data to tmp table
### STEP-2: subtract data from level market to brand
### STEP-3: accumulate data from level brand to market
### STEP-4: calculate brand ratio
# COMMAND ----------
# MAGIC %md
# MAGIC ## STEP-1: load data to tmp table
# COMMAND ----------
# MAGIC %sql
# MAGIC -------------------------------------------------------------------------------------
# MAGIC -- STEP-1: load data to tmp table
# MAGIC -- 1.1 load LEVEL_MARKET data to tmp.tmp_retail_level_market
# MAGIC -- Replaces the target with brand rawdata rows whose split-config entry
# MAGIC -- (dwd.dwd_gnd_retail_split_automatic) has level = 'MARKET'; level_ta / level_market
# MAGIC -- come from the config row. Columns are matched to the target by position.
# MAGIC -------------------------------------------------------------------------------------
# MAGIC
# MAGIC insert overwrite table tmp.tmp_retail_level_market
# MAGIC
# MAGIC select
# MAGIC a.quarter,
# MAGIC a.province_city,
# MAGIC b.level_ta,
# MAGIC b.level_market,
# MAGIC a.sales_val,
# MAGIC a.sales_vol,
# MAGIC -- literal 0 equals DATA_FLAG_RAW; presumably the target's data_flag column - verify
# MAGIC 0
# MAGIC from tmp.tmp_retail_brand_rawdata a
# MAGIC inner join dwd.dwd_gnd_retail_split_automatic b
# MAGIC on a.ta = b.ta
# MAGIC and a.market = b.market
# MAGIC and a.brand_category = b.brand
# MAGIC -- NULL common_name is treated as '' on both sides so that NULLs match each other
# MAGIC and ifnull(a.common_name, '') = ifnull(b.common_name, '')
# MAGIC where b.level = 'MARKET'
# MAGIC
# COMMAND ----------
# MAGIC %sql
# MAGIC -------------------------------------------------------------------------------------
# MAGIC -- STEP-1: load data to tmp table
# MAGIC -- 1.2 load LEVEL_MOLECULE data to tmp.tmp_retail_level_molecule
# MAGIC -- Replaces the target with brand rawdata rows whose split-config entry has
# MAGIC -- level = 'MOLECULE'; level_ta / level_market / level_molecule come from the config row.
# MAGIC -- Columns are matched to the target by position.
# MAGIC -------------------------------------------------------------------------------------
# MAGIC insert overwrite table tmp.tmp_retail_level_molecule
# MAGIC
# MAGIC select
# MAGIC a.quarter,
# MAGIC a.province_city,
# MAGIC b.level_ta,
# MAGIC b.level_market,
# MAGIC b.level_molecule,
# MAGIC a.sales_val,
# MAGIC a.sales_vol,
# MAGIC -- literal 0 equals DATA_FLAG_RAW; presumably the target's data_flag column - verify
# MAGIC 0
# MAGIC from tmp.tmp_retail_brand_rawdata a
# MAGIC inner join dwd.dwd_gnd_retail_split_automatic b
# MAGIC on a.ta = b.ta
# MAGIC and a.market = b.market
# MAGIC and a.brand_category = b.brand
# MAGIC -- NULL common_name is treated as '' on both sides so that NULLs match each other
# MAGIC and ifnull(a.common_name, '') = ifnull(b.common_name, '')
# MAGIC where b.level = 'MOLECULE'
# COMMAND ----------
# MAGIC %sql
# MAGIC -------------------------------------------------------------------------------------
# MAGIC -- STEP-1: load data to tmp table
# MAGIC -- 1.3 load LEVEL_BRAND(BRAND & MULTI_BRAND) data to tmp.tmp_retail_level_brand
# MAGIC -- Replaces the target with the union of:
# MAGIC --   * rawdata rows whose config entry has level = 'BRAND' (one output row each), and
# MAGIC --   * rawdata rows whose config level contains 'MULTI_BRAND', emitted once per element
# MAGIC --     of the comma-separated level_ta list, each copy carrying the full sales values.
# MAGIC -- Columns are matched to the target by position.
# MAGIC -------------------------------------------------------------------------------------
# MAGIC insert overwrite table tmp.tmp_retail_level_brand
# MAGIC
# MAGIC -- exploded_data: MULTI_BRAND config rows with each level_* list split on ',' into an array
# MAGIC with exploded_data as (
# MAGIC select
# MAGIC ta,
# MAGIC market,
# MAGIC brand,
# MAGIC ifnull(common_name,'' ) as common_name,
# MAGIC level,
# MAGIC split(level_ta,',') array_ta,
# MAGIC split(level_market,',') array_market,
# MAGIC split(level_molecule,',') array_molecule,
# MAGIC split(level_brand,',') array_brand
# MAGIC from dwd.dwd_gnd_retail_split_automatic
# MAGIC where level like '%MULTI_BRAND%'
# MAGIC -- tmp_config: one row per position idx of array_ta; the other arrays are read at the same
# MAGIC -- idx, so the i-th entries of the four lists are paired (assumes equal list lengths - verify)
# MAGIC ), tmp_config as (
# MAGIC select
# MAGIC ta,
# MAGIC market,
# MAGIC brand,
# MAGIC common_name,
# MAGIC level,
# MAGIC array_ta[idx] as level_ta,
# MAGIC array_market[idx] as level_market,
# MAGIC array_molecule[idx] as level_molecule,
# MAGIC array_brand[idx] as level_brand
# MAGIC from exploded_data
# MAGIC lateral view posexplode(array_ta) AS idx, split_ta
# MAGIC )
# MAGIC ----------------brand data---------------------
# MAGIC select
# MAGIC a.quarter,
# MAGIC a.province_city,
# MAGIC b.level_ta,
# MAGIC b.level_market,
# MAGIC b.level_molecule,
# MAGIC b.level_brand,
# MAGIC a.sales_val,
# MAGIC a.sales_vol,
# MAGIC -- four NULL placeholders; the 1.5 cell names the same positions ratio_val, ratio_vol, total_val, total_vol
# MAGIC null,
# MAGIC null,
# MAGIC null,
# MAGIC null,
# MAGIC -- data_flag 0 (equals DATA_FLAG_RAW)
# MAGIC 0
# MAGIC from tmp.tmp_retail_brand_rawdata a
# MAGIC inner join dwd.dwd_gnd_retail_split_automatic b
# MAGIC on a.ta = b.ta
# MAGIC and a.market = b.market
# MAGIC and a.brand_category = b.brand
# MAGIC and ifnull(a.common_name, '') = ifnull(b.common_name, '')
# MAGIC where b.level = 'BRAND'
# MAGIC
# MAGIC union all
# MAGIC ----------------multi brand data---------------------
# MAGIC select
# MAGIC a.quarter,
# MAGIC a.province_city,
# MAGIC b.level_ta,
# MAGIC b.level_market,
# MAGIC b.level_molecule,
# MAGIC b.level_brand,
# MAGIC a.sales_val,
# MAGIC a.sales_vol,
# MAGIC null,
# MAGIC null,
# MAGIC null,
# MAGIC null,
# MAGIC 0
# MAGIC from tmp.tmp_retail_brand_rawdata a
# MAGIC inner join tmp_config b
# MAGIC on a.ta = b.ta
# MAGIC and a.market = b.market
# MAGIC and a.brand_category = b.brand
# MAGIC -- b.common_name is already NULL-normalised in exploded_data; the ifnull on b is a no-op here
# MAGIC and ifnull(a.common_name, '') = ifnull(b.common_name, '')
# COMMAND ----------
# MAGIC %sql
# MAGIC -------------------------------------------------------------------------------------
# MAGIC -- STEP-1: load data to tmp table
# MAGIC -- 1.4 append HEDGE_TA data to every level
# MAGIC -- 1.4.1 append HEDGE_MARKET data to LEVEL_MARKET
# MAGIC -- Appends one row per (quarter, province_city) holding the NEGATED sum of all rawdata
# MAGIC -- rows whose config level contains 'HEDGE_TA', labelled 'HEDGE_TA' / 'HEDGE_MARKET'.
# MAGIC -------------------------------------------------------------------------------------
# MAGIC
# MAGIC with tmp_hedge as (
# MAGIC select
# MAGIC a.quarter,
# MAGIC a.province_city,
# MAGIC -- 0 - sum(...) yields the negative of the total
# MAGIC 0 - sum(a.sales_val) as hedge_val,
# MAGIC 0 - sum(a.sales_vol) as hedge_vol
# MAGIC from tmp.tmp_retail_brand_rawdata a
# MAGIC inner join dwd.dwd_gnd_retail_split_automatic b
# MAGIC on a.ta = b.ta
# MAGIC and a.market = b.market
# MAGIC and a.brand_category = b.brand
# MAGIC and ifnull(a.common_name, '') = ifnull(b.common_name, '')
# MAGIC where b.level like '%HEDGE_TA%'
# MAGIC group by a.quarter,a.province_city
# MAGIC )
# MAGIC
# MAGIC insert into tmp.tmp_retail_level_market
# MAGIC select
# MAGIC quarter,
# MAGIC province_city,
# MAGIC 'HEDGE_TA',
# MAGIC 'HEDGE_MARKET',
# MAGIC hedge_val,
# MAGIC hedge_vol,
# MAGIC -- literal 5 equals DATA_FLAG_HEDGE
# MAGIC 5
# MAGIC from tmp_hedge
# COMMAND ----------
# MAGIC %sql
# MAGIC -------------------------------------------------------------------------------------
# MAGIC -- STEP-1: load data to tmp table
# MAGIC -- 1.4 append HEDGE_TA data to every level
# MAGIC -- 1.4.2 append HEDGE_MOLECULE data to LEVEL_MOLECULE
# MAGIC -- Same tmp_hedge aggregate as 1.4.1 (negated sums per quarter / province_city),
# MAGIC -- appended to the molecule table with the extra label 'HEDGE_MOLECULE'.
# MAGIC -------------------------------------------------------------------------------------
# MAGIC
# MAGIC with tmp_hedge as (
# MAGIC select
# MAGIC a.quarter,
# MAGIC a.province_city,
# MAGIC -- 0 - sum(...) yields the negative of the total
# MAGIC 0 - sum(a.sales_val) as hedge_val,
# MAGIC 0 - sum(a.sales_vol) as hedge_vol
# MAGIC from tmp.tmp_retail_brand_rawdata a
# MAGIC inner join dwd.dwd_gnd_retail_split_automatic b
# MAGIC on a.ta = b.ta
# MAGIC and a.market = b.market
# MAGIC and a.brand_category = b.brand
# MAGIC and ifnull(a.common_name, '') = ifnull(b.common_name, '')
# MAGIC where b.level like '%HEDGE_TA%'
# MAGIC group by a.quarter,a.province_city
# MAGIC )
# MAGIC insert into tmp.tmp_retail_level_molecule
# MAGIC select
# MAGIC quarter,
# MAGIC province_city,
# MAGIC 'HEDGE_TA',
# MAGIC 'HEDGE_MARKET',
# MAGIC 'HEDGE_MOLECULE',
# MAGIC hedge_val,
# MAGIC hedge_vol,
# MAGIC -- literal 5 equals DATA_FLAG_HEDGE
# MAGIC 5
# MAGIC from tmp_hedge
# COMMAND ----------
# MAGIC %sql
# MAGIC -------------------------------------------------------------------------------------
# MAGIC -- STEP-1: load data to tmp table
# MAGIC -- 1.4 append HEDGE_TA data to every level
# MAGIC -- 1.4.3 append HEDGE_BRAND data to LEVEL_BRAND
# MAGIC -- Same tmp_hedge aggregate as 1.4.1 (negated sums per quarter / province_city),
# MAGIC -- appended to the brand table with the extra labels 'HEDGE_MOLECULE' / 'HEDGE_BRAND'.
# MAGIC -------------------------------------------------------------------------------------
# MAGIC
# MAGIC with tmp_hedge as (
# MAGIC select
# MAGIC a.quarter,
# MAGIC a.province_city,
# MAGIC -- 0 - sum(...) yields the negative of the total
# MAGIC 0 - sum(a.sales_val) as hedge_val,
# MAGIC 0 - sum(a.sales_vol) as hedge_vol
# MAGIC from tmp.tmp_retail_brand_rawdata a
# MAGIC inner join dwd.dwd_gnd_retail_split_automatic b
# MAGIC on a.ta = b.ta
# MAGIC and a.market = b.market
# MAGIC and a.brand_category = b.brand
# MAGIC and ifnull(a.common_name, '') = ifnull(b.common_name, '')
# MAGIC where b.level like '%HEDGE_TA%'
# MAGIC group by a.quarter,a.province_city
# MAGIC )
# MAGIC
# MAGIC insert into table tmp.tmp_retail_level_brand
# MAGIC select
# MAGIC quarter,
# MAGIC province_city,
# MAGIC 'HEDGE_TA',
# MAGIC 'HEDGE_MARKET',
# MAGIC 'HEDGE_MOLECULE',
# MAGIC 'HEDGE_BRAND',
# MAGIC hedge_val,
# MAGIC hedge_vol,
# MAGIC -- four NULL placeholders; the 1.5 cell names the same positions ratio_val, ratio_vol, total_val, total_vol
# MAGIC null,
# MAGIC null,
# MAGIC null,
# MAGIC null,
# MAGIC -- literal 5 equals DATA_FLAG_HEDGE
# MAGIC 5
# MAGIC from tmp_hedge
# COMMAND ----------
# MAGIC %sql
# MAGIC -------------------------------------------------------------------------------------
# MAGIC -- STEP-1: load data to tmp table
# MAGIC -- 1.5 load data from pack, balance level_brand
# MAGIC -- add regional data that is included in the pack but not in the brand
# MAGIC -- Flow: tmp_pack_to_brand (which products qualify) -> tmp_pack_sum (aggregate pack rows
# MAGIC -- to brand grain) -> tmp_final (keep only combinations missing from the raw brand rows)
# MAGIC -- -> append to tmp.tmp_retail_level_brand.
# MAGIC -- NOTE(review): this cell reads the region column as zk_regin, while the source tables in
# MAGIC -- the load notebook call it zk_region; presumably zk_regin is the tmp table's real column
# MAGIC -- name - verify.
# MAGIC -------------------------------------------------------------------------------------
# MAGIC -- tmp_pack_to_brand: distinct product keys of raw (data_flag = 0), non-national pack rows
# MAGIC -- whose level_ta / level_market pair exists in the split config
# MAGIC with tmp_pack_to_brand as (
# MAGIC select distinct a.quarter,a.level_ta,a.level_market, c.cmps_des_c,c.prod_des_c,a.product_id
# MAGIC from tmp.tmp_retail_pack_rawdata a
# MAGIC inner join dwd.dwd_gnd_ext_retail_pack_property c
# MAGIC on a.product_id = c.product_id
# MAGIC where exists (
# MAGIC select distinct b.level_ta, b.level_market
# MAGIC from dwd.dwd_gnd_retail_split_automatic b
# MAGIC where b.level_ta is not null
# MAGIC and a.level_ta = b.level_ta
# MAGIC and a.level_market = b.level_market
# MAGIC ) and a.data_flag = 0
# MAGIC and a.zk_regin <> '全国'
# MAGIC order by a.quarter,a.level_ta,a.level_market, c.cmps_des_c,c.prod_des_c
# MAGIC -- tmp_pack_sum: pack sales summed per quarter / region / ta / market / molecule / product.
# MAGIC -- NOTE(review): the join to tmp_pack_to_brand has no region key and only 'ROC' is filtered
# MAGIC -- out here, so rows of every other region (including '全国') of a qualifying product are
# MAGIC -- aggregated - confirm this is intended.
# MAGIC ), tmp_pack_sum as (
# MAGIC select
# MAGIC a.quarter,
# MAGIC a.zk_regin,
# MAGIC a.level_ta,
# MAGIC a.level_market,
# MAGIC b.cmps_des_c as molecule_desc,
# MAGIC -- when prod_des_c is NULL the product label falls back to '<molecule>_OTHER_PROD'
# MAGIC nvl(b.prod_des_c,concat(b.cmps_des_c,'_OTHER_PROD')) as product_desc,
# MAGIC sum(a.sales_value) as sales_val,
# MAGIC -- volume = sales_unit * counting_unit / unit (unit defaults to 1 when NULL), rounded to 4 decimals
# MAGIC round(sum(a.sales_unit * (c.counting_unit/ coalesce(c.unit,1)) ) ,4) as sales_vol
# MAGIC from tmp.tmp_retail_pack_rawdata a
# MAGIC inner join tmp_pack_to_brand b
# MAGIC on a.quarter = b.quarter
# MAGIC and a.level_ta = b.level_ta
# MAGIC and a.level_market = b.level_market
# MAGIC and a.product_id = b.product_id
# MAGIC inner join dwd.dwd_gnd_ext_retail_pack_property c
# MAGIC on a.product_id = c.product_id
# MAGIC where a.data_flag = 0 and zk_regin <> 'ROC'
# MAGIC group by a.quarter, a.zk_regin,a.level_ta,a.level_market,b.cmps_des_c,b.prod_des_c
# MAGIC order by a.quarter,a.zk_regin,a.level_ta
# MAGIC -- tmp_brand: raw (data_flag = 0) rows already present at brand level
# MAGIC ), tmp_brand as (
# MAGIC select * from tmp.tmp_retail_level_brand where data_flag = 0
# MAGIC -- tmp_final: pack aggregates with no matching raw brand row on
# MAGIC -- quarter / region / ta / market / molecule / brand
# MAGIC ),tmp_final as (
# MAGIC select
# MAGIC a.*,
# MAGIC null as ratio_val,
# MAGIC null as ratio_vol,
# MAGIC null as total_val,
# MAGIC null as total_vol,
# MAGIC 0 as data_flag -- brand data aggregated from pack is also genuine raw data
# MAGIC from tmp_pack_sum a
# MAGIC where not exists (
# MAGIC select 1 from tmp_brand b
# MAGIC where a.quarter = b.quarter
# MAGIC and a.zk_regin = b.province_city
# MAGIC and a.level_ta = b.ta
# MAGIC and a.level_market = b.market
# MAGIC and a.molecule_desc = b.molecule
# MAGIC and a.product_desc = b.brand
# MAGIC )
# MAGIC )
# MAGIC
# MAGIC -- select * relies on tmp_final's column order matching the target table positionally
# MAGIC insert into table tmp.tmp_retail_level_brand
# MAGIC select * from tmp_final
# COMMAND ----------
# MAGIC %md
# MAGIC ## STEP-2: subtract data from level market to brand
# COMMAND ----------
# MAGIC %sql
# MAGIC -------------------------------------------------------------------------------------
# MAGIC -- STEP-2: subtract data from level market to brand
# MAGIC -- 2.1 subtract level market data and add 'OTHER_MOLECULE' data
# MAGIC -- For every market-level row, the sum of its molecule-level rows is subtracted; a
# MAGIC -- remainder with positive value or volume is appended to the molecule table as
# MAGIC -- 'OTHER_MOLECULE'.
# MAGIC -------------------------------------------------------------------------------------
# MAGIC
# MAGIC -- tmp_aggregate_market: molecule-level sales rolled up to quarter / province_city / ta / market
# MAGIC with tmp_aggregate_market as (
# MAGIC select
# MAGIC quarter,
# MAGIC province_city,
# MAGIC ta,
# MAGIC market,
# MAGIC sum(sales_val) as sales_val,
# MAGIC sum(sales_vol) as sales_vol
# MAGIC from tmp.tmp_retail_level_molecule
# MAGIC group by quarter, province_city , ta ,market
# MAGIC -- tmp_molecule: market total minus molecule roll-up (a missing roll-up counts as 0)
# MAGIC ),tmp_molecule as (
# MAGIC select
# MAGIC a.quarter,
# MAGIC a.province_city,
# MAGIC a.ta,
# MAGIC a.market,
# MAGIC a.sales_val - ifnull(b.sales_val, 0) as sales_val,
# MAGIC a.sales_vol - ifnull(b.sales_vol, 0) as sales_vol
# MAGIC from tmp.tmp_retail_level_market a
# MAGIC left join tmp_aggregate_market b
# MAGIC on a.quarter = b.quarter
# MAGIC and a.province_city = b.province_city
# MAGIC and a.ta = b.ta
# MAGIC and a.market = b.market
# MAGIC )
# MAGIC
# MAGIC -------append OTHER_MOLECULE to tmp.tmp_retail_level_molecule
# MAGIC insert into tmp.tmp_retail_level_molecule
# MAGIC select
# MAGIC quarter,
# MAGIC province_city,
# MAGIC ta,
# MAGIC market,
# MAGIC 'OTHER_MOLECULE',
# MAGIC sales_val,
# MAGIC sales_vol,
# MAGIC -- literal 1 equals DATA_FLAG_SUB
# MAGIC 1
# MAGIC from tmp_molecule
# MAGIC -- zero and negative remainders are dropped unless the other measure is positive
# MAGIC where sales_val > 0 or sales_vol > 0
# COMMAND ----------
# MAGIC %sql
# MAGIC -------------------------------------------------------------------------------------
# MAGIC -- STEP-2: subtract data from level market to brand
# MAGIC -- 2.2 subtract level molecule data and add 'OTHER_BRAND' data
# MAGIC -- For every molecule-level row, the sum of its brand-level rows is subtracted; a
# MAGIC -- remainder with positive value or volume is appended to the brand table as 'OTHER_BRAND'.
# MAGIC -- The molecule table is read as it stands when this cell runs, so rows appended by
# MAGIC -- earlier cells (e.g. 'OTHER_MOLECULE' from 2.1) are processed as well.
# MAGIC -------------------------------------------------------------------------------------
# MAGIC -- tmp_aggregate_molecule: brand-level sales rolled up to quarter / province_city / ta / market / molecule
# MAGIC with tmp_aggregate_molecule as (
# MAGIC select
# MAGIC quarter,
# MAGIC province_city,
# MAGIC ta,
# MAGIC market,
# MAGIC molecule,
# MAGIC sum(sales_val) as sales_val,
# MAGIC sum(sales_vol) as sales_vol
# MAGIC from tmp.tmp_retail_level_brand
# MAGIC group by quarter,province_city ,ta ,market,molecule
# MAGIC -- tmp_brand: molecule total minus brand roll-up (a missing roll-up counts as 0)
# MAGIC ),tmp_brand as (
# MAGIC select
# MAGIC a.quarter,
# MAGIC a.province_city,
# MAGIC a.ta,
# MAGIC a.market,
# MAGIC a.molecule,
# MAGIC a.sales_val - ifnull(b.sales_val, 0) as sales_val,
# MAGIC a.sales_vol - ifnull(b.sales_vol, 0) as sales_vol
# MAGIC from tmp.tmp_retail_level_molecule a
# MAGIC left join tmp_aggregate_molecule b
# MAGIC on a.quarter = b.quarter
# MAGIC and a.province_city = b.province_city
# MAGIC and a.ta = b.ta
# MAGIC and a.market = b.market
# MAGIC and a.molecule = b.molecule
# MAGIC )
# MAGIC
# MAGIC -------append OTHER_BRAND to tmp.tmp_retail_level_brand
# MAGIC insert into tmp.tmp_retail_level_brand
# MAGIC select
# MAGIC quarter,
# MAGIC province_city,
# MAGIC ta,
# MAGIC market,
# MAGIC molecule,
# MAGIC 'OTHER_BRAND',
# MAGIC sales_val,
# MAGIC sales_vol,
# MAGIC -- four NULL placeholders; the 1.5 cell names the same positions ratio_val, ratio_vol, total_val, total_vol
# MAGIC null,
# MAGIC null,
# MAGIC null,
# MAGIC null,
# MAGIC -- literal 1 equals DATA_FLAG_SUB
# MAGIC 1
# MAGIC from tmp_brand
# MAGIC -- zero and negative remainders are dropped unless the other measure is positive
# MAGIC where sales_val > 0 or sales_vol > 0
# COMMAND ----------
# MAGIC %md
# MAGIC ## STEP-3: accumulate data from level brand to market
# COMMAND ----------
# MAGIC %sql
# MAGIC -------------------------------------------------------------------------------------
# MAGIC -- STEP-3: accumulate data from level brand to market
# MAGIC --   3.1 accumulate level molecule data and balance 'OTHER_BRAND' & 'OTHER_MOLECULE' data
# MAGIC --     3.1.1 accumulate level molecule data: any molecule that exists at brand level
# MAGIC --           but has no molecule-level row yet is appended with data_flag = 2
# MAGIC -------------------------------------------------------------------------------------
# MAGIC WITH brand_rollup AS (
# MAGIC     -- brand-level sales summed up to molecule granularity
# MAGIC     SELECT
# MAGIC         quarter,
# MAGIC         province_city,
# MAGIC         ta,
# MAGIC         market,
# MAGIC         molecule,
# MAGIC         SUM(sales_val) AS sales_val,
# MAGIC         SUM(sales_vol) AS sales_vol
# MAGIC     FROM tmp.tmp_retail_level_brand
# MAGIC     GROUP BY quarter, province_city, ta, market, molecule
# MAGIC )
# MAGIC -- append the rolled-up molecules that are missing from the molecule level;
# MAGIC -- the anti join keeps only roll-up rows with no matching molecule-level row
# MAGIC INSERT INTO tmp.tmp_retail_level_molecule
# MAGIC SELECT
# MAGIC     agg.quarter,
# MAGIC     agg.province_city,
# MAGIC     agg.ta,
# MAGIC     agg.market,
# MAGIC     agg.molecule,
# MAGIC     agg.sales_val,
# MAGIC     agg.sales_vol,
# MAGIC     2
# MAGIC FROM brand_rollup AS agg
# MAGIC LEFT ANTI JOIN tmp.tmp_retail_level_molecule AS known
# MAGIC     ON  agg.quarter = known.quarter
# MAGIC     AND agg.province_city = known.province_city
# MAGIC     AND agg.ta = known.ta
# MAGIC     AND agg.market = known.market
# MAGIC     AND agg.molecule = known.molecule
# COMMAND ----------
# MAGIC %sql
# MAGIC -------------------------------------------------------------------------------------
# MAGIC -- STEP-3: accumulate data from level brand to market
# MAGIC --   3.1 accumulate level molecule data and balance 'OTHER_BRAND'& 'OTHER_MOLECULE' data
# MAGIC --     3.1.2 balance 'OTHER_MOLECULE' data
# MAGIC --
# MAGIC --   Molecules appended in 3.1.1 (data_flag = 2) were not part of the molecule level when
# MAGIC --   OTHER_MOLECULE (data_flag = 1) was computed in 2.1, so their value is subtracted from
# MAGIC --   the OTHER_MOLECULE row of the same quarter / province_city / ta / market.
# MAGIC --
# MAGIC --   The source is aggregated to one row per market key first: a market can hold several
# MAGIC --   data_flag = 2 molecules, and MERGE fails when more than one source row matches the
# MAGIC --   same target row.
# MAGIC -------------------------------------------------------------------------------------
# MAGIC
# MAGIC merge into tmp.tmp_retail_level_molecule a
# MAGIC using(
# MAGIC   select
# MAGIC     quarter,
# MAGIC     province_city,
# MAGIC     ta,
# MAGIC     market,
# MAGIC     sum(sales_val) as sales_val,
# MAGIC     sum(sales_vol) as sales_vol
# MAGIC   from tmp.tmp_retail_level_molecule
# MAGIC   where data_flag = 2
# MAGIC   group by quarter, province_city, ta, market
# MAGIC ) as b
# MAGIC on a.quarter = b.quarter
# MAGIC and a.province_city = b.province_city
# MAGIC and a.ta = b.ta
# MAGIC and a.market = b.market
# MAGIC and a.molecule = 'OTHER_MOLECULE'
# MAGIC and a.data_flag = 1
# MAGIC when matched then
# MAGIC   update set
# MAGIC   a.sales_val = a.sales_val - b.sales_val,
# MAGIC   a.sales_vol = a.sales_vol - b.sales_vol
# MAGIC
# COMMAND ----------
# MAGIC %sql
# MAGIC -------------------------------------------------------------------------------------
# MAGIC -- STEP-3: accumulate data from level brand to market
# MAGIC --   3.1 accumulate level molecule data and balance 'OTHER_BRAND'& 'OTHER_MOLECULE' data
# MAGIC --     3.1.3 balance 'OTHER_BRAND' data
# MAGIC --
# MAGIC --   Mirrors 3.1.2 at brand level: the value of the molecules appended in 3.1.1
# MAGIC --   (data_flag = 2) is subtracted from the OTHER_MOLECULE / OTHER_BRAND row
# MAGIC --   (data_flag = 1) of the same quarter / province_city / ta / market.
# MAGIC --
# MAGIC --   The source is aggregated to one row per market key first: a market can hold several
# MAGIC --   data_flag = 2 molecules, and MERGE fails when more than one source row matches the
# MAGIC --   same target row.
# MAGIC -------------------------------------------------------------------------------------
# MAGIC
# MAGIC merge into tmp.tmp_retail_level_brand a
# MAGIC using(
# MAGIC   select
# MAGIC     quarter,
# MAGIC     province_city,
# MAGIC     ta,
# MAGIC     market,
# MAGIC     sum(sales_val) as sales_val,
# MAGIC     sum(sales_vol) as sales_vol
# MAGIC   from tmp.tmp_retail_level_molecule
# MAGIC   where data_flag = 2
# MAGIC   group by quarter, province_city, ta, market
# MAGIC ) as b
# MAGIC on a.quarter = b.quarter
# MAGIC and a.province_city = b.province_city
# MAGIC and a.ta = b.ta
# MAGIC and a.market = b.market
# MAGIC and a.molecule = 'OTHER_MOLECULE'
# MAGIC and a.brand = 'OTHER_BRAND'
# MAGIC and a.data_flag = 1
# MAGIC when matched then
# MAGIC   update set
# MAGIC   a.sales_val = a.sales_val - b.sales_val,
# MAGIC   a.sales_vol = a.sales_vol - b.sales_vol
# COMMAND ----------
# MAGIC %sql
# MAGIC -------------------------------------------------------------------------------------
# MAGIC -- STEP-3: accumulate data from level brand to market
# MAGIC --   3.2 accumulate level market data and balance 'OTHER_MOLECULE' data
# MAGIC --     3.2.1 accumulate level market data: any market that exists at molecule level
# MAGIC --           but has no market-level row yet is appended with data_flag = 2
# MAGIC -------------------------------------------------------------------------------------
# MAGIC
# MAGIC WITH molecule_rollup AS (
# MAGIC     -- molecule-level sales summed up to market granularity
# MAGIC     SELECT
# MAGIC         quarter,
# MAGIC         province_city,
# MAGIC         ta,
# MAGIC         market,
# MAGIC         SUM(sales_val) AS sales_val,
# MAGIC         SUM(sales_vol) AS sales_vol
# MAGIC     FROM tmp.tmp_retail_level_molecule
# MAGIC     GROUP BY quarter, province_city, ta, market
# MAGIC )
# MAGIC -- append the rolled-up markets that are missing from the market level;
# MAGIC -- the anti join keeps only roll-up rows with no matching market-level row
# MAGIC INSERT INTO tmp.tmp_retail_level_market
# MAGIC SELECT
# MAGIC     agg.quarter,
# MAGIC     agg.province_city,
# MAGIC     agg.ta,
# MAGIC     agg.market,
# MAGIC     agg.sales_val,
# MAGIC     agg.sales_vol,
# MAGIC     2
# MAGIC FROM molecule_rollup AS agg
# MAGIC LEFT ANTI JOIN tmp.tmp_retail_level_market AS known
# MAGIC     ON  agg.quarter = known.quarter
# MAGIC     AND agg.province_city = known.province_city
# MAGIC     AND agg.ta = known.ta
# MAGIC     AND agg.market = known.market
# COMMAND ----------
# MAGIC %sql
# MAGIC -------------------------------------------------------------------------------------
# MAGIC -- STEP-3: accumulate data from level brand to market
# MAGIC -- 3.2 accumulate level market data and balance 'OTHER_MOLECULE' data
# MAGIC -- 3.2.2 balance 'OTHER_MOLECULE' data
# MAGIC -------------------------------------------------------------------------------------
# MAGIC
# MAGIC ---------not used in this case, because level ta has no data------------------------
# MAGIC --merge into tmp.tmp_retail_level_market a
# MAGIC --using(
# MAGIC -- select * from tmp.tmp_retail_level_molecule
# MAGIC -- where data_flag = 2
# MAGIC --) as b
# MAGIC --on a.quarter = b.quarter
# MAGIC --and a.province_city = b.province_city
# MAGIC --and a.ta = b.ta
# MAGIC --and a.market = 'OTHER_MARKET'
# MAGIC --and a.data_flag = 1
# MAGIC --when matched then
# MAGIC -- update set
# MAGIC -- a.sales_val = a.sales_val - b.sales_val,
# MAGIC -- a.sales_vol = a.sales_vol - b.sales_vol
# COMMAND ----------
# MAGIC %md
# MAGIC ## STEP-4: calculate brand ratio
# COMMAND ----------
# MAGIC %sql
# MAGIC -------------------------------------------------------------------------------------
# MAGIC -- STEP-4: calculate brand ratio
# MAGIC --    update total_val&total_vol and ratio_val&ratio_vol
# MAGIC --
# MAGIC --    Every brand row is matched to the nationwide ('全国') row of the same
# MAGIC --    quarter / ta / market / molecule / brand. total_* receives the nationwide figure
# MAGIC --    and ratio_* this row's share of it, rounded to 10 decimals.
# MAGIC --
# MAGIC --    nullif(..., 0) turns a zero nationwide total into NULL, so the ratio becomes NULL
# MAGIC --    instead of raising a divide-by-zero error when ANSI mode is enabled (with ANSI
# MAGIC --    mode off the division already yields NULL, so results are unchanged there).
# MAGIC -------------------------------------------------------------------------------------
# MAGIC
# MAGIC merge into tmp.tmp_retail_level_brand a
# MAGIC using(
# MAGIC   select * from tmp.tmp_retail_level_brand
# MAGIC   where province_city = '全国'
# MAGIC ) as b
# MAGIC on a.quarter = b.quarter
# MAGIC and a.ta = b.ta
# MAGIC and a.market = b.market
# MAGIC and a.molecule = b.molecule
# MAGIC and a.brand = b.brand
# MAGIC when matched then
# MAGIC   update set
# MAGIC   a.total_val = b.sales_val,
# MAGIC   a.total_vol = b.sales_vol,
# MAGIC   a.ratio_val = round(a.sales_val/nullif(b.sales_val, 0),10),
# MAGIC   a.ratio_vol = round(a.sales_vol/nullif(b.sales_vol, 0),10)
# COMMAND ----------
############################################################END##############################################################

1200
Retail/03 split_pack_data.py Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,209 @@
# Databricks notebook source
############################################################START##############################################################
### STEP-1: insert split pack data into tmp final table: tmp_retail_final_sales
# COMMAND ----------
# MAGIC %sql
# MAGIC -------------------------------------------------------------------------------------
# MAGIC -- STEP-1: insert split pack data into tmp final table
# MAGIC -- insert into tmp_retail_final_sales
# MAGIC --
# MAGIC -- Overview of the CTE chain below:
# MAGIC --   tmp_pack                        pack rows normalised to a YYYYMM period with derived flags
# MAGIC --   tmp_has_roc                     (product_id, pack_code, month) keys that have a 'ROC' region row
# MAGIC --   tmp_pack_this_year_with_roc     current-period rows of keys that have a ROC row ('全国' excluded)
# MAGIC --   tmp_pack_next_year_with_roc     the same rows shifted one year forward, used as last-year (_ly) values
# MAGIC --   tmp_pack_this_year_without_roc  current-period rows of keys without a ROC row
# MAGIC --   tmp_pack_next_year_without_roc  the same rows shifted one year forward, used as last-year (_ly) values
# MAGIC --   tmp_final_sales                 current vs last-year full outer join; a missing side is filled with 0
# MAGIC -------------------------------------------------------------------------------------
# MAGIC
# MAGIC with tmp_pack as (
# MAGIC select
# MAGIC ------------------------------------------------------
# MAGIC -- use the monthly value when present; otherwise derive the period from the quarter
# MAGIC nvl(
# MAGIC a.month,
# MAGIC CONCAT(
# MAGIC SUBSTRING(a.quarter, 1, 4), -- first 4 characters = year
# MAGIC CASE
# MAGIC WHEN SUBSTRING(a.quarter, 6, 1) = '1' THEN '03' -- Q1 -> month 03
# MAGIC WHEN SUBSTRING(a.quarter, 6, 1) = '2' THEN '06' -- Q2 -> month 06
# MAGIC WHEN SUBSTRING(a.quarter, 6, 1) = '3' THEN '09' -- Q3 -> month 09
# MAGIC WHEN SUBSTRING(a.quarter, 6, 1) = '4' THEN '12' -- Q4 -> month 12
# MAGIC END
# MAGIC )
# MAGIC ) as YYYYMM,
# MAGIC ------------------------------------------------------
# MAGIC a.pack_code as iqvia_pack_code,
# MAGIC a.product_id as zk_product_id,
# MAGIC -- 'others' products get no description and are mapped to 'Others_<molecule_desc>'
# MAGIC case when a.product_desc <> 'others' then a.product_desc else null end as prod_des_c,
# MAGIC case when a.product_desc <> 'others' then a.product_desc else concat('Others_', a.molecule_desc) end as PROD_MAPPING,
# MAGIC a.zk_regin as province_city,
# MAGIC a.level_market as market,
# MAGIC a.sales_value,
# MAGIC a.sales_unit,
# MAGIC ------------------------------------------------------
# MAGIC -- counting_unit derivation:
# MAGIC -- do not take the value from the raw pack file table; use counting_unit / unit
# MAGIC -- from the pack_property table instead (a NULL unit is treated as 1)
# MAGIC a.sales_unit * (b.counting_unit/ coalesce(b.unit,1)) as counting_unit,
# MAGIC ------------------------------------------------------
# MAGIC -- flags are collapsed to 1 / 2: pack_flag is 1 only for data_flag = 0, brand_flag is 1 only for brand_flag = 1
# MAGIC case when data_flag = 0 then 1 else 2 end as pack_flag,
# MAGIC case when brand_flag = 1 then 1 else 2 end as brand_flag
# MAGIC from tmp.tmp_retail_pack_rawdata a
# MAGIC -- NOTE(review): assumes one pack_property row per product_id; duplicates would multiply pack rows -- confirm
# MAGIC left join dwd.dwd_gnd_ext_retail_pack_property b
# MAGIC on a.product_id = b.product_id
# MAGIC ), tmp_has_roc as (
# MAGIC select
# MAGIC product_id,
# MAGIC quarter,
# MAGIC ------------------------------------------------------
# MAGIC -- use the monthly value when present; otherwise derive the period from the quarter
# MAGIC nvl(
# MAGIC month,
# MAGIC CONCAT(
# MAGIC SUBSTRING(quarter, 1, 4), -- first 4 characters = year
# MAGIC CASE
# MAGIC WHEN SUBSTRING(quarter, 6, 1) = '1' THEN '03' -- Q1 -> month 03
# MAGIC WHEN SUBSTRING(quarter, 6, 1) = '2' THEN '06' -- Q2 -> month 06
# MAGIC WHEN SUBSTRING(quarter, 6, 1) = '3' THEN '09' -- Q3 -> month 09
# MAGIC WHEN SUBSTRING(quarter, 6, 1) = '4' THEN '12' -- Q4 -> month 12
# MAGIC END
# MAGIC )
# MAGIC ) as month,
# MAGIC ------------------------------------------------------
# MAGIC pack_code
# MAGIC from tmp.tmp_retail_pack_rawdata
# MAGIC where zk_regin = 'ROC'
# MAGIC ), tmp_pack_this_year_with_roc as (
# MAGIC select
# MAGIC *
# MAGIC from tmp_pack a
# MAGIC where exists(
# MAGIC select * from tmp_has_roc b
# MAGIC where a.YYYYMM = b.month
# MAGIC and a.iqvia_pack_code = b.pack_code
# MAGIC and a.zk_product_id = b.product_id
# MAGIC ) and a.province_city <> '全国'
# MAGIC ), tmp_pack_next_year_with_roc as (
# MAGIC select
# MAGIC -- NOTE(review): YYYYMM is built as a string above; "+ 100" relies on an implicit numeric cast -- confirm
# MAGIC cast(YYYYMM + 100 as int) as YYYYMM,
# MAGIC iqvia_pack_code,
# MAGIC zk_product_id,
# MAGIC prod_des_c,
# MAGIC PROD_MAPPING,
# MAGIC province_city,
# MAGIC market,
# MAGIC sales_value as sales_value_ly,
# MAGIC sales_unit as sales_unit_ly,
# MAGIC counting_unit as counting_unit_ly,
# MAGIC pack_flag,
# MAGIC brand_flag
# MAGIC from tmp_pack a
# MAGIC -- keep a shifted row only if its target period does not exceed the latest period present in tmp_pack
# MAGIC where YYYYMM + 100 <= (select max(YYYYMM) from tmp_pack)
# MAGIC and exists(
# MAGIC select * from tmp_has_roc b
# MAGIC where a.YYYYMM = b.month
# MAGIC and a.iqvia_pack_code = b.pack_code
# MAGIC and a.zk_product_id = b.product_id
# MAGIC ) and a.province_city <> '全国'
# MAGIC
# MAGIC ), tmp_pack_this_year_without_roc as (
# MAGIC select
# MAGIC *
# MAGIC from tmp_pack a
# MAGIC where not exists(
# MAGIC select * from tmp_has_roc b
# MAGIC where a.YYYYMM = b.month
# MAGIC and a.iqvia_pack_code = b.pack_code
# MAGIC and a.zk_product_id = b.product_id
# MAGIC )
# MAGIC ), tmp_pack_next_year_without_roc as (
# MAGIC select
# MAGIC cast(YYYYMM + 100 as int) as YYYYMM,
# MAGIC iqvia_pack_code,
# MAGIC zk_product_id,
# MAGIC prod_des_c,
# MAGIC PROD_MAPPING,
# MAGIC province_city,
# MAGIC market,
# MAGIC sales_value as sales_value_ly,
# MAGIC sales_unit as sales_unit_ly,
# MAGIC counting_unit as counting_unit_ly,
# MAGIC pack_flag,
# MAGIC brand_flag
# MAGIC from tmp_pack a
# MAGIC where YYYYMM + 100 <= (select max(YYYYMM) from tmp_pack)
# MAGIC and not exists(
# MAGIC select * from tmp_has_roc b
# MAGIC where a.YYYYMM = b.month
# MAGIC and a.iqvia_pack_code = b.pack_code
# MAGIC and a.zk_product_id = b.product_id
# MAGIC )
# MAGIC ), tmp_final_sales as (
# MAGIC -- branch 1: keys that have a ROC row; descriptive columns come from whichever side exists
# MAGIC select
# MAGIC ifnull(a.yyyymm, b.yyyymm) as yyyymm,
# MAGIC ifnull(a.iqvia_pack_code, b.iqvia_pack_code) as iqvia_pack_code,
# MAGIC ifnull(a.zk_product_id, b.zk_product_id) as zk_product_id,
# MAGIC ifnull(a.prod_des_c, b.prod_des_c) as prod_des_c,
# MAGIC ifnull(a.PROD_MAPPING, b.PROD_MAPPING) as PROD_MAPPING,
# MAGIC ifnull(a.province_city, b.province_city) as province_city,
# MAGIC ifnull(a.market, b.market) as market,
# MAGIC ifnull(a.sales_value, 0) as sales_value,
# MAGIC ifnull(a.sales_unit, 0) as sales_unit,
# MAGIC ifnull(a.counting_unit, 0) as counting_unit,
# MAGIC ifnull(a.pack_flag, b.pack_flag) as pack_flag,
# MAGIC ifnull(a.brand_flag,b.brand_flag ) as brand_flag,
# MAGIC ifnull(b.sales_value_ly, 0) as sales_value_ly,
# MAGIC ifnull(b.sales_unit_ly, 0) as sales_unit_ly,
# MAGIC ifnull(b.counting_unit_ly, 0) as counting_unit_ly
# MAGIC from tmp_pack_this_year_with_roc a
# MAGIC full outer join tmp_pack_next_year_with_roc b
# MAGIC on a.YYYYMM = b.YYYYMM
# MAGIC and a.iqvia_pack_code = b.iqvia_pack_code
# MAGIC and a.zk_product_id = b.zk_product_id
# MAGIC and a.province_city = b.province_city
# MAGIC
# MAGIC union all
# MAGIC
# MAGIC -- branch 2: keys without a ROC row; every row is relabelled to province_city = 'ROC'
# MAGIC select
# MAGIC ifnull(c.yyyymm, d.yyyymm) as yyyymm,
# MAGIC ifnull(c.iqvia_pack_code, d.iqvia_pack_code) as iqvia_pack_code,
# MAGIC ifnull(c.zk_product_id, d.zk_product_id) as zk_product_id,
# MAGIC ifnull(c.prod_des_c, d.prod_des_c) as prod_des_c,
# MAGIC ifnull(c.PROD_MAPPING, d.PROD_MAPPING) as PROD_MAPPING,
# MAGIC 'ROC' as province_city,
# MAGIC ifnull(c.market, d.market) as market,
# MAGIC ifnull(c.sales_value, 0) as sales_value,
# MAGIC ifnull(c.sales_unit, 0) as sales_unit,
# MAGIC ifnull(c.counting_unit, 0) as counting_unit,
# MAGIC --ifnull(c.pack_flag, d.pack_flag) as pack_flag,
# MAGIC 2 as pack_flag, -- these packs have no split ratio and only nationwide pack data, so pack_flag is fixed at 2
# MAGIC ifnull(c.brand_flag,d.brand_flag ) as brand_flag,
# MAGIC ifnull(d.sales_value_ly, 0) as sales_value_ly,
# MAGIC ifnull(d.sales_unit_ly, 0) as sales_unit_ly,
# MAGIC ifnull(d.counting_unit_ly, 0) as counting_unit_ly
# MAGIC from tmp_pack_this_year_without_roc c
# MAGIC full outer join tmp_pack_next_year_without_roc d
# MAGIC on c.YYYYMM = d.YYYYMM
# MAGIC and c.iqvia_pack_code = d.iqvia_pack_code
# MAGIC and c.zk_product_id = d.zk_product_id
# MAGIC and c.province_city = d.province_city
# MAGIC )
# MAGIC
# MAGIC -- the select list below is positional and differs from the CTE column order (each _ly value follows its current value)
# MAGIC insert overwrite table tmp.tmp_retail_final_sales
# MAGIC
# MAGIC select
# MAGIC yyyymm,
# MAGIC iqvia_pack_code,
# MAGIC zk_product_id,
# MAGIC prod_des_c,
# MAGIC PROD_MAPPING,
# MAGIC province_city,
# MAGIC market,
# MAGIC sales_value,
# MAGIC sales_value_ly,
# MAGIC sales_unit,
# MAGIC sales_unit_ly,
# MAGIC counting_unit,
# MAGIC counting_unit_ly,
# MAGIC pack_flag,
# MAGIC brand_flag
# MAGIC from tmp_final_sales
# MAGIC order by yyyymm
# COMMAND ----------
############################################################END################################################################

View File

@@ -0,0 +1,158 @@
# Databricks notebook source
# MAGIC %sql
# MAGIC -- CREATE or REPLACE TABLE tmp.tmp_retail_dtp_pack_rawdata (
# MAGIC -- -- product_id STRING,
# MAGIC -- iqvia_pack_code string,
# MAGIC -- -- region_type STRING,
# MAGIC -- year STRING,
# MAGIC -- time STRING,
# MAGIC -- -- higher_level_region STRING,
# MAGIC -- region STRING,
# MAGIC -- -- prescription_nature STRING,
# MAGIC -- -- medicine_attribute STRING,
# MAGIC -- -- dosage_form STRING,
# MAGIC -- -- object STRING,
# MAGIC -- -- zk_classify1 STRING,
# MAGIC -- -- zk_classify2 STRING,
# MAGIC -- -- zk_classify3 STRING,
# MAGIC -- target_points STRING,
# MAGIC -- -- common_name STRING,
# MAGIC -- -- brand_name STRING,
# MAGIC -- -- product_name STRING,
# MAGIC -- -- category_name STRING,
# MAGIC -- -- pack_des STRING,
# MAGIC -- counting_unit DECIMAL(20,8),
# MAGIC -- -- factory STRING,
# MAGIC -- -- corp_des STRING,
# MAGIC -- average_price DECIMAL(20,8),
# MAGIC -- sales_amount DECIMAL(20,8),
# MAGIC -- sales_volume DECIMAL(20,8),
# MAGIC -- counting_units_obversion DECIMAL(20,8),
# MAGIC -- pack_code STRING,
# MAGIC -- molecule_code STRING,
# MAGIC -- molecule_desc STRING,
# MAGIC -- product_code STRING,
# MAGIC -- product_desc STRING,
# MAGIC -- level_ta STRING,
# MAGIC -- level_market STRING,
# MAGIC -- level_molecule STRING,
# MAGIC -- level_brand STRING,
# MAGIC -- ratio_val DECIMAL(20,10),
# MAGIC -- ratio_vol DECIMAL(20,10),
# MAGIC -- data_flag INT,
# MAGIC -- brand_flag INT)
# MAGIC -- USING delta
# MAGIC -- LOCATION 'abfss://master@retaildlstoragetest.dfs.core.chinacloudapi.cn/TMP/tmp_retail_dtp_pack_rawdata'
# MAGIC -- ;
# COMMAND ----------
### constant
# Therapeutic-area and market labels for the DTP load.
# NOTE(review): the SQL cells visible in this notebook hard-code 'ONC' instead of reading
# these names -- confirm whether the constants are still used anywhere.
LEVEL_TA_ONC = 'ONC'
LEVEL_MARKET_EGFR_TKI = 'EGFR TKI'
# COMMAND ----------
############################################################START##############################################################
### STEP-1: load rawdata to tmp table
# COMMAND ----------
# MAGIC %md
# MAGIC ## STEP-1: load rawdata to tmp table
# COMMAND ----------
# MAGIC %sql
# MAGIC -------------------------------------------------------------------------------------
# MAGIC -- STEP-1: load rawdata to tmp table
# MAGIC --   1.1 rebuild tmp.tmp_retail_dtp_brand_rawdata from the DTP brand source,
# MAGIC --       keeping only the rows ranked by value; ta is fixed to 'ONC' for every row
# MAGIC -------------------------------------------------------------------------------------
# MAGIC INSERT OVERWRITE TABLE tmp.tmp_retail_dtp_brand_rawdata
# MAGIC
# MAGIC -- the select list is positional, so the column order must not change
# MAGIC SELECT
# MAGIC     src.type,
# MAGIC     'ONC' AS ta,
# MAGIC     src.market,
# MAGIC     src.zk_brand_category,
# MAGIC     src.zk_common_name,
# MAGIC     src.zk_manu_des,
# MAGIC     src.rc_name_en,
# MAGIC     src.province_city,
# MAGIC     src.yyyymm,
# MAGIC     src.ytd,
# MAGIC     src.sales_value,
# MAGIC     src.sales_volume,
# MAGIC     src.price,
# MAGIC     src.pdot_counting_unit
# MAGIC FROM dwd.dwd_gnd_ext_dtp_zk_brand AS src
# MAGIC WHERE src.ranked_by = 'value'
# MAGIC
# COMMAND ----------
# MAGIC %sql
# MAGIC -------------------------------------------------------------------------------------
# MAGIC -- STEP-1: load rawdata to tmp table
# MAGIC -- 1.2 load pack data to tmp_retail_dtp_pack_rawdata
# MAGIC --
# MAGIC -- The DTP datasource is joined to the pack property table on product_id and
# MAGIC -- aggregated to one row per iqvia_pack_code / year / time / region / target_points.
# MAGIC -- Sales figures are summed; counting_unit, average_price and
# MAGIC -- counting_units_obversion take the max within the group.
# MAGIC -- Lines prefixed with "--" in the select list are source columns that are not loaded.
# MAGIC -------------------------------------------------------------------------------------
# MAGIC insert overwrite table tmp.tmp_retail_dtp_pack_rawdata
# MAGIC
# MAGIC select
# MAGIC -- dtp_raw_data.product_id,
# MAGIC dtp_pack_data.iqvia_pack_code,
# MAGIC -- dtp_raw_data.region_type,
# MAGIC dtp_raw_data.year,
# MAGIC dtp_raw_data.time,
# MAGIC -- dtp_raw_data.higher_level_region,
# MAGIC dtp_raw_data.region,
# MAGIC -- dtp_raw_data.prescription_nature,
# MAGIC -- dtp_raw_data.medicine_attribute,
# MAGIC -- dtp_raw_data.dosage_form,
# MAGIC -- dtp_raw_data.object,
# MAGIC -- dtp_raw_data.zk_classify1,
# MAGIC -- dtp_raw_data.zk_classify2,
# MAGIC -- dtp_raw_data.zk_classify3,
# MAGIC dtp_raw_data.target_points,
# MAGIC -- dtp_raw_data.common_name,
# MAGIC -- dtp_raw_data.brand_name,
# MAGIC -- dtp_raw_data.product_name,
# MAGIC -- dtp_raw_data.category_name,
# MAGIC -------------------------* hard_code *-------------------------
# MAGIC -- pack_des: the raw pack table uses '*' while the manually maintained pack_property table uses 'x';
# MAGIC -- pack_des takes part in resolving pack_code and the lookup fails without it, so the difference must be removed by hand
# MAGIC -- replace(dtp_raw_data.pack_des,'*','x'),
# MAGIC -------------------------* hard_code *-------------------------
# MAGIC max(dtp_raw_data.counting_unit) counting_unit,
# MAGIC -- dtp_raw_data.factory,
# MAGIC -- dtp_raw_data.corp_des,
# MAGIC max(dtp_raw_data.average_price) average_price,
# MAGIC sum(dtp_raw_data.sales_amount) sales_amount,
# MAGIC sum(dtp_raw_data.sales_volume) sales_volume,
# MAGIC max(dtp_raw_data.counting_units_obversion) counting_units_obversion,
# MAGIC -- the remaining columns are loaded as NULL / constants here
# MAGIC null as pack_code,
# MAGIC null as molecule_code,
# MAGIC null as molecule_desc,
# MAGIC null as product_code,
# MAGIC null as product_desc,
# MAGIC 'ONC' as level_ta,
# MAGIC null as level_market, --- the raw pack file covers several markets; filled in later through a join
# MAGIC null as level_molecule,
# MAGIC null as level_brand,
# MAGIC null as ratio_val,
# MAGIC null as ratio_vol,
# MAGIC 0 as data_flag,
# MAGIC null as brand_flag
# MAGIC from dwd.dwd_gnd_ext_retail_dtp_datasource as dtp_raw_data
# MAGIC -- NOTE(review): if pack_property holds several rows per product_id the sums above are inflated -- confirm uniqueness
# MAGIC left join dwd.dwd_gnd_ext_dtp_pack_property as dtp_pack_data
# MAGIC on dtp_raw_data.product_id = dtp_pack_data.product_id
# MAGIC group by
# MAGIC dtp_pack_data.iqvia_pack_code,
# MAGIC dtp_raw_data.year,
# MAGIC dtp_raw_data.time,
# MAGIC dtp_raw_data.region,
# MAGIC dtp_raw_data.target_points
# COMMAND ----------
############################################################END##############################################################

View File

@@ -0,0 +1,421 @@
# Databricks notebook source
### constant
# NOTE(review): the SQL cells visible in this notebook use literal values rather than these
# names -- confirm where the constants are consumed.

# Row classification labels.
DATA_TYPE_MARKET = 'MARKET'
DATA_TYPE_MOLECULE = 'MOLECULE'
DATA_TYPE_BRAND = 'BRAND'
DATA_TYPE_MULTI_MARKET = 'MULTI_MARKET'
DATA_TYPE_MULTI_MOLECULE = 'MULTI_MOLECULE'
DATA_TYPE_MULTI_BRAND = 'MULTI_BRAND'
DATA_TYPE_DUPLICATE = 'DUPLICATE'
DATA_TYPE_HEDGE_TA = 'HEDGE_TA'
DATA_TYPE_HEDGE_MARKET = 'HEDGE_MARKET'
DATA_TYPE_HEDGE_MOLECULE = 'HEDGE_MOLECULE'
DATA_TYPE_HEDGE_BRAND = 'HEDGE_BRAND'

# data_flag values. In the SQL cells of this notebook 0 is written for rows loaded from the
# raw data, 1 for the OTHER_* rows produced by subtraction, and 2 for rows accumulated from a
# lower level; 3-5 are not written by the cells visible here (meaning presumed from the names).
DATA_FLAG_RAW = 0
DATA_FLAG_SUB = 1
DATA_FLAG_ADD = 2
DATA_FLAG_RATIO = 3
DATA_FLAG_AVG = 4
DATA_FLAG_HEDGE = 5

# Placeholder names for the residual ("other") bucket at each level.
DATA_OTHER_TA = 'OTHER_TA'
DATA_OTHER_MARKET = 'OTHER_MARKET'
DATA_OTHER_MOLECULE = 'OTHER_MOLECULE'
DATA_OTHER_BRAND = 'OTHER_BRAND'

# Region labels: 'ROC' and the nationwide label ('全国').
DATA_AREA_TYPE_ROC = 'ROC'
DATA_LABEL_AREA_ALL = '全国'
# COMMAND ----------
############################################################START##############################################################
### STEP-1: load data to tmp table
### STEP-2: subtract data from level market to brand
### STEP-3: accumulate data from level brand to market
# COMMAND ----------
# MAGIC %md
# MAGIC ## STEP-1: load data to tmp table
# COMMAND ----------
# MAGIC %sql
# MAGIC -------------------------------------------------------------------------------------
# MAGIC -- STEP-1: load data to tmp table
# MAGIC --   1.1 rebuild tmp.tmp_retail_dtp_level_market from the raw brand rows whose
# MAGIC --       split mapping is defined at level 'MARKET' (data_flag = 0)
# MAGIC -------------------------------------------------------------------------------------
# MAGIC INSERT OVERWRITE TABLE tmp.tmp_retail_dtp_level_market
# MAGIC
# MAGIC -- the select list is positional, so the column order must not change
# MAGIC SELECT
# MAGIC     raw.yyyymm,
# MAGIC     raw.province_city,
# MAGIC     map.level_ta,
# MAGIC     map.level_market,
# MAGIC     raw.sales_val,
# MAGIC     raw.sales_vol,
# MAGIC     0
# MAGIC FROM tmp.tmp_retail_dtp_brand_rawdata AS raw
# MAGIC JOIN dwd.dwd_gnd_retail_split_automatic AS map
# MAGIC     ON  raw.ta = map.ta
# MAGIC     AND raw.market = map.market
# MAGIC     AND raw.brand_category = map.brand
# MAGIC     -- a NULL common_name on both sides counts as a match
# MAGIC     AND COALESCE(raw.common_name, '') = COALESCE(map.common_name, '')
# MAGIC WHERE map.level = 'MARKET'
# COMMAND ----------
# MAGIC %sql
# MAGIC -------------------------------------------------------------------------------------
# MAGIC -- STEP-1: load data to tmp table
# MAGIC --   1.2 rebuild tmp.tmp_retail_dtp_level_molecule from the raw brand rows whose
# MAGIC --       split mapping is defined at level 'MOLECULE' (data_flag = 0)
# MAGIC -------------------------------------------------------------------------------------
# MAGIC INSERT OVERWRITE TABLE tmp.tmp_retail_dtp_level_molecule
# MAGIC
# MAGIC -- the select list is positional, so the column order must not change
# MAGIC SELECT
# MAGIC     raw.yyyymm,
# MAGIC     raw.province_city,
# MAGIC     map.level_ta,
# MAGIC     map.level_market,
# MAGIC     map.level_molecule,
# MAGIC     raw.sales_val,
# MAGIC     raw.sales_vol,
# MAGIC     0
# MAGIC FROM tmp.tmp_retail_dtp_brand_rawdata AS raw
# MAGIC JOIN dwd.dwd_gnd_retail_split_automatic AS map
# MAGIC     ON  raw.ta = map.ta
# MAGIC     AND raw.market = map.market
# MAGIC     AND raw.brand_category = map.brand
# MAGIC     -- a NULL common_name on both sides counts as a match
# MAGIC     AND COALESCE(raw.common_name, '') = COALESCE(map.common_name, '')
# MAGIC WHERE map.level = 'MOLECULE'
# COMMAND ----------
# MAGIC %sql
# MAGIC -------------------------------------------------------------------------------------
# MAGIC -- STEP-1: load data to tmp table
# MAGIC --   1.3 rebuild tmp.tmp_retail_dtp_level_brand from the raw brand rows whose
# MAGIC --       split mapping is defined at level 'BRAND' (data_flag = 0)
# MAGIC -------------------------------------------------------------------------------------
# MAGIC INSERT OVERWRITE TABLE tmp.tmp_retail_dtp_level_brand
# MAGIC
# MAGIC -- the select list is positional, so the column order must not change
# MAGIC SELECT
# MAGIC     raw.yyyymm,
# MAGIC     raw.province_city,
# MAGIC     map.level_ta,
# MAGIC     map.level_market,
# MAGIC     map.level_molecule,
# MAGIC     map.level_brand,
# MAGIC     raw.sales_val,
# MAGIC     raw.sales_vol,
# MAGIC     -- four placeholder columns (presumably total_val, total_vol, ratio_val, ratio_vol,
# MAGIC     -- which STEP-4 fills in -- confirm against the table DDL)
# MAGIC     null,
# MAGIC     null,
# MAGIC     null,
# MAGIC     null,
# MAGIC     0
# MAGIC FROM tmp.tmp_retail_dtp_brand_rawdata AS raw
# MAGIC JOIN dwd.dwd_gnd_retail_split_automatic AS map
# MAGIC     ON  raw.ta = map.ta
# MAGIC     AND raw.market = map.market
# MAGIC     AND raw.brand_category = map.brand
# MAGIC     -- a NULL common_name on both sides counts as a match
# MAGIC     AND COALESCE(raw.common_name, '') = COALESCE(map.common_name, '')
# MAGIC WHERE map.level = 'BRAND'
# COMMAND ----------
# MAGIC %md
# MAGIC ## STEP-2: subtract data from level market to brand
# COMMAND ----------
# MAGIC %sql
# MAGIC -------------------------------------------------------------------------------------
# MAGIC -- STEP-2: subtract data from level market to brand
# MAGIC --   2.1 subtract the molecule-level totals from each market row and store the
# MAGIC --       remainder as an 'OTHER_MOLECULE' row (data_flag = 1)
# MAGIC -------------------------------------------------------------------------------------
# MAGIC
# MAGIC WITH molecule_rollup AS (
# MAGIC     -- molecule-level sales summed up to market granularity
# MAGIC     SELECT
# MAGIC         yyyymm,
# MAGIC         province_city,
# MAGIC         ta,
# MAGIC         market,
# MAGIC         SUM(sales_val) AS sales_val,
# MAGIC         SUM(sales_vol) AS sales_vol
# MAGIC     FROM tmp.tmp_retail_dtp_level_molecule
# MAGIC     GROUP BY yyyymm, province_city, ta, market
# MAGIC ),
# MAGIC market_residual AS (
# MAGIC     -- market figure minus what the known molecules already explain;
# MAGIC     -- a market without any molecule row keeps its full value (COALESCE -> 0)
# MAGIC     SELECT
# MAGIC         mkt.yyyymm,
# MAGIC         mkt.province_city,
# MAGIC         mkt.ta,
# MAGIC         mkt.market,
# MAGIC         mkt.sales_val - COALESCE(agg.sales_val, 0) AS sales_val,
# MAGIC         mkt.sales_vol - COALESCE(agg.sales_vol, 0) AS sales_vol
# MAGIC     FROM tmp.tmp_retail_dtp_level_market AS mkt
# MAGIC     LEFT JOIN molecule_rollup AS agg
# MAGIC         ON  mkt.yyyymm = agg.yyyymm
# MAGIC         AND mkt.province_city = agg.province_city
# MAGIC         AND mkt.ta = agg.ta
# MAGIC         AND mkt.market = agg.market
# MAGIC )
# MAGIC
# MAGIC -- append OTHER_MOLECULE rows; the select list is positional, so the column order must not change
# MAGIC INSERT INTO tmp.tmp_retail_dtp_level_molecule
# MAGIC SELECT
# MAGIC     yyyymm,
# MAGIC     province_city,
# MAGIC     ta,
# MAGIC     market,
# MAGIC     'OTHER_MOLECULE',
# MAGIC     sales_val,
# MAGIC     sales_vol,
# MAGIC     1
# MAGIC FROM market_residual
# MAGIC WHERE sales_val > 0 OR sales_vol > 0
# COMMAND ----------
# MAGIC %sql
# MAGIC -------------------------------------------------------------------------------------
# MAGIC -- STEP-2: subtract data from level market to brand
# MAGIC --   2.2 subtract the brand-level totals from each molecule row and store the
# MAGIC --       remainder as an 'OTHER_BRAND' row (data_flag = 1)
# MAGIC -------------------------------------------------------------------------------------
# MAGIC WITH brand_rollup AS (
# MAGIC     -- brand-level sales summed up to molecule granularity
# MAGIC     SELECT
# MAGIC         yyyymm,
# MAGIC         province_city,
# MAGIC         ta,
# MAGIC         market,
# MAGIC         molecule,
# MAGIC         SUM(sales_val) AS sales_val,
# MAGIC         SUM(sales_vol) AS sales_vol
# MAGIC     FROM tmp.tmp_retail_dtp_level_brand
# MAGIC     GROUP BY yyyymm, province_city, ta, market, molecule
# MAGIC ),
# MAGIC molecule_residual AS (
# MAGIC     -- molecule figure minus what the known brands already explain;
# MAGIC     -- a molecule without any brand row keeps its full value (COALESCE -> 0)
# MAGIC     SELECT
# MAGIC         mol.yyyymm,
# MAGIC         mol.province_city,
# MAGIC         mol.ta,
# MAGIC         mol.market,
# MAGIC         mol.molecule,
# MAGIC         mol.sales_val - COALESCE(agg.sales_val, 0) AS sales_val,
# MAGIC         mol.sales_vol - COALESCE(agg.sales_vol, 0) AS sales_vol
# MAGIC     FROM tmp.tmp_retail_dtp_level_molecule AS mol
# MAGIC     LEFT JOIN brand_rollup AS agg
# MAGIC         ON  mol.yyyymm = agg.yyyymm
# MAGIC         AND mol.province_city = agg.province_city
# MAGIC         AND mol.ta = agg.ta
# MAGIC         AND mol.market = agg.market
# MAGIC         AND mol.molecule = agg.molecule
# MAGIC )
# MAGIC
# MAGIC -- append OTHER_BRAND rows; the select list is positional, so the column order must not change
# MAGIC INSERT INTO tmp.tmp_retail_dtp_level_brand
# MAGIC SELECT
# MAGIC     yyyymm,
# MAGIC     province_city,
# MAGIC     ta,
# MAGIC     market,
# MAGIC     molecule,
# MAGIC     'OTHER_BRAND',
# MAGIC     sales_val,
# MAGIC     sales_vol,
# MAGIC     -- four placeholder columns (presumably total_val, total_vol, ratio_val, ratio_vol,
# MAGIC     -- which STEP-4 fills in -- confirm against the table DDL)
# MAGIC     null,
# MAGIC     null,
# MAGIC     null,
# MAGIC     null,
# MAGIC     1
# MAGIC FROM molecule_residual
# MAGIC WHERE sales_val > 0 OR sales_vol > 0
# COMMAND ----------
# MAGIC %md
# MAGIC ## STEP-3: accumulate data from level brand to market
# COMMAND ----------
# MAGIC %sql
# MAGIC -------------------------------------------------------------------------------------
# MAGIC -- STEP-3: accumulate data from level brand to market
# MAGIC --   3.1 accumulate level molecule data and balance 'OTHER_BRAND' & 'OTHER_MOLECULE' data
# MAGIC --     3.1.1 accumulate level molecule data: any molecule that exists at brand level
# MAGIC --           but has no molecule-level row yet is appended with data_flag = 2
# MAGIC -------------------------------------------------------------------------------------
# MAGIC WITH brand_rollup AS (
# MAGIC     -- brand-level sales summed up to molecule granularity
# MAGIC     SELECT
# MAGIC         yyyymm,
# MAGIC         province_city,
# MAGIC         ta,
# MAGIC         market,
# MAGIC         molecule,
# MAGIC         SUM(sales_val) AS sales_val,
# MAGIC         SUM(sales_vol) AS sales_vol
# MAGIC     FROM tmp.tmp_retail_dtp_level_brand
# MAGIC     GROUP BY yyyymm, province_city, ta, market, molecule
# MAGIC )
# MAGIC -- append the rolled-up molecules that are missing from the molecule level;
# MAGIC -- the anti join keeps only roll-up rows with no matching molecule-level row
# MAGIC INSERT INTO tmp.tmp_retail_dtp_level_molecule
# MAGIC SELECT
# MAGIC     agg.yyyymm,
# MAGIC     agg.province_city,
# MAGIC     agg.ta,
# MAGIC     agg.market,
# MAGIC     agg.molecule,
# MAGIC     agg.sales_val,
# MAGIC     agg.sales_vol,
# MAGIC     2
# MAGIC FROM brand_rollup AS agg
# MAGIC LEFT ANTI JOIN tmp.tmp_retail_dtp_level_molecule AS known
# MAGIC     ON  agg.yyyymm = known.yyyymm
# MAGIC     AND agg.province_city = known.province_city
# MAGIC     AND agg.ta = known.ta
# MAGIC     AND agg.market = known.market
# MAGIC     AND agg.molecule = known.molecule
# COMMAND ----------
# MAGIC %sql
# MAGIC -------------------------------------------------------------------------------------
# MAGIC -- STEP-3: accumulate data from level brand to market
# MAGIC --   3.1 accumulate level molecule data and balance 'OTHER_BRAND'& 'OTHER_MOLECULE' data
# MAGIC --     3.1.2 balance 'OTHER_MOLECULE' data: subtract the per-market total of the
# MAGIC --           accumulated molecules (data_flag = 2) from the OTHER_MOLECULE row (data_flag = 1)
# MAGIC -------------------------------------------------------------------------------------
# MAGIC MERGE INTO tmp.tmp_retail_dtp_level_molecule AS tgt
# MAGIC USING (
# MAGIC     -- one source row per market key, so each target row is matched at most once
# MAGIC     SELECT
# MAGIC         yyyymm,
# MAGIC         province_city,
# MAGIC         ta,
# MAGIC         market,
# MAGIC         SUM(sales_val) AS sales_val,
# MAGIC         SUM(sales_vol) AS sales_vol
# MAGIC     FROM tmp.tmp_retail_dtp_level_molecule
# MAGIC     WHERE data_flag = 2
# MAGIC     GROUP BY yyyymm, province_city, ta, market
# MAGIC ) AS src
# MAGIC ON  tgt.yyyymm = src.yyyymm
# MAGIC AND tgt.province_city = src.province_city
# MAGIC AND tgt.ta = src.ta
# MAGIC AND tgt.market = src.market
# MAGIC AND tgt.molecule = 'OTHER_MOLECULE'
# MAGIC AND tgt.data_flag = 1
# MAGIC WHEN MATCHED THEN
# MAGIC     UPDATE SET
# MAGIC         tgt.sales_val = tgt.sales_val - src.sales_val,
# MAGIC         tgt.sales_vol = tgt.sales_vol - src.sales_vol
# MAGIC
# COMMAND ----------
# MAGIC %sql
# MAGIC -------------------------------------------------------------------------------------
# MAGIC -- STEP-3: accumulate data from level brand to market
# MAGIC --   3.1 accumulate level molecule data and balance 'OTHER_BRAND'& 'OTHER_MOLECULE' data
# MAGIC --     3.1.3 balance 'OTHER_BRAND' data: subtract the per-market total of the
# MAGIC --           accumulated molecules (data_flag = 2) from the OTHER_MOLECULE / OTHER_BRAND
# MAGIC --           row (data_flag = 1)
# MAGIC -------------------------------------------------------------------------------------
# MAGIC MERGE INTO tmp.tmp_retail_dtp_level_brand AS tgt
# MAGIC USING (
# MAGIC     -- one source row per market key, so each target row is matched at most once
# MAGIC     SELECT
# MAGIC         yyyymm,
# MAGIC         province_city,
# MAGIC         ta,
# MAGIC         market,
# MAGIC         SUM(sales_val) AS sales_val,
# MAGIC         SUM(sales_vol) AS sales_vol
# MAGIC     FROM tmp.tmp_retail_dtp_level_molecule
# MAGIC     WHERE data_flag = 2
# MAGIC     GROUP BY yyyymm, province_city, ta, market
# MAGIC ) AS src
# MAGIC ON  tgt.yyyymm = src.yyyymm
# MAGIC AND tgt.province_city = src.province_city
# MAGIC AND tgt.ta = src.ta
# MAGIC AND tgt.market = src.market
# MAGIC AND tgt.molecule = 'OTHER_MOLECULE'
# MAGIC AND tgt.brand = 'OTHER_BRAND'
# MAGIC AND tgt.data_flag = 1
# MAGIC WHEN MATCHED THEN
# MAGIC     UPDATE SET
# MAGIC         tgt.sales_val = tgt.sales_val - src.sales_val,
# MAGIC         tgt.sales_vol = tgt.sales_vol - src.sales_vol
# COMMAND ----------
# MAGIC %sql
# MAGIC -------------------------------------------------------------------------------------
# MAGIC -- STEP-3: accumulate data from level brand to market
# MAGIC --   3.2 accumulate level market data and balance 'OTHER_MOLECULE' data
# MAGIC --     3.2.1 accumulate level market data: any market that exists at molecule level
# MAGIC --           but has no market-level row yet is appended with data_flag = 2
# MAGIC -------------------------------------------------------------------------------------
# MAGIC
# MAGIC WITH molecule_rollup AS (
# MAGIC     -- molecule-level sales summed up to market granularity
# MAGIC     SELECT
# MAGIC         yyyymm,
# MAGIC         province_city,
# MAGIC         ta,
# MAGIC         market,
# MAGIC         SUM(sales_val) AS sales_val,
# MAGIC         SUM(sales_vol) AS sales_vol
# MAGIC     FROM tmp.tmp_retail_dtp_level_molecule
# MAGIC     GROUP BY yyyymm, province_city, ta, market
# MAGIC )
# MAGIC -- append the rolled-up markets that are missing from the market level;
# MAGIC -- the anti join keeps only roll-up rows with no matching market-level row
# MAGIC INSERT INTO tmp.tmp_retail_dtp_level_market
# MAGIC SELECT
# MAGIC     agg.yyyymm,
# MAGIC     agg.province_city,
# MAGIC     agg.ta,
# MAGIC     agg.market,
# MAGIC     agg.sales_val,
# MAGIC     agg.sales_vol,
# MAGIC     2
# MAGIC FROM molecule_rollup AS agg
# MAGIC LEFT ANTI JOIN tmp.tmp_retail_dtp_level_market AS known
# MAGIC     ON  agg.yyyymm = known.yyyymm
# MAGIC     AND agg.province_city = known.province_city
# MAGIC     AND agg.ta = known.ta
# MAGIC     AND agg.market = known.market
# COMMAND ----------
# MAGIC %md
# MAGIC ## STEP-4: calculate brand ratio
# COMMAND ----------
# MAGIC %sql
# MAGIC -------------------------------------------------------------------------------------
# MAGIC -- STEP-4: calculate brand ratio
# MAGIC --    update total_val&total_vol and ratio_val&ratio_vol
# MAGIC --
# MAGIC --    Every brand row is matched to the nationwide ('全国') row of the same
# MAGIC --    yyyymm / ta / market / molecule / brand. total_* receives the nationwide figure
# MAGIC --    and ratio_* this row's share of it, rounded to 10 decimals.
# MAGIC --
# MAGIC --    nullif(..., 0) turns a zero nationwide total into NULL, so the ratio becomes NULL
# MAGIC --    instead of raising a divide-by-zero error when ANSI mode is enabled (with ANSI
# MAGIC --    mode off the division already yields NULL, so results are unchanged there).
# MAGIC -------------------------------------------------------------------------------------
# MAGIC merge into tmp.tmp_retail_dtp_level_brand a
# MAGIC using(
# MAGIC   select * from tmp.tmp_retail_dtp_level_brand
# MAGIC   where province_city = '全国'
# MAGIC ) as b
# MAGIC on a.yyyymm = b.yyyymm
# MAGIC and a.ta = b.ta
# MAGIC and a.market = b.market
# MAGIC and a.molecule = b.molecule
# MAGIC and a.brand = b.brand
# MAGIC when matched then
# MAGIC   update set
# MAGIC   a.total_val = b.sales_val,
# MAGIC   a.total_vol = b.sales_vol,
# MAGIC   a.ratio_val = round(a.sales_val/nullif(b.sales_val, 0),10),
# MAGIC   a.ratio_vol = round(a.sales_vol/nullif(b.sales_vol, 0),10)
# COMMAND ----------
############################################################END################################################################

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,510 @@
# Databricks notebook source
# MAGIC %sql
# MAGIC -- CREATE or REPLACE TABLE tmp.tmp_retail_dtp_final_sales (
# MAGIC -- year STRING,
# MAGIC -- yq STRING,
# MAGIC -- yyyymm STRING,
# MAGIC -- iqvia_pack_code STRING,
# MAGIC -- geo_key STRING,
# MAGIC -- count_unit DOUBLE,
# MAGIC -- average_price DOUBLE,
# MAGIC -- sales_value DECIMAL(20,10),
# MAGIC -- sales_unit DECIMAL(20,10),
# MAGIC -- counting_units_obversion DECIMAL(20,10),
# MAGIC -- counting_unit DECIMAL(20,10),
# MAGIC -- sales_value_ly DECIMAL(20,10),
# MAGIC -- sales_unit_ly DECIMAL(20,10),
# MAGIC -- counting_unit_ly DECIMAL(20,10),
# MAGIC -- pack_flag INT,
# MAGIC -- brand_flag INT)
# MAGIC -- USING delta
# MAGIC -- LOCATION 'abfss://master@retaildlstoragetest.dfs.core.chinacloudapi.cn/TMP/tmp_retail_dtp_final_sales';
# COMMAND ----------
############################################################START##############################################################
### STEP-1: insert split pack data into tmp final table: tmp_retail_dtp_final_sales
### STEP-2: calculate OTHERS data
# COMMAND ----------
# MAGIC %md
# MAGIC ## STEP-1: insert split pack data
# COMMAND ----------
# MAGIC %sql
# MAGIC -------------------------------------------------------------------------------------
# MAGIC -- STEP-1: insert split pack data into tmp final table
# MAGIC -- insert into tmp_retail_dtp_final_sales (dws.dws_retail_dtp_sales)
# MAGIC --
# MAGIC -- Overview of the CTE chain below:
# MAGIC --   * raw pack rows are enriched with a counting unit derived from the pack property table;
# MAGIC --   * rows are split into (pack_code, time) pairs that HAVE a region = 'ROC' row and pairs that do NOT;
# MAGIC --   * each group is full-outer-joined with a copy of itself shifted forward by one year
# MAGIC --     (year + 1, time + 100) so that every output row carries current and last-year (_ly) measures;
# MAGIC --   * both groups are unioned and written with INSERT OVERWRITE.
# MAGIC -------------------------------------------------------------------------------------
# MAGIC -- tmp_has_roc: (pack_code, time) pairs for which the raw data contains a region = 'ROC' row
# MAGIC with tmp_has_roc as (
# MAGIC select
# MAGIC pack_code,
# MAGIC time
# MAGIC from tmp.tmp_retail_dtp_pack_rawdata
# MAGIC where region = 'ROC'
# MAGIC -- temp_dtp_pack_property: one row per iqvia_pack_code (max counting_unit / max unit)
# MAGIC ), temp_dtp_pack_property as (
# MAGIC select iqvia_pack_code,max(counting_unit) as counting_unit,max(unit) as unit
# MAGIC from dwd.dwd_gnd_ext_dtp_pack_property
# MAGIC group by iqvia_pack_code
# MAGIC -- tmp_pack: raw pack rows plus counting_unit_property computed from the pack property table
# MAGIC ), tmp_pack as (
# MAGIC select
# MAGIC a.year,
# MAGIC a.time,
# MAGIC a.pack_code,
# MAGIC a.region,
# MAGIC a.counting_unit,
# MAGIC a.average_price,
# MAGIC a.sales_amount,
# MAGIC a.sales_volume,
# MAGIC a.counting_units_obversion,
# MAGIC ------------------------------------------------------
# MAGIC -- counting_unit derivation:
# MAGIC -- do not take the value from the raw pack file table; use counting_unit / unit from the pack_property table instead
# MAGIC a.sales_volume * (b.counting_unit/ coalesce(b.unit,1)) as counting_unit_property,
# MAGIC ------------------------------------------------------
# MAGIC a.data_flag,
# MAGIC a.brand_flag,
# MAGIC -- a.prescription_nature,
# MAGIC -- a.medicine_attribute,
# MAGIC -- a.dosage_form,
# MAGIC -- a.object,
# MAGIC -- a.zk_classify1,
# MAGIC -- a.zk_classify2,
# MAGIC -- a.zk_classify3,
# MAGIC a.target_points
# MAGIC -- a.common_name,
# MAGIC -- a.brand_name,
# MAGIC -- a.product_name,
# MAGIC -- a.pack_des,
# MAGIC -- a.factory,
# MAGIC -- a.corp_des
# MAGIC from tmp.tmp_retail_dtp_pack_rawdata a
# MAGIC left join temp_dtp_pack_property b
# MAGIC ----------------------------------------------------
# MAGIC -- format iqvia_pack_code from dwd_gnd_ext_dtp_pack_property
# MAGIC -- on a.pack_code =
# MAGIC -- case when length(trim(b.iqvia_pack_code)) < 12 and trim(b.iqvia_pack_code) REGEXP '^[0-9]'
# MAGIC -- then right(concat('000000000000',trim(b.iqvia_pack_code)),12)
# MAGIC -- else trim(b.iqvia_pack_code)
# MAGIC -- end
# MAGIC -- ----------------------------------------------------
# MAGIC -- and nvl(a.prescription_nature,'') = nvl(b.prescription_nature,'')
# MAGIC -- and nvl(a.medicine_attribute,'') = nvl(b.medicine_attribute,'')
# MAGIC -- and nvl(a.dosage_form,'') = nvl(b.dosage_form,'')
# MAGIC -- and nvl(a.object,'') = nvl(b.object,'')
# MAGIC -- and nvl(a.zk_classify1,'') = nvl(b.zk_classify1,'')
# MAGIC -- and nvl(a.zk_classify2,'') = nvl(b.zk_classify2,'')
# MAGIC -- and nvl(a.zk_classify3,'') = nvl(b.zk_classify3,'')
# MAGIC -- and nvl(a.target_points,'') = nvl(b.target_points,'')
# MAGIC -- and nvl(a.common_name,'') = nvl(b.common_name,'')
# MAGIC -- and nvl(a.brand_name,'') = nvl(b.brand_name,'')
# MAGIC -- and nvl(a.product_name,'') = nvl(b.product_name,'')
# MAGIC -- and nvl(a.pack_des,'') = nvl(b.zk_pack_des,'')
# MAGIC -- and nvl(a.factory,'') = nvl(b.factory,'')
# MAGIC -- and nvl(a.corp_des,'') = nvl(b.zk_corp_des,'')
# MAGIC -- 2026-02-26: the join conditions above are no longer used; join on the product id instead
# MAGIC -- NOTE(review): this join reads a.iqvia_pack_code while every other reference to the raw table
# MAGIC -- in this statement uses a.pack_code -- confirm that the raw table exposes both columns.
# MAGIC on a.iqvia_pack_code = b.iqvia_pack_code
# MAGIC -- tmp_pack_this_year_with_roc: current-period rows of (pack_code, time) pairs that have a ROC row;
# MAGIC -- the national ('全国') rows are excluded
# MAGIC ),tmp_pack_this_year_with_roc (
# MAGIC select
# MAGIC a.year,
# MAGIC concat(a.year, 'Q', CEIL(CAST(RIGHT(a.time,2) AS INT)/3)) as yq,
# MAGIC a.time as yyyymm,
# MAGIC a.pack_code as iqvia_pack_code,
# MAGIC a.region as geo_key,
# MAGIC a.counting_unit as count_unit,
# MAGIC a.average_price,
# MAGIC a.sales_amount as sales_value,
# MAGIC a.sales_volume as sales_unit,
# MAGIC a.counting_units_obversion,
# MAGIC a.counting_unit_property as counting_unit,
# MAGIC case when a.data_flag = 0 then 1 else 2 end as pack_flag,
# MAGIC case when a.brand_flag = 1 then 1 else 2 end as brand_flag,
# MAGIC -- a.prescription_nature,
# MAGIC -- a.medicine_attribute,
# MAGIC -- a.dosage_form,
# MAGIC -- a.object,
# MAGIC -- a.zk_classify1,
# MAGIC -- a.zk_classify2,
# MAGIC -- a.zk_classify3,
# MAGIC a.target_points
# MAGIC -- a.common_name,
# MAGIC -- a.brand_name,
# MAGIC -- a.product_name,
# MAGIC -- a.pack_des,
# MAGIC -- a.factory,
# MAGIC -- a.corp_des
# MAGIC from tmp_pack a
# MAGIC where exists (
# MAGIC select * from tmp_has_roc c
# MAGIC where c.pack_code = a.pack_code
# MAGIC and c.time = a.time
# MAGIC ) and a.region <> '全国'
# MAGIC -- tmp_pack_next_year_with_roc: the same rows shifted forward one year (year + 1, time + 100) so their
# MAGIC -- measures become the *_ly values of the following year; rows whose shifted period would exceed
# MAGIC -- max(time) are dropped
# MAGIC ), tmp_pack_next_year_with_roc (
# MAGIC select
# MAGIC cast(a.year + 1 as int) as year,
# MAGIC concat(cast(a.year + 1 as int) , 'Q', CEIL(CAST(RIGHT(a.time,2) AS INT)/3)) as yq,
# MAGIC cast(a.time + 100 as int) as yyyymm,
# MAGIC a.pack_code as iqvia_pack_code,
# MAGIC a.region as geo_key,
# MAGIC a.counting_unit as count_unit,
# MAGIC a.average_price,
# MAGIC a.sales_amount as sales_value_ly,
# MAGIC a.sales_volume as sales_unit_ly,
# MAGIC a.counting_unit_property as counting_unit_ly,
# MAGIC a.counting_units_obversion,
# MAGIC case when a.data_flag = 0 then 1 else 2 end as pack_flag,
# MAGIC case when a.brand_flag = 1 then 1 else 2 end as brand_flag,
# MAGIC -- a.prescription_nature,
# MAGIC -- a.medicine_attribute,
# MAGIC -- a.dosage_form,
# MAGIC -- a.object,
# MAGIC -- a.zk_classify1,
# MAGIC -- a.zk_classify2,
# MAGIC -- a.zk_classify3,
# MAGIC a.target_points
# MAGIC -- a.common_name,
# MAGIC -- a.brand_name,
# MAGIC -- a.product_name,
# MAGIC -- a.pack_des,
# MAGIC -- a.factory,
# MAGIC -- a.corp_des
# MAGIC from tmp_pack a
# MAGIC where a.time + 100 <= (select max(time) from tmp_pack)
# MAGIC and exists (
# MAGIC select * from tmp_has_roc c
# MAGIC where c.pack_code = a.pack_code
# MAGIC and c.time = a.time
# MAGIC ) and a.region <> '全国'
# MAGIC -- tmp_pack_with_roc: full outer join of current rows (a) and shifted rows (b) on
# MAGIC -- yyyymm / iqvia_pack_code / geo_key / target_points; measures missing on either side become 0
# MAGIC ), tmp_pack_with_roc as (
# MAGIC select
# MAGIC ifnull(a.year, b.year) as year,
# MAGIC ifnull(a.yq, b.yq) as yq ,
# MAGIC ifnull(a.yyyymm, b.yyyymm) as yyyymm,
# MAGIC ifnull(a.iqvia_pack_code, b.iqvia_pack_code) as iqvia_pack_code,
# MAGIC ifnull(a.geo_key, b.geo_key) as geo_key,
# MAGIC ifnull(a.count_unit, b.count_unit) as count_unit,
# MAGIC ifnull(a.average_price, b.average_price) as average_price,
# MAGIC ifnull(a.sales_value, 0) as sales_value,
# MAGIC ifnull(a.sales_unit, 0) as sales_unit,
# MAGIC ifnull(a.counting_unit, 0) as counting_unit,
# MAGIC ifnull(a.counting_units_obversion, b.counting_units_obversion) as counting_units_obversion,
# MAGIC ifnull(a.pack_flag, b.pack_flag) as pack_flag,
# MAGIC ifnull(a.brand_flag, b.brand_flag) as brand_flag,
# MAGIC ifnull(b.sales_value_ly, 0) as sales_value_ly,
# MAGIC ifnull(b.sales_unit_ly, 0) as sales_unit_ly,
# MAGIC ifnull(b.counting_unit_ly, 0) as counting_unit_ly
# MAGIC from tmp_pack_this_year_with_roc a
# MAGIC full outer join tmp_pack_next_year_with_roc b
# MAGIC on a.yyyymm = b.yyyymm
# MAGIC and a.iqvia_pack_code = b.iqvia_pack_code
# MAGIC and a.geo_key = b.geo_key
# MAGIC -- and nvl(a.prescription_nature,'') = nvl(b.prescription_nature,'')
# MAGIC -- and nvl(a.medicine_attribute,'') = nvl(b.medicine_attribute,'')
# MAGIC -- and nvl(a.dosage_form,'') = nvl(b.dosage_form,'')
# MAGIC -- and nvl(a.object,'') = nvl(b.object,'')
# MAGIC -- and nvl(a.zk_classify1,'') = nvl(b.zk_classify1,'')
# MAGIC -- and nvl(a.zk_classify2,'') = nvl(b.zk_classify2,'')
# MAGIC -- and nvl(a.zk_classify3,'') = nvl(b.zk_classify3,'')
# MAGIC and nvl(a.target_points,'') = nvl(b.target_points,'')
# MAGIC -- and nvl(a.common_name,'') = nvl(b.common_name,'')
# MAGIC -- and nvl(a.brand_name,'') = nvl(b.brand_name,'')
# MAGIC -- and nvl(a.product_name,'') = nvl(b.product_name,'')
# MAGIC -- and nvl(a.pack_des,'') = nvl(b.pack_des,'')
# MAGIC -- and nvl(a.factory,'') = nvl(b.factory,'')
# MAGIC -- and nvl(a.corp_des,'') = nvl(b.corp_des,'')
# MAGIC -- tmp_pack_this_year_without_roc: current-period rows of (pack_code, time) pairs that have NO ROC row;
# MAGIC -- geo_key is forced to 'ROC' and, unlike the with_roc branch, no region filter is applied
# MAGIC ), tmp_pack_this_year_without_roc (
# MAGIC select
# MAGIC left(a.time, 4) year,
# MAGIC concat(a.year, 'Q', CEIL(CAST(RIGHT(a.time,2) AS INT)/3)) as yq,
# MAGIC a.time as yyyymm,
# MAGIC a.pack_code as iqvia_pack_code,
# MAGIC 'ROC' as geo_key,
# MAGIC a.counting_unit as count_unit,
# MAGIC a.average_price,
# MAGIC a.sales_amount as sales_value,
# MAGIC a.sales_volume as sales_unit,
# MAGIC a.counting_units_obversion,
# MAGIC a.counting_unit_property as counting_unit,
# MAGIC case when data_flag = 0 then 1 else 2 end as pack_flag,
# MAGIC case when brand_flag = 1 then 1 else 2 end as brand_flag,
# MAGIC -- a.prescription_nature,
# MAGIC -- a.medicine_attribute,
# MAGIC -- a.dosage_form,
# MAGIC -- a.object,
# MAGIC -- a.zk_classify1,
# MAGIC -- a.zk_classify2,
# MAGIC -- a.zk_classify3,
# MAGIC a.target_points
# MAGIC -- a.common_name,
# MAGIC -- a.brand_name,
# MAGIC -- a.product_name,
# MAGIC -- a.pack_des,
# MAGIC -- a.factory,
# MAGIC -- a.corp_des
# MAGIC from tmp_pack a
# MAGIC where not exists (
# MAGIC select * from tmp_has_roc c
# MAGIC where c.pack_code = a.pack_code
# MAGIC and c.time = a.time
# MAGIC )
# MAGIC -- tmp_pack_next_year_without_roc: the no-ROC rows shifted forward one year to provide *_ly values
# MAGIC ), tmp_pack_next_year_without_roc as (
# MAGIC select
# MAGIC cast(a.year + 1 as int) as year,
# MAGIC concat(cast(a.year + 1 as int) , 'Q', CEIL(CAST(RIGHT(a.time,2) AS INT)/3)) as yq,
# MAGIC cast(a.time + 100 as int) as yyyymm,
# MAGIC a.pack_code as iqvia_pack_code,
# MAGIC 'ROC' as geo_key,
# MAGIC a.counting_unit as count_unit,
# MAGIC a.average_price,
# MAGIC a.sales_amount as sales_value_ly,
# MAGIC a.sales_volume as sales_unit_ly,
# MAGIC a.counting_unit_property as counting_unit_ly,
# MAGIC a.counting_units_obversion,
# MAGIC case when data_flag = 0 then 1 else 2 end as pack_flag,
# MAGIC case when brand_flag = 1 then 1 else 2 end as brand_flag,
# MAGIC -- a.prescription_nature,
# MAGIC -- a.medicine_attribute,
# MAGIC -- a.dosage_form,
# MAGIC -- a.object,
# MAGIC -- a.zk_classify1,
# MAGIC -- a.zk_classify2,
# MAGIC -- a.zk_classify3,
# MAGIC a.target_points
# MAGIC -- a.common_name,
# MAGIC -- a.brand_name,
# MAGIC -- a.product_name,
# MAGIC -- a.pack_des,
# MAGIC -- a.factory,
# MAGIC -- a.corp_des
# MAGIC from tmp_pack a
# MAGIC where a.time + 100 <= (select max(time) from tmp_pack)
# MAGIC and not exists (
# MAGIC select * from tmp_has_roc c
# MAGIC where c.pack_code = a.pack_code
# MAGIC and c.time = a.time
# MAGIC )
# MAGIC -- tmp_pack_without_roc: same current / last-year full outer join as tmp_pack_with_roc, for the no-ROC rows
# MAGIC ),tmp_pack_without_roc as (
# MAGIC select
# MAGIC ifnull(a.year, b.year) as year,
# MAGIC ifnull(a.yq, b.yq) as yq ,
# MAGIC ifnull(a.yyyymm, b.yyyymm) as yyyymm,
# MAGIC ifnull(a.iqvia_pack_code, b.iqvia_pack_code) as iqvia_pack_code,
# MAGIC ifnull(a.geo_key, b.geo_key) as geo_key,
# MAGIC ifnull(a.count_unit, b.count_unit) as count_unit,
# MAGIC ifnull(a.average_price, b.average_price) as average_price,
# MAGIC ifnull(a.sales_value, 0) as sales_value,
# MAGIC ifnull(a.sales_unit, 0) as sales_unit,
# MAGIC ifnull(a.counting_unit, 0) as counting_unit,
# MAGIC ifnull(a.counting_units_obversion, b.counting_units_obversion) as counting_units_obversion,
# MAGIC ifnull(a.pack_flag, b.pack_flag) as pack_flag,
# MAGIC ifnull(a.brand_flag, b.brand_flag) as brand_flag,
# MAGIC ifnull(b.sales_value_ly, 0) as sales_value_ly,
# MAGIC ifnull(b.sales_unit_ly, 0) as sales_unit_ly,
# MAGIC ifnull(b.counting_unit_ly, 0) as counting_unit_ly
# MAGIC from tmp_pack_this_year_without_roc a
# MAGIC full outer join tmp_pack_next_year_without_roc b
# MAGIC on a.yyyymm = b.yyyymm
# MAGIC and a.iqvia_pack_code = b.iqvia_pack_code
# MAGIC and a.geo_key = b.geo_key
# MAGIC -- and nvl(a.prescription_nature,'') = nvl(b.prescription_nature,'')
# MAGIC -- and nvl(a.medicine_attribute,'') = nvl(b.medicine_attribute,'')
# MAGIC -- and nvl(a.dosage_form,'') = nvl(b.dosage_form,'')
# MAGIC -- and nvl(a.object,'') = nvl(b.object,'')
# MAGIC -- and nvl(a.zk_classify1,'') = nvl(b.zk_classify1,'')
# MAGIC -- and nvl(a.zk_classify2,'') = nvl(b.zk_classify2,'')
# MAGIC -- and nvl(a.zk_classify3,'') = nvl(b.zk_classify3,'')
# MAGIC and nvl(a.target_points,'') = nvl(b.target_points,'')
# MAGIC -- and nvl(a.common_name,'') = nvl(b.common_name,'')
# MAGIC -- and nvl(a.brand_name,'') = nvl(b.brand_name,'')
# MAGIC -- and nvl(a.product_name,'') = nvl(b.product_name,'')
# MAGIC -- and nvl(a.pack_des,'') = nvl(b.pack_des,'')
# MAGIC -- and nvl(a.factory,'') = nvl(b.factory,'')
# MAGIC -- and nvl(a.corp_des,'') = nvl(b.corp_des,'')
# MAGIC -- tmp_final_sales: both branches stacked (the two inputs list their columns in the same order)
# MAGIC ), tmp_final_sales as (
# MAGIC select * from tmp_pack_with_roc
# MAGIC union all
# MAGIC select * from tmp_pack_without_roc
# MAGIC )
# MAGIC
# MAGIC -- replace the full content of the tmp final table with the combined result
# MAGIC insert overwrite table tmp.tmp_retail_dtp_final_sales
# MAGIC
# MAGIC select
# MAGIC year,
# MAGIC yq,
# MAGIC yyyymm,
# MAGIC iqvia_pack_code,
# MAGIC geo_key,
# MAGIC count_unit,
# MAGIC average_price,
# MAGIC sales_value,
# MAGIC sales_unit,
# MAGIC counting_units_obversion,
# MAGIC counting_unit,
# MAGIC sales_value_ly,
# MAGIC sales_unit_ly,
# MAGIC counting_unit_ly,
# MAGIC pack_flag,
# MAGIC brand_flag
# MAGIC from tmp_final_sales
# MAGIC order by yyyymm
# MAGIC
# COMMAND ----------
# MAGIC %md
# MAGIC ## STEP-2: calculate OTHERS data
# COMMAND ----------
# DBTITLE 1,不再计算
# %sql
# -------------------------------------------------------------------------------------
# -- STEP-2: calculate OTHERS data
# -- 2.1 calculate DTP_AZ_OTHERS data
# -------------------------------------------------------------------------------------
# with tmp_az_total_now as (
# select
# cast(top_corp.sales_quarter as int ) yyyymm,
# top_corp.corp_name,
# top_corp.sales_amount *1000000.0 sales_amount,
# pack.CORP_COD
# from dwd.dwd_gnd_ext_retail_dtp_top_copd top_corp
# left join (
# select distinct ZK_Corp_C,CORP_COD
# from dwd.dwd_inc_gnd_retail_b2c_label_total
# ) pack on replace(top_corp.corp_name,'-','')=pack.ZK_Corp_C
# where pack.CORP_COD='A5Z'
# ), tmp_az_total_ly as (
# select
# cast(yyyymm + 100 as int) as yyyymm,
# corp_name,
# sales_amount as sales_amount_ly,
# CORP_COD
# from tmp_az_total_now
# ), tmp_az_total as (
# select
# a.*,
# ifnull(b.sales_amount_ly, 0) as sales_amount_ly
# from tmp_az_total_now a
# left join tmp_az_total_ly b
# on a.yyyymm = b.yyyymm
# and a.corp_name = b.corp_name
# and a.CORP_COD = b.CORP_COD
# ),tmp_az_pack_total as (
# select
# a.year,
# a.yq,
# a.yyyymm,
# sum(a.sales_value) as sales_value,
# sum(a.sales_unit) as sales_unit,
# sum(a.counting_unit) as counting_unit,
# sum(a.sales_value_ly) as sales_value_ly,
# sum(a.sales_unit_ly) as sales_unit_ly,
# sum(a.counting_unit_ly) as counting_unit_ly
# from tmp.tmp_retail_dtp_final_sales a
# where a.iqvia_pack_code in (
# select distinct iqvia_pack_code
# from tmp.tmp_zk_retail_dtp_market_corp
# where corp_cod = 'A5Z'
# )
# group by a.year,a.yq,a.yyyymm
# order by a.yyyymm
# )
# insert into table tmp.tmp_retail_dtp_final_sales
# select
# left(a.yyyymm, 4) as year,
# concat(left(a.yyyymm, 4), 'Q', CEIL(CAST(RIGHT(a.yyyymm,2) AS INT)/3)) as yq,
# a.yyyymm,
# 'DTP_AZ_OTHERS' as iqvia_pack_code,
# 'ROC' as geo_key,
# 0 as count_unit,
# 0 as average_price,
# a.sales_amount - nvl(b.sales_value, 0) as sales_value,
# 0 as sales_unit,
# 0 as counting_units_obversion,
# 0 as counting_unit,
# a.sales_amount_ly - nvl(b.sales_value_ly, 0) as sales_value_ly,
# 0 as sales_unit_ly,
# 0 as counting_unit_ly,
# 0 as pack_flag,
# 0 as brand_flag
# from tmp_az_total a
# left join tmp_az_pack_total b
# on a.yyyymm = b.yyyymm
# order by a.yyyymm
# COMMAND ----------
# DBTITLE 1,不再计算
# %sql
# -------------------------------------------------------------------------------------
# -- STEP-2: calculate OTHERS data
# -- 2.2 calculate DTP_OTHERS data
# -------------------------------------------------------------------------------------
# with tmp_total_now as (
# SELECT
# dtp_name,
# sales_quarter as yyyymm,
# sales_amount * 1000000.0 as sales_amount
# FROM dwd.dwd_gnd_ext_retail_dtp_overall_market
# ), tmp_total_ly as (
# select
# dtp_name,
# cast(yyyymm + 100 as int) as yyyymm,
# sales_amount as sales_amount_ly
# from tmp_total_now
# ), tmp_total as (
# select
# a.*,
# ifnull(b.sales_amount_ly, 0) as sales_amount_ly
# from tmp_total_now a
# left join tmp_total_ly b
# on a.yyyymm = b.yyyymm
# ),tmp_pack_total as (
# select
# a.year,
# a.yq,
# a.yyyymm,
# sum(a.sales_value) as sales_value,
# sum(a.sales_unit) as sales_unit,
# sum(a.counting_unit) as counting_unit,
# sum(a.sales_value_ly) as sales_value_ly,
# sum(a.sales_unit_ly) as sales_unit_ly,
# sum(a.counting_unit_ly) as counting_unit_ly
# from tmp.tmp_retail_dtp_final_sales a
# group by a.year,a.yq,a.yyyymm
# order by a.yyyymm
# )
# insert into table tmp.tmp_retail_dtp_final_sales
# select
# left(a.yyyymm, 4) as year,
# concat(left(a.yyyymm, 4), 'Q', CEIL(CAST(RIGHT(a.yyyymm,2) AS INT)/3)) as yq,
# a.yyyymm,
# 'DTP_OTHERS' as iqvia_pack_code,
# 'ROC' as geo_key,
# 0 as count_unit,
# 0 as average_price,
# a.sales_amount - nvl(b.sales_value, 0) as sales_value,
# 0 as sales_unit,
# 0 as counting_units_obversion,
# 0 as counting_unit,
# a.sales_amount_ly - nvl(b.sales_value_ly, 0) as sales_value_ly,
# 0 as sales_unit_ly,
# 0 as counting_unit_ly,
# 0 as pack_flag,
# 0 as brand_flag
# from tmp_total a
# left join tmp_pack_total b
# on a.yyyymm = b.yyyymm
# order by a.yyyymm
# COMMAND ----------
############################################################END################################################################

View File

@@ -0,0 +1,177 @@
# Databricks notebook source
#当更新pack 或品牌 事实数据时需要运行此代码,否则无需运行。
# COMMAND ----------
# MAGIC %sql
# MAGIC -- Record, for every source workbook (file_name), the DWD table it is loaded into (table_name).
# MAGIC -- The file -> table mapping is kept as one inline table and applied in a single MERGE.
# MAGIC merge into dwd.dwd_gnd_ext_retail_corresponding_relationship as cfg
# MAGIC using (
# MAGIC   select * from values
# MAGIC     ('pack-CV-抗血栓2通用名-全国.xlsx',   'dwd.dwd_gnd_ext_retail_nataional_oap'),
# MAGIC     ('pack-CV-高血压-化学药-全国.xlsx',   'dwd.dwd_gnd_ext_retail_htn'),
# MAGIC     ('pack-雾化器-全国&县域数据.xlsx',    'dwd.dwd_gnd_ext_retail_atomizer'),
# MAGIC     ('pack-RE-慢阻肺-全国.xlsx',          'dwd.dwd_gnd_ext_retail_anti_asthma_copd'),
# MAGIC     ('Brand-品牌数据报表.xlsx',           'dwd.dwd_gnd_ext_zk_brand'),
# MAGIC     ('pack-CV-他汀类+血脂康-全国.xlsx',   'dwd.dwd_gnd_ext_retail_statin_xzk'),
# MAGIC     ('pack-RD-肾科-全国.xlsx',            'dwd.dwd_gnd_ext_retail_nataional_rd'),
# MAGIC     ('pack-GI-慢性胃炎胃溃疡-全国.xlsx',  'dwd.dwd_gnd_ext_retail_aagsa_ppi_oral'),
# MAGIC     ('pack-DM-口服降糖化学药.xlsx',       'dwd.dwd_gnd_ext_retail_nataional_niad'),
# MAGIC     ('pack-CV-酒石酸美托洛尔.xlsx',       'dwd.dwd_gnd_ext_retail_metoprolol_tartrat')
# MAGIC   as mapping(file_name, table_name)
# MAGIC ) as src
# MAGIC on cfg.file_name = src.file_name
# MAGIC when matched then
# MAGIC   update set cfg.table_name = src.table_name;
# MAGIC
# COMMAND ----------
# Automatic ingestion of brand + province data.
# Read the configuration rows of type BRAND: `tab` is the source table name and
# `brand_flag` is the configured file_name (note the alias in the query below).
dfband = spark.sql("""
SELECT DISTINCT table_name tab ,file_name brand_flag FROM dwd.dwd_gnd_ext_retail_corresponding_relationship
where type_name ='BRAND'
""").collect()
def get_union_brand_data(df):
    """Union the normalised brand rows of every configured source table.

    For each configuration row one SELECT is executed through the
    notebook-global ``spark`` session against the table named by ``row.tab``.
    The SELECT casts the measures to ``decimal(30,10)``, replaces ``'-'``
    placeholders in the key/top brand columns, and tags each row with the
    configuration row's ``brand_flag`` as ``pack_flag``. The per-table results
    are combined with a positional ``DataFrame.union``.

    Args:
        df: Iterable of configuration rows, each exposing the attributes
            ``tab`` and ``brand_flag``. ``None`` is accepted.

    Returns:
        The unioned DataFrame, or ``None`` when ``df`` is ``None`` or yields
        no rows.
    """
    # No configuration available: keep the historical "return None" contract.
    if df is None:
        return None

    # Accumulator for the running union; stays None until the first table is read.
    union_query = None
    for table in df:
        # Source table to read from.
        source_table = str(table.tab)
        # Value emitted in the pack_flag output column for this table.
        pack_flag = str(table.brand_flag)
        # NOTE(review): both values are interpolated verbatim into the SQL text,
        # so the configuration table is assumed to hold trusted content.
        # YYYYMM is derived as year * 100 + (last digit of quarter) * 3.
        sql = f"""
            select
            cast(left(quarter, 4)*100 + right(quarter,1)*3 as int ) AS YYYYMM
            ,cast(left(quarter, 4) as int ) AS year
            ,right(quarter, 2) AS quarter
            ,quarter AS yq
            ,type AS brand_cat_type
            ,case when ta = 'NIAD' then 'DM' else ta end AS TA
            ,market AS market
            ,zk_brand_category AS zk_brand_category
            ,zk_common_name AS zk_common_name
            ,zk_manu_des AS zk_manu_des
            ,rc_name_en AS rc_name_en
            ,province_city AS province_city
            ,ytd AS ytd
            ,cast(sales_value * 1000000 as decimal(30,10)) AS sales_val
            ,cast(sales_volume * 1000000 as decimal(30,10)) AS sales_vol
            ,cast(price as decimal(30,10)) as price
            ,cast(num_dist_rate as decimal(30,10)) as num_dist_rate
            ,cast(weig_dist_rate as decimal(30,10)) as weig_dist_rate
            ,cast(value_share as decimal(30,10)) as val_share
            ,cast(volume_share as decimal(30,10)) as vol_share
            ,replace(key_brand_ytd,'-','') as key_brand_ytd
            ,cast(replace(key_brand_rank_ytd,'-','0') as int) as key_brand_rank_ytd
            ,replace(top_brand_ytd,'-','') as top_brand_ytd
            ,cast(replace(top_brand_ms_ytd,'-','0') as decimal(30,10)) as top_brand_ms_ytd
            ,cast(replace(top_brand_inc_ms_ytd,'-','0') as decimal(30,10)) as top_brand_inc_ms_ytd
            ,cast(replace(top_brand_gr_ytd,'-','0') as decimal(30,10)) as top_brand_gr_ytd
            ,replace(key_brand_qtd,'-','') as key_brand_qtd
            ,cast(replace(key_brand_rank_qtd,'-','0') as int) as key_brand_rank_qtd
            ,replace(top_brand_qtd,'-','') as top_brand_qtd
            ,cast(replace(top_brand_ms_qtd,'-','0') as decimal(30,10)) as top_brand_ms_qtd
            ,cast(replace(top_brand_inc_ms_qtd,'-','0') as decimal(30,10)) as top_brand_inc_ms_qtd
            ,cast(replace(top_brand_gr_qtd,'-','0') as decimal(30,10)) as top_brand_gr_qtd
            ,ranked_by as ranked_by
            ,'{pack_flag}' as pack_flag
            ,from_utc_timestamp(current_timestamp(),'UTC+8') as etl_insert_dt
            ,from_utc_timestamp(current_timestamp(),'UTC+8') as etl_update_dt
            from {source_table}
            """
        # Read this table's rows.
        current_query = spark.sql(sql)
        # Identity check: `== None` on a DataFrame only works by accident of the
        # default object comparison.
        if union_query is None:
            union_query = current_query
        else:
            union_query = union_query.union(current_query)
    # Caller decides what to do with the combined dataset (e.g. write it to a table).
    return union_query
# Union every configured brand table and persist the result.
brand_result = get_union_brand_data(dfband)
if brand_result is None:
    # get_union_brand_data returns None when it was given no source tables;
    # fail with an explicit message instead of an AttributeError on None.
    raise ValueError(
        "get_union_brand_data returned no data: no brand source tables to union"
    )
brand_result.write.mode("overwrite").saveAsTable("dwd.dwd_inc_gnd_ext_retail_nataional_brand_union_all")
# COMMAND ----------
# MAGIC %md
# MAGIC ###新逻辑
# MAGIC - 修改brand数据先拆分成月维度的数据
# COMMAND ----------
# MAGIC %sql
# MAGIC /*
# MAGIC   Changed : 2025-03-11
# MAGIC   Author  : chenwu
# MAGIC   Summary : brand data is delivered per quarter while pack data is delivered per month,
# MAGIC             so every quarterly brand row is expanded into its three months and
# MAGIC             sales_val / sales_vol are divided by 3.
# MAGIC */
# MAGIC insert overwrite table dwd.dwd_inc_gnd_ext_retail_nataional_brand_union_all
# MAGIC with quarterly_src as (
# MAGIC   -- Markets delivered per quarter. Markets delivered per month must be listed here so they are excluded.
# MAGIC   select *
# MAGIC   from dwd.dwd_inc_gnd_ext_retail_nataional_brand_union_all
# MAGIC   where market not in ('NIAD','Inhaled Extended Market','布地奈德雾化溶液')
# MAGIC ),
# MAGIC monthly_split as (
# MAGIC   -- One output row per month of the quarter
# MAGIC   select
# MAGIC     SUBSTR(qs.yq, 1, 4)*100          -- year part of yq
# MAGIC       + LPAD(mm.month_no, 2, '0')    -- month number
# MAGIC       AS YYYYMM,
# MAGIC     `year`,
# MAGIC     `quarter`,
# MAGIC     yq,
# MAGIC     brand_cat_type,
# MAGIC     TA,
# MAGIC     market,
# MAGIC     zk_brand_category,
# MAGIC     zk_common_name,
# MAGIC     zk_manu_des,
# MAGIC     rc_name_en,
# MAGIC     province_city,
# MAGIC     ytd,
# MAGIC     sales_val / 3 as sales_val,      -- quarterly value spread evenly over three months
# MAGIC     sales_vol / 3 as sales_vol,      -- quarterly volume spread evenly over three months
# MAGIC     price,
# MAGIC     num_dist_rate,
# MAGIC     weig_dist_rate,
# MAGIC     val_share,
# MAGIC     vol_share,
# MAGIC     key_brand_ytd,
# MAGIC     key_brand_rank_ytd,
# MAGIC     top_brand_ytd,
# MAGIC     top_brand_ms_ytd,
# MAGIC     top_brand_inc_ms_ytd,
# MAGIC     top_brand_gr_ytd,
# MAGIC     key_brand_qtd,
# MAGIC     key_brand_rank_qtd,
# MAGIC     top_brand_qtd,
# MAGIC     top_brand_ms_qtd,
# MAGIC     top_brand_inc_ms_qtd,
# MAGIC     top_brand_gr_qtd,
# MAGIC     ranked_by,
# MAGIC     pack_flag,
# MAGIC     etl_insert_dt,
# MAGIC     etl_update_dt
# MAGIC   from quarterly_src qs
# MAGIC   lateral view explode(
# MAGIC     -- the three calendar months belonging to the quarter suffix of yq
# MAGIC     case RIGHT(qs.yq, 2)
# MAGIC       when 'Q1' then ARRAY(1, 2, 3)
# MAGIC       when 'Q2' then ARRAY(4, 5, 6)
# MAGIC       when 'Q3' then ARRAY(7, 8, 9)
# MAGIC       when 'Q4' then ARRAY(10, 11, 12)
# MAGIC     end
# MAGIC   ) mm as month_no
# MAGIC ),
# MAGIC monthly_src as (
# MAGIC   -- Markets that are already delivered per month; passed through unchanged.
# MAGIC   select *
# MAGIC   from dwd.dwd_inc_gnd_ext_retail_nataional_brand_union_all
# MAGIC   where market in ('NIAD','Inhaled Extended Market','布地奈德雾化溶液')
# MAGIC )
# MAGIC
# MAGIC select * from monthly_split
# MAGIC union all
# MAGIC select * from monthly_src

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,81 @@
-- Databricks notebook source
-- Rebuild dws.dws_tf_external_retail_dtp_special: EGFR TKI market totals per province and month,
-- where the national ('全国') row is replaced by a derived 'ROC' row
-- (national total minus the sum of all non-national rows).
insert overwrite table dws.dws_tf_external_retail_dtp_special
with dim_geo as (
    -- distinct (audit code, region type, province) combinations of the DTP(Quarterly) source
    SELECT
        AUDIT_COD
        ,REGION_TYPE
        ,PROVINCE_C
    FROM DM.DM_TD_EXTERNAL_GEO
    WHERE DATA_SOURCE = 'DTP(Quarterly)'
    GROUP BY 1,2,3
)
,all_data as ( -- DTP brand source, aggregated per market / province / month
    select
        market
        ,province_city
        ,yyyymm
        ,sum(sales_value) as sales_value
        ,sum(sales_volume) as sales_volume
        ,sum(pdot_counting_unit) as pdot_counting_unit
    from dwd.dwd_gnd_ext_dtp_zk_brand
    where type = '品类'
    and market = 'EGFR TKI'
    and zk_brand_category ='EGFR TKI Market'
    and ranked_by = 'value'
    group by 1,2,3
)
,not_quanguo_data as ( -- sum of all non-national rows per market / month
    select
        market
        ,yyyymm
        ,sum(sales_value) as sales_value
        ,sum(sales_volume) as sales_volume
        ,sum(pdot_counting_unit) as pdot_counting_unit
    from all_data
    where province_city <> '全国'
    group by 1,2
)
,roc_data as ( -- ROC = national row - sum of non-national rows
    select
        a.market
        ,'ROC' as province_city
        ,a.yyyymm
        -- coalesce(..., 0): when a month has no non-national rows the left join returns NULL,
        -- and subtracting NULL would turn the whole national figure into NULL
        ,a.sales_value - coalesce(b.sales_value, 0) as sales_value
        ,a.sales_volume - coalesce(b.sales_volume, 0) as sales_volume
        ,a.pdot_counting_unit - coalesce(b.pdot_counting_unit, 0) as pdot_counting_unit
    from all_data a
    left join not_quanguo_data b on a.market = b.market and a.yyyymm = b.yyyymm
    where a.province_city = '全国'
)
,all_data_with_roc as ( -- non-national rows plus the derived ROC rows
    select
        market
        ,province_city
        ,yyyymm
        ,sales_value
        ,sales_volume
        ,pdot_counting_unit
    from all_data where province_city <> '全国'
    union all
    select
        market
        ,province_city
        ,yyyymm
        ,sales_value
        ,sales_volume
        ,pdot_counting_unit
    from roc_data
)
select
    a.market
    ,a.province_city
    ,concat(b.AUDIT_COD, 'DTP(Quarterly)', b.REGION_TYPE ) as audit_key --pbi dim_rc
    ,concat(b.AUDIT_COD, 'DTP(Quarterly)' ) audit_source --pbi dim_geo
    ,concat(left(a.yyyymm,4),'-',right(a.yyyymm,2),'-01') date_key --pbi dim_date
    ,a.yyyymm
    ,a.sales_value
    ,a.sales_volume
    ,a.pdot_counting_unit
from all_data_with_roc a
-- NOTE(review): dim_geo is distinct on (AUDIT_COD, REGION_TYPE, PROVINCE_C); if one PROVINCE_C maps to
-- several audit codes / region types this join repeats the sales rows -- confirm the dimension is 1:1.
left join dim_geo b on a.province_city = b.PROVINCE_C

View File

@@ -0,0 +1,184 @@
-- Databricks notebook source
-- CREATE OR REPLACE TABLE DM.DM_TF_EXT_RETAIL_SALES (
-- YYYYMM STRING,
-- PACK_CODE STRING,
-- CORP_CODE STRING,
-- AUDIT_CODE STRING,
-- PLATFORM_TYPE STRING,
-- STORE_NAME STRING,
-- STORE_TYPE STRING,
-- REGION_TYPE STRING,
-- PACK_FLAG INT,
-- PROD_FLAG INT,
-- DTP_FLAG INT,
-- SALES_UNIT_CAL DECIMAL(38,10),
-- SALES_UNIT_CAL_LY DECIMAL(38,10),
-- SALES_VALUE_CAL DECIMAL(38,10),
-- SALES_VALUE_CAL_LY DECIMAL(38,10),
-- CONUTING_UNIT DECIMAL(38,10),
-- CONUTING_UNIT_LY DECIMAL(38,10),
-- DATA_SOURCE STRING,
-- INST_CODE STRING COMMENT '内部机构编码',
-- CMPS_FLAG STRING COMMENT '分子式标签',
-- DEPT_NAME STRING COMMENT '科室名称',
-- PRESCRIPTION DECIMAL(38,10) COMMENT '处方张数',
-- PRESCRIPTION_LY DECIMAL(38,10) COMMENT '去年同期处方张数',
-- NEW_CODE STRING COMMENT '主数据关联CODE',
-- AREA STRING COMMENT '城市',
-- H_LEVEL STRING COMMENT '医院类型',
-- REIMBURSE STRING COMMENT '报销情况',
-- REIMBURSE_TYPE STRING COMMENT '报销类型',
-- PRESCRIPTION_SOURCE STRING COMMENT '处方来源',
-- ETL_INSERT_DT TIMESTAMP,
-- ETL_UPDATE_DT TIMESTAMP)
-- USING delta
-- -- LOCATION 'abfss://master@azcdatalakeprd.dfs.core.chinacloudapi.cn/DM/dm_tf_ext_retail_sales';
-- -- 上面是生产环境location下面是测试环境location
-- LOCATION 'abfss://master@retaildlstoragetest.dfs.core.chinacloudapi.cn/DM/dm_tf_ext_retail_sales';
-- COMMAND ----------
-- RETAIL_SALES: retail sales from dm.dm_zk_retail_sales (YYYYMM >= '202001') aggregated per
-- month / pack / audit code / pack flag / brand flag, padded with the constant columns
-- expected by DM.DM_TF_EXT_RETAIL_SALES.
CREATE OR REPLACE TEMPORARY VIEW RETAIL_SALES
AS
SELECT
    A.YYYYMM,
    A.iqvia_pack_code AS PACK_CODE,
    trim(A.AUDIT_COD) AS AUDIT_CODE,
    'Retail(Quarterly)' AS DATA_SOURCE,
    -------------------------------------
    -- measures
    SUM(sales_unit) AS SALES_UNIT_CAL,
    SUM(sales_unit_ly) AS SALES_UNIT_CAL_LY,
    SUM(sales_value) AS SALES_VALUE_CAL,
    SUM(sales_value_LY) AS SALES_VALUE_CAL_LY,
    SUM(counting_unit) AS CONUTING_UNIT,
    SUM(counting_unit_LY) AS CONUTING_UNIT_LY,
    null AS prescription,
    null AS prescription_ly,
    -------------------------------------
    -- Retail flag columns
    A.PACK_FLAG,
    A.brand_flag AS PROD_FLAG,
    0 AS DTP_FLAG,
    -- 1 when the pack's molecule is one of the four listed molecules, else 0
    case
        when max(C.CMPS_DES) in ('atorvastatin', 'rosuvastatin', 'esomeprazole', 'omeprazole')
        then 1
        else 0
    end AS cmps_flag,
    -------------------------------------
    -- EC data columns (empty for this source)
    '' AS PLATFORM_TYPE,
    '' AS STORE_NAME,
    '' AS STORE_TYPE,
    -------------------------------------
    -- COUNTY data column (empty for this source)
    '' AS REGION_TYPE,
    -------------------------------------
    -- AIA data column (NULL for this source)
    null AS inst_code,
    -------------------------------------
    -- XIE HE data columns (empty for this source)
    '' AS dept_name,
    '' AS new_code,
    '' AS area,
    '' AS h_level,
    '' AS reimburse,
    '' AS reimburse_type,
    '' AS prescription_source,
    -------------------------------------
    '' AS CORP_CODE -- can be removed; not used by the report
    -------------------------------------
FROM
    dm.dm_zk_retail_sales A
LEFT JOIN (
    -- Exactly one row per pack. Joining on DISTINCT (iqvia_pack_code, CMPS_DES) would repeat a
    -- sales row once per CMPS_DES value of its pack and multiply every SUM above.
    -- max(lower(...)) yields the same value the outer max() previously computed per group.
    select
        iqvia_pack_code,
        max(lower(CMPS_DES)) AS CMPS_DES
    from
        dm.dm_zk_retail_pack_property
    group by
        iqvia_pack_code
) C
    ON A.iqvia_pack_code = C.iqvia_pack_code
where
    A.YYYYMM >= '202001'
-- NOTE(review): grouping uses the raw A.AUDIT_COD while the projection trims it, so codes that
-- differ only in surrounding whitespace produce separate rows with the same AUDIT_CODE.
GROUP BY
    A.YYYYMM,
    A.iqvia_pack_code,
    A.AUDIT_COD,
    A.PACK_FLAG,
    A.brand_flag
-- COMMAND ----------
-- Replace the content of DM.DM_TF_EXT_RETAIL_SALES with the RETAIL_SALES view.
-- Empty or NULL pack / audit codes are substituted with placeholders and both ETL timestamps
-- are stamped with the current time shifted to UTC+8.
INSERT OVERWRITE TABLE DM.DM_TF_EXT_RETAIL_SALES
(
    -- keys
    YYYYMM, PACK_CODE, AUDIT_CODE, DATA_SOURCE,
    -- measures
    SALES_UNIT_CAL, SALES_UNIT_CAL_LY,
    SALES_VALUE_CAL, SALES_VALUE_CAL_LY,
    CONUTING_UNIT, CONUTING_UNIT_LY,
    PRESCRIPTION, PRESCRIPTION_LY,
    -- flags
    PACK_FLAG, PROD_FLAG, DTP_FLAG, CMPS_FLAG,
    -- source-specific attributes
    PLATFORM_TYPE, STORE_NAME, STORE_TYPE, REGION_TYPE,
    INST_CODE, DEPT_NAME, NEW_CODE, AREA, H_LEVEL,
    REIMBURSE, REIMBURSE_TYPE, PRESCRIPTION_SOURCE, CORP_CODE,
    -- audit columns
    ETL_INSERT_DT, ETL_UPDATE_DT
)
SELECT
    -- keys
    YYYYMM,
    COALESCE(NULLIF(PACK_CODE, ''), CONCAT('PACK_CODE_', DATA_SOURCE)) AS PACK_CODE,
    COALESCE(NULLIF(AUDIT_CODE, ''), 'ROC') AS AUDIT_CODE,
    DATA_SOURCE,
    -- measures
    SALES_UNIT_CAL, SALES_UNIT_CAL_LY,
    SALES_VALUE_CAL, SALES_VALUE_CAL_LY,
    CONUTING_UNIT, CONUTING_UNIT_LY,
    PRESCRIPTION, PRESCRIPTION_LY,
    -- flags
    PACK_FLAG, PROD_FLAG, DTP_FLAG, CMPS_FLAG,
    -- source-specific attributes
    PLATFORM_TYPE, STORE_NAME, STORE_TYPE, REGION_TYPE,
    INST_CODE, DEPT_NAME, NEW_CODE, AREA, H_LEVEL,
    REIMBURSE, REIMBURSE_TYPE, PRESCRIPTION_SOURCE, CORP_CODE,
    -- audit columns
    FROM_UTC_TIMESTAMP(CURRENT_TIMESTAMP(), 'UTC+8') AS ETL_INSERT_DT,
    FROM_UTC_TIMESTAMP(CURRENT_TIMESTAMP(), 'UTC+8') AS ETL_UPDATE_DT
FROM
    RETAIL_SALES
;

View File

@@ -0,0 +1,170 @@
-- Databricks notebook source
-- CREATE OR REPLACE TABLE DM.DM_TF_EXT_RETAIL_DTP_SALES (
-- YYYYMM STRING,
-- PACK_CODE STRING,
-- CORP_CODE STRING,
-- AUDIT_CODE STRING,
-- PLATFORM_TYPE STRING,
-- STORE_NAME STRING,
-- STORE_TYPE STRING,
-- REGION_TYPE STRING,
-- PACK_FLAG INT,
-- PROD_FLAG INT,
-- DTP_FLAG INT,
-- SALES_UNIT_CAL DECIMAL(38,10),
-- SALES_UNIT_CAL_LY DECIMAL(38,10),
-- SALES_VALUE_CAL DECIMAL(38,10),
-- SALES_VALUE_CAL_LY DECIMAL(38,10),
-- CONUTING_UNIT DECIMAL(38,10),
-- CONUTING_UNIT_LY DECIMAL(38,10),
-- DATA_SOURCE STRING,
-- INST_CODE STRING COMMENT '内部机构编码',
-- CMPS_FLAG STRING COMMENT '分子式标签',
-- DEPT_NAME STRING COMMENT '科室名称',
-- PRESCRIPTION DECIMAL(38,10) COMMENT '处方张数',
-- PRESCRIPTION_LY DECIMAL(38,10) COMMENT '去年同期处方张数',
-- NEW_CODE STRING COMMENT '主数据关联CODE',
-- AREA STRING COMMENT '城市',
-- H_LEVEL STRING COMMENT '医院类型',
-- REIMBURSE STRING COMMENT '报销情况',
-- REIMBURSE_TYPE STRING COMMENT '报销类型',
-- PRESCRIPTION_SOURCE STRING COMMENT '处方来源',
-- ETL_INSERT_DT TIMESTAMP,
-- ETL_UPDATE_DT TIMESTAMP)
-- USING delta
-- -- LOCATION 'abfss://master@azcdatalakeprd.dfs.core.chinacloudapi.cn/DM/dm_tf_ext_retail_dtp_sales';
-- -- The LOCATION above is the production environment; the one below is the test environment.
-- LOCATION 'abfss://master@retaildlstoragetest.dfs.core.chinacloudapi.cn/DM/dm_tf_ext_retail_dtp_sales';
-- COMMAND ----------
-- Build the DTP (quarterly) sales view in the shared EXT sales layout.
-- Measures from dm.dm_zk_retail_dtp_sales (YYYYMM >= '201901') are summed per
-- month / pack / audit code / brand flag / pack flag. Label columns that belong to
-- other data sources (EC, county, AIA, Xiehe) are emitted as '' or NULL placeholders.
CREATE OR REPLACE TEMPORARY VIEW RETAIL_DTP_SALES
AS
SELECT
  A.YYYYMM,
  A.iqvia_pack_code AS PACK_CODE,
  trim(A.AUDIT_COD) AS AUDIT_CODE,
  'DTP(Quarterly)' DATA_SOURCE,
  -------------------------------------
  SUM(sales_unit) SALES_UNIT_CAL,
  SUM(sales_unit_ly) as SALES_UNIT_CAL_LY,
  SUM(sales_value) SALES_VALUE_CAL,
  SUM(sales_value_LY) as SALES_VALUE_CAL_LY,
  SUM(counting_unit) CONUTING_UNIT,
  SUM(counting_unit_LY) as CONUTING_UNIT_LY,
  null as prescription,
  null as prescription_ly,
  -------------------------------------
  -- Retail hidden-data logic flags ----
  -- NOTE(review): PACK_FLAG is emitted as the constant 1 although A.pack_flag is a
  -- grouping key, so source rows that differ only in pack_flag yield several output
  -- rows with identical keys — confirm whether pack_flag should be grouped at all.
  1 PACK_FLAG,
  brand_flag AS PROD_FLAG,
  1 DTP_FLAG,
  null as cmps_flag,
  -------------------------------------
  -- EC data labels ---------------------
  '' PLATFORM_TYPE,
  '' STORE_NAME,
  '' STORE_TYPE,
  -------------------------------------
  -- COUNTY data labels -----------------
  '' REGION_TYPE,
  -------------------------------------
  -- AIA data labels --------------------
  null inst_code, -- (original comment was truncated in the source)
  -------------------------------------
  -- XIE HE data labels -----------------
  '' as dept_name,
  '' as new_code,
  '' as area,
  '' as h_level,
  '' as reimburse,
  '' as reimburse_type,
  '' as prescription_source,
  -------------------------------------
  '' as CORP_CODE -- can be removed; not used by the reports
  -------------------------------------
FROM
  dm.dm_zk_retail_dtp_sales A
where
  A.YYYYMM >= '201901'
GROUP BY
  A.YYYYMM,
  A.iqvia_pack_code,
  -- Group by the trimmed code that is actually projected; grouping by the raw
  -- column would emit duplicate (YYYYMM, PACK_CODE, AUDIT_CODE) rows for codes
  -- that differ only in surrounding whitespace.
  trim(A.AUDIT_COD),
  A.brand_flag,
  A.pack_flag
-- COMMAND ----------
-- Reload DM.DM_TF_EXT_RETAIL_DTP_SALES from the RETAIL_DTP_SALES temporary view.
-- A blank or NULL PACK_CODE becomes 'PACK_CODE_' || DATA_SOURCE, a blank or NULL
-- AUDIT_CODE becomes 'ROC'; every other column is copied through unchanged.
INSERT OVERWRITE TABLE DM.DM_TF_EXT_RETAIL_DTP_SALES
(
  YYYYMM, PACK_CODE, AUDIT_CODE, DATA_SOURCE,
  SALES_UNIT_CAL, SALES_UNIT_CAL_LY,
  SALES_VALUE_CAL, SALES_VALUE_CAL_LY,
  CONUTING_UNIT, CONUTING_UNIT_LY,
  PRESCRIPTION, PRESCRIPTION_LY,
  PACK_FLAG, PROD_FLAG, DTP_FLAG, CMPS_FLAG,
  PLATFORM_TYPE, STORE_NAME, STORE_TYPE, REGION_TYPE,
  INST_CODE, DEPT_NAME, NEW_CODE, AREA, H_LEVEL,
  REIMBURSE, REIMBURSE_TYPE, PRESCRIPTION_SOURCE,
  CORP_CODE, ETL_INSERT_DT, ETL_UPDATE_DT
)
SELECT
  D.YYYYMM,
  NVL(NULLIF(D.PACK_CODE, ''), CONCAT('PACK_CODE_', D.DATA_SOURCE)) AS PACK_CODE,
  NVL(NULLIF(D.AUDIT_CODE, ''), 'ROC')                              AS AUDIT_CODE,
  D.DATA_SOURCE,
  -- measures (current period / same period last year)
  D.SALES_UNIT_CAL,  D.SALES_UNIT_CAL_LY,
  D.SALES_VALUE_CAL, D.SALES_VALUE_CAL_LY,
  D.CONUTING_UNIT,   D.CONUTING_UNIT_LY,
  D.PRESCRIPTION,    D.PRESCRIPTION_LY,
  -- flags
  D.PACK_FLAG, D.PROD_FLAG, D.DTP_FLAG, D.CMPS_FLAG,
  -- source-specific labels
  D.PLATFORM_TYPE, D.STORE_NAME, D.STORE_TYPE, D.REGION_TYPE,
  D.INST_CODE, D.DEPT_NAME, D.NEW_CODE, D.AREA, D.H_LEVEL,
  D.REIMBURSE, D.REIMBURSE_TYPE, D.PRESCRIPTION_SOURCE,
  D.CORP_CODE,
  -- load time, shifted from UTC to UTC+8
  FROM_UTC_TIMESTAMP(CURRENT_TIMESTAMP(), 'UTC+8') AS ETL_INSERT_DT,
  FROM_UTC_TIMESTAMP(CURRENT_TIMESTAMP(), 'UTC+8') AS ETL_UPDATE_DT
FROM RETAIL_DTP_SALES AS D
;

View File

@@ -0,0 +1,146 @@
-- Databricks notebook source
-- CREATE OR REPLACE TABLE DM.DM_TD_EXT_RETAIL_PACK_PROPERTY
-- (
-- MARKET_PACK_KEY STRING,
-- PACK_CODE STRING,
-- PACK_DESC STRING,
-- STGH_DESC STRING,
-- PACK_LCH STRING,
-- FAMILY_CODE STRING,
-- FAMILY_NAME STRING,
-- PROD_CODE STRING,
-- PROD_DESC STRING,
-- PROD_DESC_C STRING,
-- CMPS_CODE STRING,
-- CMPS_DESC STRING,
-- CMPS_DESC_C STRING,
-- ATC1_CODE STRING,
-- ATC2_CODE STRING,
-- ATC3_CODE STRING,
-- ATC4_CODE STRING,
-- APP1_CODE STRING,
-- APP2_CODE STRING,
-- APP3_CODE STRING,
-- BIO_DESC STRING,
-- GENE_ORIG_DESC STRING,
-- ETH_OTC_DESC STRING,
-- NRDL_DESC STRING,
-- NRDL_ENTRY_DATE STRING,
-- EDL_DESC STRING,
-- TCM_DESC STRING,
-- PAED_DESC STRING,
-- GQCE_DESC STRING,
-- VBP_DESC_V STRING,
-- VBP_DESC STRING,
-- MANU_CODE STRING,
-- MANU_DESC STRING,
-- MANU_DESC_C STRING,
-- MNFL_CODE STRING,
-- MNFL_DESC STRING,
-- CORP_CODE STRING,
-- CORP_DESC STRING,
-- CORP_DESC_C STRING,
-- BRANDTYPE STRING,
-- MARKET STRING,
-- KEY_COMPETITOR STRING,
-- IS_AZ STRING,
-- AZ_MAIN STRING,
-- AZ_RELATED STRING,
-- ATC1_DESC STRING,
-- ATC1_DESC_C STRING,
-- ATC2_DESC STRING,
-- ATC2_DESC_C STRING,
-- ATC3_DESC STRING,
-- ATC3_DESC_C STRING,
-- ATC4_DESC STRING,
-- ATC4_DESC_C STRING,
-- APP1_DESC STRING,
-- APP1_DESC_C STRING,
-- APP2_DESC STRING,
-- APP2_DESC_C STRING,
-- APP3_DESC STRING,
-- APP3_DESC_C STRING,
-- CLASS STRING,
-- MARKET_RATIO STRING,
-- COUNTINGUNIT STRING,
-- VBP_BRAND STRING,
-- REPLENISH_FALG STRING,
-- ETL_INSERT_DT TIMESTAMP,
-- ETL_UPDATE_DT TIMESTAMP
-- )
-- USING delta
-- -- LOCATION 'abfss://master@azcdatalakeprd.dfs.core.chinacloudapi.cn/DM/dm_td_ext_retail_pack_property';
-- -- The LOCATION above is the production environment; the one below is the test environment.
-- LOCATION 'abfss://master@retaildlstoragetest.dfs.core.chinacloudapi.cn/DM/dm_td_ext_retail_pack_property';
-- COMMAND ----------
-- Reload DM.DM_TD_EXT_RETAIL_PACK_PROPERTY from DM.DM_ZK_RETAIL_PACK_PROPERTY,
-- renaming the source's *_COD / *_DES columns to the *_CODE / *_DESC naming.
-- The insert has no column list, so values are matched to the target by position:
-- keep this select list in the target table's column order.
INSERT OVERWRITE TABLE DM.DM_TD_EXT_RETAIL_PACK_PROPERTY
SELECT
  -- pack / product / molecule identity
  P.MARKET_PACK_KEY,
  P.IQVIA_PACK_CODE  AS PACK_CODE,
  P.PACK_DES         AS PACK_DESC,
  P.STGH_DES         AS STGH_DESC,
  P.PACK_LCH,
  P.FAMILY_CODE,
  P.FAMILY_NAME,
  P.IQVIA_PROD_CODE  AS PROD_CODE,
  P.PROD_DES         AS PROD_DESC,
  P.PROD_DES_C       AS PROD_DESC_C,
  P.CMPS_COD         AS CMPS_CODE,
  P.CMPS_DES         AS CMPS_DESC,
  P.CMPS_DES_C       AS CMPS_DESC_C,
  -- ATC / APP classification codes
  P.ATC1_COD         AS ATC1_CODE,
  P.ATC2_COD         AS ATC2_CODE,
  P.ATC3_COD         AS ATC3_CODE,
  P.ATC4_COD         AS ATC4_CODE,
  P.APP1_COD         AS APP1_CODE,
  P.APP2_COD         AS APP2_CODE,
  P.APP3_COD         AS APP3_CODE,
  -- descriptive attributes
  P.BIO_DESC,
  P.GENE_ORIG_DESC,
  P.ETH_OTC_DESC,
  P.NRDL_DESC,
  P.NRDL_ENTRY_DATE,
  P.EDL_DESC,
  P.TCM_DESC,
  P.PAED_DESC,
  P.GQCE_DESC,
  NULL               AS VBP_DESC_V,      -- not populated by this load
  P.VBP_DESC,
  -- manufacturer / corporation
  P.MANU_COD         AS MANU_CODE,
  P.MANU_DES         AS MANU_DESC,
  P.MANU_DES_C       AS MANU_DESC_C,
  P.MNFL_COD         AS MNFL_CODE,
  P.MNFL_DES         AS MNFL_DESC,
  P.CORP_COD         AS CORP_CODE,
  P.CORP_DES         AS CORP_DESC,
  P.CORP_DES_C       AS CORP_DESC_C,
  -- market attributes
  P.BRANDTYPE,
  P.MARKET,
  P.KEY_COMPETITOR,
  P.IS_AZ,
  P.AZ_MAIN,
  P.AZ_RELATED,
  -- ATC / APP classification descriptions
  P.ATC1_DES         AS ATC1_DESC,
  P.ATC1_DES_C       AS ATC1_DESC_C,
  P.ATC2_DES         AS ATC2_DESC,
  P.ATC2_DES_C       AS ATC2_DESC_C,
  P.ATC3_DES         AS ATC3_DESC,
  P.ATC3_DES_C       AS ATC3_DESC_C,
  P.ATC4_DES         AS ATC4_DESC,
  P.ATC4_DES_C       AS ATC4_DESC_C,
  P.APP1_DES         AS APP1_DESC,
  P.APP1_DES_C       AS APP1_DESC_C,
  P.APP2_DES         AS APP2_DESC,
  P.APP2_DES_C       AS APP2_DESC_C,
  P.APP3_DES         AS APP3_DESC,
  P.APP3_DES_C       AS APP3_DESC_C,
  P.CLASS,
  -- columns not populated by this load
  NULL               AS MARKET_RATIO,
  NULL               AS COUNTINGUNIT,
  NULL               AS VBP_BRAND,
  NULL               AS REPLENISH_FALG,
  -- load time, shifted from UTC to UTC+8
  FROM_UTC_TIMESTAMP(CURRENT_TIMESTAMP(), 'UTC+8') AS ETL_INSERT_DT,
  FROM_UTC_TIMESTAMP(CURRENT_TIMESTAMP(), 'UTC+8') AS ETL_UPDATE_DT
FROM DM.DM_ZK_RETAIL_PACK_PROPERTY AS P;

View File

@@ -0,0 +1,146 @@
-- Databricks notebook source
-- CREATE OR REPLACE TABLE DM.DM_TD_EXT_RETAIL_DTP_PACK_PROPERTY
-- (
-- MARKET_PACK_KEY STRING,
-- PACK_CODE STRING,
-- PACK_DESC STRING,
-- STGH_DESC STRING,
-- PACK_LCH STRING,
-- FAMILY_CODE STRING,
-- FAMILY_NAME STRING,
-- PROD_CODE STRING,
-- PROD_DESC STRING,
-- PROD_DESC_C STRING,
-- CMPS_CODE STRING,
-- CMPS_DESC STRING,
-- CMPS_DESC_C STRING,
-- ATC1_CODE STRING,
-- ATC2_CODE STRING,
-- ATC3_CODE STRING,
-- ATC4_CODE STRING,
-- APP1_CODE STRING,
-- APP2_CODE STRING,
-- APP3_CODE STRING,
-- BIO_DESC STRING,
-- GENE_ORIG_DESC STRING,
-- ETH_OTC_DESC STRING,
-- NRDL_DESC STRING,
-- NRDL_ENTRY_DATE STRING,
-- EDL_DESC STRING,
-- TCM_DESC STRING,
-- PAED_DESC STRING,
-- GQCE_DESC STRING,
-- VBP_DESC_V STRING,
-- VBP_DESC STRING,
-- MANU_CODE STRING,
-- MANU_DESC STRING,
-- MANU_DESC_C STRING,
-- MNFL_CODE STRING,
-- MNFL_DESC STRING,
-- CORP_CODE STRING,
-- CORP_DESC STRING,
-- CORP_DESC_C STRING,
-- BRANDTYPE STRING,
-- MARKET STRING,
-- KEY_COMPETITOR STRING,
-- IS_AZ STRING,
-- AZ_MAIN STRING,
-- AZ_RELATED STRING,
-- ATC1_DESC STRING,
-- ATC1_DESC_C STRING,
-- ATC2_DESC STRING,
-- ATC2_DESC_C STRING,
-- ATC3_DESC STRING,
-- ATC3_DESC_C STRING,
-- ATC4_DESC STRING,
-- ATC4_DESC_C STRING,
-- APP1_DESC STRING,
-- APP1_DESC_C STRING,
-- APP2_DESC STRING,
-- APP2_DESC_C STRING,
-- APP3_DESC STRING,
-- APP3_DESC_C STRING,
-- CLASS STRING,
-- MARKET_RATIO STRING,
-- COUNTINGUNIT STRING,
-- VBP_BRAND STRING,
-- REPLENISH_FALG STRING,
-- ETL_INSERT_DT TIMESTAMP,
-- ETL_UPDATE_DT TIMESTAMP
-- )
-- USING delta
-- -- LOCATION 'abfss://master@azcdatalakeprd.dfs.core.chinacloudapi.cn/DM/dm_td_ext_retail_dtp_pack_property';
-- -- The LOCATION above is the production environment; the one below is the test environment.
-- LOCATION 'abfss://master@retaildlstoragetest.dfs.core.chinacloudapi.cn/DM/dm_td_ext_retail_dtp_pack_property';
-- COMMAND ----------
-- Reload DM.DM_TD_EXT_RETAIL_DTP_PACK_PROPERTY from DM.DM_ZK_RETAIL_DTP_PACK_PROPERTY,
-- renaming the source's *_COD / *_DES columns to the *_CODE / *_DESC naming.
-- The insert has no column list, so values are matched to the target by position:
-- keep this select list in the target table's column order.
INSERT OVERWRITE TABLE DM.DM_TD_EXT_RETAIL_DTP_PACK_PROPERTY
SELECT
  -- pack / product / molecule identity
  P.MARKET_PACK_KEY,
  P.IQVIA_PACK_CODE  AS PACK_CODE,
  P.PACK_DES         AS PACK_DESC,
  P.STGH_DES         AS STGH_DESC,
  P.PACK_LCH,
  P.FAMILY_COD       AS FAMILY_CODE,
  P.FAMILY_NAME,
  P.PROD_COD         AS PROD_CODE,
  P.PROD_DES         AS PROD_DESC,
  P.PROD_DES_C       AS PROD_DESC_C,
  P.CMPS_COD         AS CMPS_CODE,
  P.CMPS_DES         AS CMPS_DESC,
  P.CMPS_DES_C       AS CMPS_DESC_C,
  -- ATC / APP classification codes
  P.ATC1_COD         AS ATC1_CODE,
  P.ATC2_COD         AS ATC2_CODE,
  P.ATC3_COD         AS ATC3_CODE,
  P.ATC4_COD         AS ATC4_CODE,
  P.APP1_COD         AS APP1_CODE,
  P.APP2_COD         AS APP2_CODE,
  P.APP3_COD         AS APP3_CODE,
  -- descriptive attributes
  P.BIO_DESC,
  P.GENE_ORIG_DESC,
  P.ETH_OTC_DESC,
  P.NRDL_DESC,
  P.NRDL_ENTRY_DATE,
  P.EDL_DESC,
  P.TCM_DESC,
  P.PAED_DESC,
  P.GQCE_DESC,
  NULL               AS VBP_DESC_V,      -- not populated by this load
  P.VBP_DESC,
  -- manufacturer / corporation
  P.MANU_COD         AS MANU_CODE,
  P.MANU_DES         AS MANU_DESC,
  P.MANU_DES_C       AS MANU_DESC_C,
  P.MNFL_COD         AS MNFL_CODE,
  P.MNFL_DES         AS MNFL_DESC,
  P.CORP_COD         AS CORP_CODE,
  P.CORP_DES         AS CORP_DESC,
  P.CORP_DES_C       AS CORP_DESC_C,
  -- market attributes
  P.BRANDTYPE,
  P.MARKET,
  P.KEY_COMPETITOR,
  P.IS_AZ,
  P.AZ_MAIN,
  P.AZ_RELATED,
  -- ATC / APP classification descriptions
  P.ATC1_DES         AS ATC1_DESC,
  P.ATC1_DES_C       AS ATC1_DESC_C,
  P.ATC2_DES         AS ATC2_DESC,
  P.ATC2_DES_C       AS ATC2_DESC_C,
  P.ATC3_DES         AS ATC3_DESC,
  P.ATC3_DES_C       AS ATC3_DESC_C,
  P.ATC4_DES         AS ATC4_DESC,
  P.ATC4_DES_C       AS ATC4_DESC_C,
  P.APP1_DES         AS APP1_DESC,
  P.APP1_DES_C       AS APP1_DESC_C,
  P.APP2_DES         AS APP2_DESC,
  P.APP2_DES_C       AS APP2_DESC_C,
  P.APP3_DES         AS APP3_DESC,
  P.APP3_DES_C       AS APP3_DESC_C,
  P.CLASS,
  -- columns not populated by this load
  NULL               AS MARKET_RATIO,
  NULL               AS COUNTINGUNIT,
  NULL               AS VBP_BRAND,
  NULL               AS REPLENISH_FALG,
  -- load time, shifted from UTC to UTC+8
  FROM_UTC_TIMESTAMP(CURRENT_TIMESTAMP(), 'UTC+8') AS ETL_INSERT_DT,
  FROM_UTC_TIMESTAMP(CURRENT_TIMESTAMP(), 'UTC+8') AS ETL_UPDATE_DT
FROM DM.DM_ZK_RETAIL_DTP_PACK_PROPERTY AS P;

View File

@@ -0,0 +1,248 @@
-- Databricks notebook source
-- CREATE OR REPLACE TABLE DM.DM_TD_EXT_DTP_PACK2MARKET (
-- MARKET STRING,
-- PACK_CODE STRING,
-- PACK_DESC STRING,
-- STGH_DESC STRING,
-- PACK_LCH STRING,
-- PROD_CODE STRING,
-- CMPS_CODE STRING,
-- CMPS_DESC STRING,
-- ATC1_CODE STRING,
-- ATC2_CODE STRING,
-- ATC3_CODE STRING,
-- ATC4_CODE STRING,
-- APP1_CODE STRING,
-- APP2_CODE STRING,
-- APP3_CODE STRING,
-- BIO_DESC STRING,
-- GENE_ORIG_DESC STRING,
-- ETH_OTC_DESC STRING,
-- NRDL_DESC STRING,
-- NRDL_ENTRY_DATE STRING,
-- EDL_DESC STRING,
-- TCM_DESC STRING,
-- PAED_DESC STRING,
-- GQCE_DESC STRING,
-- VBP_DESC STRING,
-- MANU_CODE STRING,
-- MANU_DESC STRING,
-- MNFL_CODE STRING,
-- MNFL_DESC STRING,
-- CORP_CODE STRING,
-- CORP_DESC STRING,
-- BRANDTYPE STRING,
-- BU STRING,
-- STARTTIME STRING,
-- ENDTIME STRING,
-- MARKET_RATIO STRING,
-- ETL_INSERT_DT TIMESTAMP,
-- ETL_UPDATE_DT TIMESTAMP)
-- USING delta
-- -- LOCATION 'abfss://master@azcdatalakeprd.dfs.core.chinacloudapi.cn/DM/dm_td_ext_dtp_pack2market';
-- -- The LOCATION above is the production environment; the one below is the test environment.
-- LOCATION 'abfss://master@retaildlstoragetest.dfs.core.chinacloudapi.cn/DM/dm_td_ext_dtp_pack2market';
-- COMMAND ----------
-- Resolve which DTP packs belong to which market.
-- Every row of DWD.dwd_gnd_ext_retail_dtp_tblmarket_bymonth with EXTEND_MARKET IS NULL
-- is a rule: a NULL rule attribute matches any pack, a non-NULL one must equal the
-- pack's attribute. Rules with NOT_IN_FLAG NULL or '1' include packs, rules with
-- NOT_IN_FLAG = '0' exclude them; the view is (included rows) EXCEPT (excluded rows).
-- NOTE(review): EXCEPT compares entire rows, so an exclude rule only removes an included
-- row when MARKET, BU, STARTTIME, ENDTIME and MARKET_RATIO are identical too — confirm
-- exclude rules are maintained with the same values as the include rules they offset.
CREATE OR REPLACE TEMPORARY VIEW VIEW_EXT_DTP
AS
WITH MARKET_RULE AS (
  -- Base-market rules, each tagged as an include or an exclude rule.
  SELECT
    MARKET, BU, STARTTIME, ENDTIME, EXTEND_MARKET_RATIO,
    ATC1_CODE, ATC2_CODE, ATC3_CODE, ATC4_CODE,
    NFC1_CODE, NFC2_CODE, NFC3_CODE,
    CORPORATION_CODE, MANUFACTURER_CODE, PRODUCT_CODE,
    PACK_CODE, STRENGTH, MOLECULE_CODE,
    CASE WHEN NOT_IN_FLAG = '0' THEN 'EXCLUDE' ELSE 'INCLUDE' END AS RULE_KIND
  FROM DWD.dwd_gnd_ext_retail_dtp_tblmarket_bymonth
  WHERE EXTEND_MARKET IS NULL
    AND MARKET IS NOT NULL
    AND (NOT_IN_FLAG IS NULL OR NOT_IN_FLAG = '1' OR NOT_IN_FLAG = '0')
),
RULE_HIT AS (
  -- One row per (pack, rule) pair where the pack satisfies every non-NULL rule attribute.
  SELECT
    R.RULE_KIND,
    R.MARKET,
    P.PACK_CODE,
    P.PACK_DESC,
    P.STGH_DESC,
    P.PACK_LCH,
    P.PROD_CODE,
    P.CMPS_CODE,
    P.CMPS_DESC,
    P.ATC1_CODE,
    P.ATC2_CODE,
    P.ATC3_CODE,
    P.ATC4_CODE,
    P.APP1_CODE,
    P.APP2_CODE,
    P.APP3_CODE,
    P.BIO_DESC,
    P.GENE_ORIG_DESC  AS GENE_ORIG,
    P.ETH_OTC_DESC    AS ETH_OTC_D,
    P.NRDL_DESC,
    P.NRDL_ENTRY_DATE AS NRDL_ENTR,
    P.EDL_DESC,
    P.TCM_DESC,
    P.PAED_DESC,
    P.GQCE_DESC,
    P.VBP_DESC,
    P.MANU_CODE,
    P.MANU_DESC,
    P.MNFL_CODE,
    P.MNFL_DESC,
    P.CORP_CODE,
    P.CORP_DESC,
    P.BRANDTYPE,
    R.BU,
    R.STARTTIME,
    R.ENDTIME,
    COALESCE(R.EXTEND_MARKET_RATIO, '1') AS MARKET_RATIO
  FROM DM.DM_TD_EXT_RETAIL_DTP_PACK_PROPERTY AS P
  INNER JOIN MARKET_RULE AS R
    ON  (R.ATC1_CODE         IS NULL OR P.ATC1_CODE = R.ATC1_CODE)
    AND (R.ATC2_CODE         IS NULL OR P.ATC2_CODE = R.ATC2_CODE)
    AND (R.ATC3_CODE         IS NULL OR P.ATC3_CODE = R.ATC3_CODE)
    AND (R.ATC4_CODE         IS NULL OR P.ATC4_CODE = R.ATC4_CODE)
    AND (R.NFC1_CODE         IS NULL OR P.APP1_CODE = R.NFC1_CODE)
    AND (R.NFC2_CODE         IS NULL OR P.APP2_CODE = R.NFC2_CODE)
    AND (R.NFC3_CODE         IS NULL OR P.APP3_CODE = R.NFC3_CODE)
    AND (R.CORPORATION_CODE  IS NULL OR P.CORP_CODE = R.CORPORATION_CODE)
    AND (R.MANUFACTURER_CODE IS NULL OR P.MANU_CODE = R.MANUFACTURER_CODE)
    AND (R.PRODUCT_CODE      IS NULL OR P.PROD_CODE = R.PRODUCT_CODE)
    AND (R.PACK_CODE         IS NULL OR P.PACK_CODE = R.PACK_CODE)
    AND (R.STRENGTH          IS NULL OR P.STGH_DESC = R.STRENGTH)
    AND (R.MOLECULE_CODE     IS NULL OR P.CMPS_CODE = R.MOLECULE_CODE)
)
-- include rules
SELECT
  MARKET, PACK_CODE, PACK_DESC, STGH_DESC, PACK_LCH, PROD_CODE, CMPS_CODE, CMPS_DESC,
  ATC1_CODE, ATC2_CODE, ATC3_CODE, ATC4_CODE, APP1_CODE, APP2_CODE, APP3_CODE,
  BIO_DESC, GENE_ORIG, ETH_OTC_D, NRDL_DESC, NRDL_ENTR, EDL_DESC, TCM_DESC,
  PAED_DESC, GQCE_DESC, VBP_DESC, MANU_CODE, MANU_DESC, MNFL_CODE, MNFL_DESC,
  CORP_CODE, CORP_DESC, BRANDTYPE, BU, STARTTIME, ENDTIME, MARKET_RATIO
FROM RULE_HIT
WHERE RULE_KIND = 'INCLUDE'
EXCEPT
-- exclude rules
SELECT
  MARKET, PACK_CODE, PACK_DESC, STGH_DESC, PACK_LCH, PROD_CODE, CMPS_CODE, CMPS_DESC,
  ATC1_CODE, ATC2_CODE, ATC3_CODE, ATC4_CODE, APP1_CODE, APP2_CODE, APP3_CODE,
  BIO_DESC, GENE_ORIG, ETH_OTC_D, NRDL_DESC, NRDL_ENTR, EDL_DESC, TCM_DESC,
  PAED_DESC, GQCE_DESC, VBP_DESC, MANU_CODE, MANU_DESC, MNFL_CODE, MNFL_DESC,
  CORP_CODE, CORP_DESC, BRANDTYPE, BU, STARTTIME, ENDTIME, MARKET_RATIO
FROM RULE_HIT
WHERE RULE_KIND = 'EXCLUDE';
-- COMMAND ----------
-- Persist the VIEW_EXT_DTP pack-to-market mapping into DM.DM_TD_EXT_DTP_PACK2MARKET,
-- adding the ETL timestamps. Values are matched to the target table by position.
INSERT OVERWRITE TABLE DM.DM_TD_EXT_DTP_PACK2MARKET
SELECT
  V.MARKET,
  -- pack / product / molecule
  V.PACK_CODE, V.PACK_DESC, V.STGH_DESC, V.PACK_LCH,
  V.PROD_CODE, V.CMPS_CODE, V.CMPS_DESC,
  -- classification codes
  V.ATC1_CODE, V.ATC2_CODE, V.ATC3_CODE, V.ATC4_CODE,
  V.APP1_CODE, V.APP2_CODE, V.APP3_CODE,
  -- descriptive attributes (view aliases are shortened forms of the *_DESC columns)
  V.BIO_DESC, V.GENE_ORIG, V.ETH_OTC_D, V.NRDL_DESC, V.NRDL_ENTR,
  V.EDL_DESC, V.TCM_DESC, V.PAED_DESC, V.GQCE_DESC, V.VBP_DESC,
  -- manufacturer / corporation
  V.MANU_CODE, V.MANU_DESC, V.MNFL_CODE, V.MNFL_DESC,
  V.CORP_CODE, V.CORP_DESC, V.BRANDTYPE,
  -- market rule attributes
  V.BU, V.STARTTIME, V.ENDTIME, V.MARKET_RATIO,
  -- load time, shifted from UTC to UTC+8
  FROM_UTC_TIMESTAMP(CURRENT_TIMESTAMP(), 'UTC+8') AS ETL_INSERT_DT,
  FROM_UTC_TIMESTAMP(CURRENT_TIMESTAMP(), 'UTC+8') AS ETL_UPDATE_DT
FROM VIEW_EXT_DTP AS V

View File

@@ -0,0 +1,248 @@
-- Databricks notebook source
-- CREATE OR REPLACE TABLE DM.DM_TD_EXT_RETAIL_PACK2MARKET (
-- MARKET STRING,
-- PACK_CODE STRING,
-- PACK_DESC STRING,
-- STGH_DESC STRING,
-- PACK_LCH STRING,
-- PROD_CODE STRING,
-- CMPS_CODE STRING,
-- CMPS_DESC STRING,
-- ATC1_CODE STRING,
-- ATC2_CODE STRING,
-- ATC3_CODE STRING,
-- ATC4_CODE STRING,
-- APP1_CODE STRING,
-- APP2_CODE STRING,
-- APP3_CODE STRING,
-- BIO_DESC STRING,
-- GENE_ORIG_DESC STRING,
-- ETH_OTC_DESC STRING,
-- NRDL_DESC STRING,
-- NRDL_ENTRY_DATE STRING,
-- EDL_DESC STRING,
-- TCM_DESC STRING,
-- PAED_DESC STRING,
-- GQCE_DESC STRING,
-- VBP_DESC STRING,
-- MANU_CODE STRING,
-- MANU_DESC STRING,
-- MNFL_CODE STRING,
-- MNFL_DESC STRING,
-- CORP_CODE STRING,
-- CORP_DESC STRING,
-- BRANDTYPE STRING,
-- BU STRING,
-- STARTTIME STRING,
-- ENDTIME STRING,
-- MARKET_RATIO STRING,
-- ETL_INSERT_DT TIMESTAMP,
-- ETL_UPDATE_DT TIMESTAMP)
-- USING delta
-- -- LOCATION 'abfss://master@azcdatalakeprd.dfs.core.chinacloudapi.cn/DM/dm_td_ext_retail_pack2market';
-- -- The LOCATION above is the production environment; the one below is the test environment.
-- LOCATION 'abfss://master@retaildlstoragetest.dfs.core.chinacloudapi.cn/DM/dm_td_ext_retail_pack2market';
-- COMMAND ----------
-- Resolve which retail packs belong to which market.
-- Every row of DWD.dwd_gnd_ext_retail_tblmarket_bymonth with EXTEND_MARKET IS NULL
-- is a rule: a NULL rule attribute matches any pack, a non-NULL one must equal the
-- pack's attribute. Rules with NOT_IN_FLAG NULL or '1' include packs, rules with
-- NOT_IN_FLAG = '0' exclude them; the view is (included rows) EXCEPT (excluded rows).
-- NOTE(review): EXCEPT compares entire rows, so an exclude rule only removes an included
-- row when MARKET, BU, STARTTIME, ENDTIME and MARKET_RATIO are identical too — confirm
-- exclude rules are maintained with the same values as the include rules they offset.
CREATE OR REPLACE TEMPORARY VIEW VIEW_EXT_RETAIL
AS
WITH MARKET_RULE AS (
  -- Base-market rules, each tagged as an include or an exclude rule.
  SELECT
    MARKET, BU, STARTTIME, ENDTIME, EXTEND_MARKET_RATIO,
    ATC1_CODE, ATC2_CODE, ATC3_CODE, ATC4_CODE,
    NFC1_CODE, NFC2_CODE, NFC3_CODE,
    CORPORATION_CODE, MANUFACTURER_CODE, PRODUCT_CODE,
    PACK_CODE, STRENGTH, MOLECULE_CODE,
    CASE WHEN NOT_IN_FLAG = '0' THEN 'EXCLUDE' ELSE 'INCLUDE' END AS RULE_KIND
  FROM DWD.dwd_gnd_ext_retail_tblmarket_bymonth
  WHERE EXTEND_MARKET IS NULL
    AND MARKET IS NOT NULL
    AND (NOT_IN_FLAG IS NULL OR NOT_IN_FLAG = '1' OR NOT_IN_FLAG = '0')
),
RULE_HIT AS (
  -- One row per (pack, rule) pair where the pack satisfies every non-NULL rule attribute.
  SELECT
    R.RULE_KIND,
    R.MARKET,
    P.PACK_CODE,
    P.PACK_DESC,
    P.STGH_DESC,
    P.PACK_LCH,
    P.PROD_CODE,
    P.CMPS_CODE,
    P.CMPS_DESC,
    P.ATC1_CODE,
    P.ATC2_CODE,
    P.ATC3_CODE,
    P.ATC4_CODE,
    P.APP1_CODE,
    P.APP2_CODE,
    P.APP3_CODE,
    P.BIO_DESC,
    P.GENE_ORIG_DESC  AS GENE_ORIG,
    P.ETH_OTC_DESC    AS ETH_OTC_D,
    P.NRDL_DESC,
    P.NRDL_ENTRY_DATE AS NRDL_ENTR,
    P.EDL_DESC,
    P.TCM_DESC,
    P.PAED_DESC,
    P.GQCE_DESC,
    P.VBP_DESC,
    P.MANU_CODE,
    P.MANU_DESC,
    P.MNFL_CODE,
    P.MNFL_DESC,
    P.CORP_CODE,
    P.CORP_DESC,
    P.BRANDTYPE,
    R.BU,
    R.STARTTIME,
    R.ENDTIME,
    COALESCE(R.EXTEND_MARKET_RATIO, '1') AS MARKET_RATIO
  FROM DM.DM_TD_EXT_RETAIL_PACK_PROPERTY AS P
  INNER JOIN MARKET_RULE AS R
    ON  (R.ATC1_CODE         IS NULL OR P.ATC1_CODE = R.ATC1_CODE)
    AND (R.ATC2_CODE         IS NULL OR P.ATC2_CODE = R.ATC2_CODE)
    AND (R.ATC3_CODE         IS NULL OR P.ATC3_CODE = R.ATC3_CODE)
    AND (R.ATC4_CODE         IS NULL OR P.ATC4_CODE = R.ATC4_CODE)
    AND (R.NFC1_CODE         IS NULL OR P.APP1_CODE = R.NFC1_CODE)
    AND (R.NFC2_CODE         IS NULL OR P.APP2_CODE = R.NFC2_CODE)
    AND (R.NFC3_CODE         IS NULL OR P.APP3_CODE = R.NFC3_CODE)
    AND (R.CORPORATION_CODE  IS NULL OR P.CORP_CODE = R.CORPORATION_CODE)
    AND (R.MANUFACTURER_CODE IS NULL OR P.MANU_CODE = R.MANUFACTURER_CODE)
    AND (R.PRODUCT_CODE      IS NULL OR P.PROD_CODE = R.PRODUCT_CODE)
    AND (R.PACK_CODE         IS NULL OR P.PACK_CODE = R.PACK_CODE)
    AND (R.STRENGTH          IS NULL OR P.STGH_DESC = R.STRENGTH)
    AND (R.MOLECULE_CODE     IS NULL OR P.CMPS_CODE = R.MOLECULE_CODE)
)
-- include rules
SELECT
  MARKET, PACK_CODE, PACK_DESC, STGH_DESC, PACK_LCH, PROD_CODE, CMPS_CODE, CMPS_DESC,
  ATC1_CODE, ATC2_CODE, ATC3_CODE, ATC4_CODE, APP1_CODE, APP2_CODE, APP3_CODE,
  BIO_DESC, GENE_ORIG, ETH_OTC_D, NRDL_DESC, NRDL_ENTR, EDL_DESC, TCM_DESC,
  PAED_DESC, GQCE_DESC, VBP_DESC, MANU_CODE, MANU_DESC, MNFL_CODE, MNFL_DESC,
  CORP_CODE, CORP_DESC, BRANDTYPE, BU, STARTTIME, ENDTIME, MARKET_RATIO
FROM RULE_HIT
WHERE RULE_KIND = 'INCLUDE'
EXCEPT
-- exclude rules
SELECT
  MARKET, PACK_CODE, PACK_DESC, STGH_DESC, PACK_LCH, PROD_CODE, CMPS_CODE, CMPS_DESC,
  ATC1_CODE, ATC2_CODE, ATC3_CODE, ATC4_CODE, APP1_CODE, APP2_CODE, APP3_CODE,
  BIO_DESC, GENE_ORIG, ETH_OTC_D, NRDL_DESC, NRDL_ENTR, EDL_DESC, TCM_DESC,
  PAED_DESC, GQCE_DESC, VBP_DESC, MANU_CODE, MANU_DESC, MNFL_CODE, MNFL_DESC,
  CORP_CODE, CORP_DESC, BRANDTYPE, BU, STARTTIME, ENDTIME, MARKET_RATIO
FROM RULE_HIT
WHERE RULE_KIND = 'EXCLUDE';
-- COMMAND ----------
-- Persist the VIEW_EXT_RETAIL pack-to-market mapping into DM.DM_TD_EXT_RETAIL_PACK2MARKET,
-- adding the ETL timestamps. Values are matched to the target table by position.
INSERT OVERWRITE TABLE DM.DM_TD_EXT_RETAIL_PACK2MARKET
SELECT
  V.MARKET,
  -- pack / product / molecule
  V.PACK_CODE, V.PACK_DESC, V.STGH_DESC, V.PACK_LCH,
  V.PROD_CODE, V.CMPS_CODE, V.CMPS_DESC,
  -- classification codes
  V.ATC1_CODE, V.ATC2_CODE, V.ATC3_CODE, V.ATC4_CODE,
  V.APP1_CODE, V.APP2_CODE, V.APP3_CODE,
  -- descriptive attributes (view aliases are shortened forms of the *_DESC columns)
  V.BIO_DESC, V.GENE_ORIG, V.ETH_OTC_D, V.NRDL_DESC, V.NRDL_ENTR,
  V.EDL_DESC, V.TCM_DESC, V.PAED_DESC, V.GQCE_DESC, V.VBP_DESC,
  -- manufacturer / corporation
  V.MANU_CODE, V.MANU_DESC, V.MNFL_CODE, V.MNFL_DESC,
  V.CORP_CODE, V.CORP_DESC, V.BRANDTYPE,
  -- market rule attributes
  V.BU, V.STARTTIME, V.ENDTIME, V.MARKET_RATIO,
  -- load time, shifted from UTC to UTC+8
  FROM_UTC_TIMESTAMP(CURRENT_TIMESTAMP(), 'UTC+8') AS ETL_INSERT_DT,
  FROM_UTC_TIMESTAMP(CURRENT_TIMESTAMP(), 'UTC+8') AS ETL_UPDATE_DT
FROM VIEW_EXT_RETAIL AS V

View File

@@ -0,0 +1,30 @@
-- Databricks notebook source
-- CREATE OR REPLACE TABLE DM.DM_TD_EXT_DTP_MARKET_RATIO (
-- MARKET STRING,
-- PACK_CODE STRING,
-- CMPS_CODE STRING,
-- STARTTIME STRING,
-- ENDTIME STRING,
-- MARKET_RATIO STRING,
-- ETL_INSERT_DT TIMESTAMP,
-- ETL_UPDATE_DT TIMESTAMP)
-- USING delta
-- -- LOCATION 'abfss://master@azcdatalakeprd.dfs.core.chinacloudapi.cn/DM/dm_td_ext_dtp_market_ratio';
-- -- The LOCATION above is the production environment; the one below is the test environment.
-- LOCATION 'abfss://master@retaildlstoragetest.dfs.core.chinacloudapi.cn/DM/dm_td_ext_dtp_market_ratio';
-- COMMAND ----------
-- Reload DM.DM_TD_EXT_DTP_MARKET_RATIO with the distinct market / pack / molecule /
-- period / ratio combinations found in DM.DM_TD_EXT_DTP_PACK2MARKET.
-- Markets whose upper-cased name contains 'ALL' are left out; rows with a NULL MARKET
-- are dropped as well because the NOT LIKE predicate evaluates to NULL for them.
INSERT OVERWRITE TABLE DM.DM_TD_EXT_DTP_MARKET_RATIO
SELECT
  R.MARKET,
  R.PACK_CODE,
  R.CMPS_CODE,
  R.STARTTIME,
  R.ENDTIME,
  R.MARKET_RATIO,
  FROM_UTC_TIMESTAMP(CURRENT_TIMESTAMP(), 'UTC+8') AS ETL_INSERT_DT,
  FROM_UTC_TIMESTAMP(CURRENT_TIMESTAMP(), 'UTC+8') AS ETL_UPDATE_DT
FROM (
  SELECT DISTINCT
    MARKET, PACK_CODE, CMPS_CODE, STARTTIME, ENDTIME, MARKET_RATIO
  FROM DM.DM_TD_EXT_DTP_PACK2MARKET
  WHERE UPPER(MARKET) NOT LIKE '%ALL%'
) AS R

View File

@@ -0,0 +1,30 @@
-- Databricks notebook source
-- CREATE OR REPLACE TABLE DM.DM_TD_EXT_RETAIL_MARKET_RATIO (
-- MARKET STRING,
-- PACK_CODE STRING,
-- CMPS_CODE STRING,
-- STARTTIME STRING,
-- ENDTIME STRING,
-- MARKET_RATIO STRING,
-- ETL_INSERT_DT TIMESTAMP,
-- ETL_UPDATE_DT TIMESTAMP)
-- USING delta
-- -- LOCATION 'abfss://master@azcdatalakeprd.dfs.core.chinacloudapi.cn/DM/dm_td_ext_retail_market_ratio';
-- -- The LOCATION above is the production environment; the one below is the test environment.
-- LOCATION 'abfss://master@retaildlstoragetest.dfs.core.chinacloudapi.cn/DM/dm_td_ext_retail_market_ratio';
-- COMMAND ----------
-- Reload DM.DM_TD_EXT_RETAIL_MARKET_RATIO with the distinct market / pack / molecule /
-- period / ratio combinations found in DM.DM_TD_EXT_RETAIL_PACK2MARKET.
-- Markets whose upper-cased name contains 'ALL' are left out; rows with a NULL MARKET
-- are dropped as well because the NOT LIKE predicate evaluates to NULL for them.
INSERT OVERWRITE TABLE DM.DM_TD_EXT_RETAIL_MARKET_RATIO
SELECT
  R.MARKET,
  R.PACK_CODE,
  R.CMPS_CODE,
  R.STARTTIME,
  R.ENDTIME,
  R.MARKET_RATIO,
  FROM_UTC_TIMESTAMP(CURRENT_TIMESTAMP(), 'UTC+8') AS ETL_INSERT_DT,
  FROM_UTC_TIMESTAMP(CURRENT_TIMESTAMP(), 'UTC+8') AS ETL_UPDATE_DT
FROM (
  SELECT DISTINCT
    MARKET, PACK_CODE, CMPS_CODE, STARTTIME, ENDTIME, MARKET_RATIO
  FROM DM.DM_TD_EXT_RETAIL_PACK2MARKET
  WHERE UPPER(MARKET) NOT LIKE '%ALL%'
) AS R

View File

@@ -0,0 +1,160 @@
-- Databricks notebook source
-- CREATE OR REPLACE TABLE DM.DM_TD_EXT_DTP_MARKET_BRAND_RATIO (
-- MARKET STRING,
-- PACK_CODE STRING,
-- VALUE_BRAND_RATIO FLOAT,
-- VALUE_BRAND_RATIO_START STRING,
-- VALUE_BRAND_RATIO_END STRING,
-- UNIT_BRAND_RATIO FLOAT,
-- UNIT_BRAND_START STRING,
-- UNIT_BRAND_END STRING,
-- COUNTINGUNIT_BRAND_RATIO FLOAT,
-- COUNTINGUNIT_BRAND_START STRING,
-- COUNTINGUNIT_BRAND_END STRING,
-- PDOT_BRAND_RATIO FLOAT,
-- PDOT_BRAND_START STRING,
-- PDOT_BRAND_END STRING,
-- ETL_INSERT_DT TIMESTAMP,
-- ETL_UPDATE_DT TIMESTAMP)
-- USING delta
-- -- LOCATION 'abfss://master@azcdatalakeprd.dfs.core.chinacloudapi.cn/DM/dm_td_ext_dtp_market_brand_ratio';
-- -- The LOCATION above is the production environment; the one below is the test environment.
-- LOCATION 'abfss://master@retaildlstoragetest.dfs.core.chinacloudapi.cn/DM/dm_td_ext_dtp_market_brand_ratio';
-- COMMAND ----------
-- Reload DM.DM_TD_EXT_DTP_MARKET_BRAND_RATIO.
-- For each (MARKET, PACK_CODE) of DM.DM_TD_EXT_DTP_MARKET_RATIO a brand ratio and its
-- start/end period are looked up in DWD.DWD_GND_IMS_TBLBRANDRATIO_BYMONTH: a PACK-level
-- rule (matched on MARKET + PACK_COD) wins when it has a ratio, otherwise the
-- MOLECULE-level rule (matched on MARKET + CMPS_COD) is used, otherwise the defaults
-- ratio 1 / start 200001 / end 299912 apply.
-- NOTE(review): the PDOT_* columns are fed from the COUNTINGUNIT rules while the
-- COUNTINGUNIT_* columns are constants — confirm this is intended.
WITH BRAND_RATIO AS (
  -- Rule table with normalised TYPE / LEVEL and typed ratio / period columns.
  SELECT
    MARKET,
    CMPS_COD,
    PACK_COD,
    UPPER(TYPE)            AS RATIO_TYPE,
    UPPER(LEVEL)           AS RATIO_LEVEL,
    CAST(RATIO AS FLOAT)   AS RATIO,
    CAST(STARTTIME AS INT) AS STARTTIME,
    CAST(ENDTIME AS INT)   AS ENDTIME
  FROM DWD.DWD_GND_IMS_TBLBRANDRATIO_BYMONTH
)
INSERT OVERWRITE TABLE DM.DM_TD_EXT_DTP_MARKET_BRAND_RATIO
SELECT DISTINCT
  MR.MARKET,
  MR.PACK_CODE,
  -- VALUE
  IF(PV.RATIO IS NOT NULL, PV.RATIO,     NVL(MV.RATIO, 1))          AS VALUE_BRAND_RATIO,
  IF(PV.RATIO IS NOT NULL, PV.STARTTIME, NVL(MV.STARTTIME, 200001)) AS VALUE_BRAND_RATIO_START,
  IF(PV.RATIO IS NOT NULL, PV.ENDTIME,   NVL(MV.ENDTIME, 299912))   AS VALUE_BRAND_RATIO_END,
  -- UNIT
  IF(PU.RATIO IS NOT NULL, PU.RATIO,     NVL(MU.RATIO, 1))          AS UNIT_BRAND_RATIO,
  IF(PU.RATIO IS NOT NULL, PU.STARTTIME, NVL(MU.STARTTIME, 200001)) AS UNIT_BRAND_START,
  IF(PU.RATIO IS NOT NULL, PU.ENDTIME,   NVL(MU.ENDTIME, 299912))   AS UNIT_BRAND_END,
  -- COUNTINGUNIT (constants)
  1      AS COUNTINGUNIT_BRAND_RATIO,
  200001 AS COUNTINGUNIT_BRAND_START,
  299912 AS COUNTINGUNIT_BRAND_END,
  -- PDOT (taken from the COUNTINGUNIT rules)
  IF(PC.RATIO IS NOT NULL, PC.RATIO,     NVL(MC.RATIO, 1))          AS PDOT_BRAND_RATIO,
  IF(PC.RATIO IS NOT NULL, PC.STARTTIME, NVL(MC.STARTTIME, 200001)) AS PDOT_BRAND_START,
  IF(PC.RATIO IS NOT NULL, PC.ENDTIME,   NVL(MC.ENDTIME, 299912))   AS PDOT_BRAND_END,
  FROM_UTC_TIMESTAMP(CURRENT_TIMESTAMP(), 'UTC+8') AS ETL_INSERT_DT,
  FROM_UTC_TIMESTAMP(CURRENT_TIMESTAMP(), 'UTC+8') AS ETL_UPDATE_DT
FROM DM.DM_TD_EXT_DTP_MARKET_RATIO AS MR
-- pack-level rules
LEFT JOIN BRAND_RATIO AS PV
  ON  PV.RATIO_TYPE = 'VALUE' AND PV.RATIO_LEVEL = 'PACK'
  AND MR.MARKET = PV.MARKET AND MR.PACK_CODE = PV.PACK_COD
LEFT JOIN BRAND_RATIO AS PU
  ON  PU.RATIO_TYPE = 'UNIT' AND PU.RATIO_LEVEL = 'PACK'
  AND MR.MARKET = PU.MARKET AND MR.PACK_CODE = PU.PACK_COD
LEFT JOIN BRAND_RATIO AS PC
  ON  PC.RATIO_TYPE = 'COUNTINGUNIT' AND PC.RATIO_LEVEL = 'PACK'
  AND MR.MARKET = PC.MARKET AND MR.PACK_CODE = PC.PACK_COD
-- molecule-level rules
LEFT JOIN BRAND_RATIO AS MV
  ON  MV.RATIO_TYPE = 'VALUE' AND MV.RATIO_LEVEL = 'MOLECULE'
  AND MR.MARKET = MV.MARKET AND MR.CMPS_CODE = MV.CMPS_COD
LEFT JOIN BRAND_RATIO AS MU
  ON  MU.RATIO_TYPE = 'UNIT' AND MU.RATIO_LEVEL = 'MOLECULE'
  AND MR.MARKET = MU.MARKET AND MR.CMPS_CODE = MU.CMPS_COD
LEFT JOIN BRAND_RATIO AS MC
  ON  MC.RATIO_TYPE = 'COUNTINGUNIT' AND MC.RATIO_LEVEL = 'MOLECULE'
  AND MR.MARKET = MC.MARKET AND MR.CMPS_CODE = MC.CMPS_COD

View File

@@ -0,0 +1,163 @@
-- Databricks notebook source
-- CREATE OR REPLACE TABLE DM.DM_TD_EXT_RETAIL_MARKET_BRAND_RATIO (
-- MARKET STRING,
-- PACK_CODE STRING,
-- VALUE_BRAND_RATIO FLOAT,
-- VALUE_BRAND_RATIO_START STRING,
-- VALUE_BRAND_RATIO_END STRING,
-- UNIT_BRAND_RATIO FLOAT,
-- UNIT_BRAND_START STRING,
-- UNIT_BRAND_END STRING,
-- COUNTINGUNIT_BRAND_RATIO FLOAT,
-- COUNTINGUNIT_BRAND_START STRING,
-- COUNTINGUNIT_BRAND_END STRING,
-- PDOT_BRAND_RATIO FLOAT,
-- PDOT_BRAND_START STRING,
-- PDOT_BRAND_END STRING,
-- ETL_INSERT_DT TIMESTAMP,
-- ETL_UPDATE_DT TIMESTAMP)
-- USING delta
-- -- LOCATION 'abfss://master@azcdatalakeprd.dfs.core.chinacloudapi.cn/DM/dm_td_ext_retail_market_brand_ratio';
-- -- The LOCATION above is the production environment; the one below is the test environment.
-- LOCATION 'abfss://master@retaildlstoragetest.dfs.core.chinacloudapi.cn/DM/dm_td_ext_retail_market_brand_ratio';
-- COMMAND ----------
-- Reload DM.DM_TD_EXT_RETAIL_MARKET_BRAND_RATIO.
-- For each (MARKET, PACK_CODE) of DM.DM_TD_EXT_RETAIL_MARKET_RATIO a brand ratio and its
-- start/end period are looked up in DWD.DWD_GND_IMS_TBLBRANDRATIO_BYMONTH: a PACK-level
-- rule (matched on MARKET + PACK_COD) wins when it has a ratio, otherwise the
-- MOLECULE-level rule (matched on MARKET + CMPS_COD) is used, otherwise the defaults
-- ratio 1 / start 200001 / end 299912 apply.
-- NOTE(review): the PDOT_* columns are fed from the COUNTINGUNIT rules while the
-- COUNTINGUNIT_* columns are constants — confirm this is intended.
WITH BRAND_RATIO AS (
  -- Rule table with normalised TYPE / LEVEL and typed ratio / period columns.
  SELECT
    MARKET,
    CMPS_COD,
    PACK_COD,
    UPPER(TYPE)            AS RATIO_TYPE,
    UPPER(LEVEL)           AS RATIO_LEVEL,
    CAST(RATIO AS FLOAT)   AS RATIO,
    CAST(STARTTIME AS INT) AS STARTTIME,
    CAST(ENDTIME AS INT)   AS ENDTIME
  FROM DWD.DWD_GND_IMS_TBLBRANDRATIO_BYMONTH
)
INSERT OVERWRITE TABLE DM.DM_TD_EXT_RETAIL_MARKET_BRAND_RATIO
SELECT DISTINCT
  MR.MARKET,
  MR.PACK_CODE,
  -- VALUE
  IF(PV.RATIO IS NOT NULL, PV.RATIO,     NVL(MV.RATIO, 1))          AS VALUE_BRAND_RATIO,
  IF(PV.RATIO IS NOT NULL, PV.STARTTIME, NVL(MV.STARTTIME, 200001)) AS VALUE_BRAND_RATIO_START,
  IF(PV.RATIO IS NOT NULL, PV.ENDTIME,   NVL(MV.ENDTIME, 299912))   AS VALUE_BRAND_RATIO_END,
  -- UNIT
  IF(PU.RATIO IS NOT NULL, PU.RATIO,     NVL(MU.RATIO, 1))          AS UNIT_BRAND_RATIO,
  IF(PU.RATIO IS NOT NULL, PU.STARTTIME, NVL(MU.STARTTIME, 200001)) AS UNIT_BRAND_START,
  IF(PU.RATIO IS NOT NULL, PU.ENDTIME,   NVL(MU.ENDTIME, 299912))   AS UNIT_BRAND_END,
  -- COUNTINGUNIT (constants)
  1      AS COUNTINGUNIT_BRAND_RATIO,
  200001 AS COUNTINGUNIT_BRAND_START,
  299912 AS COUNTINGUNIT_BRAND_END,
  -- PDOT (taken from the COUNTINGUNIT rules)
  IF(PC.RATIO IS NOT NULL, PC.RATIO,     NVL(MC.RATIO, 1))          AS PDOT_BRAND_RATIO,
  IF(PC.RATIO IS NOT NULL, PC.STARTTIME, NVL(MC.STARTTIME, 200001)) AS PDOT_BRAND_START,
  IF(PC.RATIO IS NOT NULL, PC.ENDTIME,   NVL(MC.ENDTIME, 299912))   AS PDOT_BRAND_END,
  FROM_UTC_TIMESTAMP(CURRENT_TIMESTAMP(), 'UTC+8') AS ETL_INSERT_DT,
  FROM_UTC_TIMESTAMP(CURRENT_TIMESTAMP(), 'UTC+8') AS ETL_UPDATE_DT
FROM DM.DM_TD_EXT_RETAIL_MARKET_RATIO AS MR
-- pack-level rules
LEFT JOIN BRAND_RATIO AS PV
  ON  PV.RATIO_TYPE = 'VALUE' AND PV.RATIO_LEVEL = 'PACK'
  AND MR.MARKET = PV.MARKET AND MR.PACK_CODE = PV.PACK_COD
LEFT JOIN BRAND_RATIO AS PU
  ON  PU.RATIO_TYPE = 'UNIT' AND PU.RATIO_LEVEL = 'PACK'
  AND MR.MARKET = PU.MARKET AND MR.PACK_CODE = PU.PACK_COD
LEFT JOIN BRAND_RATIO AS PC
  ON  PC.RATIO_TYPE = 'COUNTINGUNIT' AND PC.RATIO_LEVEL = 'PACK'
  AND MR.MARKET = PC.MARKET AND MR.PACK_CODE = PC.PACK_COD
-- molecule-level rules
LEFT JOIN BRAND_RATIO AS MV
  ON  MV.RATIO_TYPE = 'VALUE' AND MV.RATIO_LEVEL = 'MOLECULE'
  AND MR.MARKET = MV.MARKET AND MR.CMPS_CODE = MV.CMPS_COD
LEFT JOIN BRAND_RATIO AS MU
  ON  MU.RATIO_TYPE = 'UNIT' AND MU.RATIO_LEVEL = 'MOLECULE'
  AND MR.MARKET = MU.MARKET AND MR.CMPS_CODE = MU.CMPS_COD
LEFT JOIN BRAND_RATIO AS MC
  ON  MC.RATIO_TYPE = 'COUNTINGUNIT' AND MC.RATIO_LEVEL = 'MOLECULE'
  AND MR.MARKET = MC.MARKET AND MR.CMPS_CODE = MC.CMPS_COD
-- COMMAND ----------

View File

@@ -0,0 +1,88 @@
-- Databricks notebook source
-- CREATE OR REPLACE TABLE DM.DM_TD_EXT_DTP_MARKET_PACK_MAPPING
-- (
-- PACK_CODE STRING,
-- MARKET STRING,
-- CLASS STRING,
-- KEY_COMPETITOR STRING,
-- MARKET_RATIO STRING,
-- STARTTIME STRING,
-- ENDTIME STRING,
-- VALUE_BRAND_RATIO STRING,
-- VALUE_BRAND_RATIO_START STRING,
-- VALUE_BRAND_RATIO_END STRING,
-- UNIT_BRAND_RATIO STRING,
-- UNIT_BRAND_START STRING,
-- UNIT_BRAND_END STRING,
-- COUNTINGUNIT_BRAND_RATIO STRING,
-- COUNTINGUNIT_BRAND_START STRING,
-- COUNTINGUNIT_BRAND_END STRING,
-- PDOT_BRAND_RATIO STRING,
-- PDOT_BRAND_START STRING,
-- PDOT_BRAND_END STRING,
-- ETL_INSERT_DT TIMESTAMP,
-- ETL_UPDATE_DT TIMESTAMP
-- )
-- USING delta
-- -- LOCATION 'abfss://master@azcdatalakeprd.dfs.core.chinacloudapi.cn/DM/dm_td_ext_dtp_market_pack_mapping';
-- -- 上面是生产环境location下面是测试环境location
-- LOCATION 'abfss://master@retaildlstoragetest.dfs.core.chinacloudapi.cn/DM/dm_td_ext_dtp_market_pack_mapping';
-- COMMAND ----------
-- Build the DTP market / pack mapping as a session-scoped temporary view.
-- Driving table: pack properties (PROP). Market ratios (MR) and brand ratios
-- (BR) are attached with LEFT JOINs on MARKET + PACK_CODE, so a pack without
-- a matching ratio row is kept with NULL ratio columns.
-- Rows whose upper-cased MARKET contains ALL are excluded; rows with a NULL
-- MARKET are dropped by the same predicate because NOT LIKE then yields NULL.
CREATE OR REPLACE TEMPORARY VIEW DTP_MARKET_PACK_MAPPING
AS
SELECT DISTINCT
    PROP.PACK_CODE,
    PROP.MARKET,
    PROP.CLASS,
    PROP.KEY_COMPETITOR,
    MR.MARKET_RATIO,
    MR.STARTTIME,
    MR.ENDTIME,
    BR.VALUE_BRAND_RATIO,
    BR.VALUE_BRAND_RATIO_START,
    BR.VALUE_BRAND_RATIO_END,
    BR.UNIT_BRAND_RATIO,
    BR.UNIT_BRAND_START,
    BR.UNIT_BRAND_END,
    BR.COUNTINGUNIT_BRAND_RATIO,
    BR.COUNTINGUNIT_BRAND_START,
    BR.COUNTINGUNIT_BRAND_END,
    BR.PDOT_BRAND_RATIO,
    BR.PDOT_BRAND_START,
    BR.PDOT_BRAND_END
FROM DM.DM_TD_EXT_RETAIL_DTP_PACK_PROPERTY PROP
LEFT JOIN DM.DM_TD_EXT_DTP_MARKET_RATIO MR
    ON PROP.MARKET = MR.MARKET
   AND PROP.PACK_CODE = MR.PACK_CODE
LEFT JOIN DM.DM_TD_EXT_DTP_MARKET_BRAND_RATIO BR
    ON PROP.MARKET = BR.MARKET
   AND PROP.PACK_CODE = BR.PACK_CODE
-- The pattern is a single-quoted string literal: double quotes delimit
-- identifiers in standard SQL (and in Spark when
-- spark.sql.ansi.doubleQuotedIdentifiers is enabled).
WHERE UPPER(PROP.MARKET) NOT LIKE '%ALL%'
-- COMMAND ----------
-- Full reload of DM.DM_TD_EXT_DTP_MARKET_PACK_MAPPING from the temporary view
-- DTP_MARKET_PACK_MAPPING. Columns are written by position, followed by the
-- two ETL audit timestamps (current timestamp converted with FROM_UTC_TIMESTAMP
-- and the zone id UTC+8).
insert overwrite table DM.DM_TD_EXT_DTP_MARKET_PACK_MAPPING
select
    src.PACK_CODE,
    src.MARKET,
    src.CLASS,
    src.KEY_COMPETITOR,
    src.MARKET_RATIO,
    src.STARTTIME,
    src.ENDTIME,
    src.VALUE_BRAND_RATIO,
    src.VALUE_BRAND_RATIO_START,
    src.VALUE_BRAND_RATIO_END,
    src.UNIT_BRAND_RATIO,
    src.UNIT_BRAND_START,
    src.UNIT_BRAND_END,
    src.COUNTINGUNIT_BRAND_RATIO,
    src.COUNTINGUNIT_BRAND_START,
    src.COUNTINGUNIT_BRAND_END,
    src.PDOT_BRAND_RATIO,
    src.PDOT_BRAND_START,
    src.PDOT_BRAND_END,
    from_utc_timestamp(current_timestamp(), 'UTC+8') as ETL_INSERT_DT,
    from_utc_timestamp(current_timestamp(), 'UTC+8') as ETL_UPDATE_DT
from DTP_MARKET_PACK_MAPPING src

View File

@@ -0,0 +1,88 @@
-- Databricks notebook source
-- CREATE OR REPLACE TABLE DM.DM_TD_EXT_RETAIL_MARKET_PACK_MAPPING
-- (
-- PACK_CODE STRING,
-- MARKET STRING,
-- CLASS STRING,
-- KEY_COMPETITOR STRING,
-- MARKET_RATIO STRING,
-- STARTTIME STRING,
-- ENDTIME STRING,
-- VALUE_BRAND_RATIO STRING,
-- VALUE_BRAND_RATIO_START STRING,
-- VALUE_BRAND_RATIO_END STRING,
-- UNIT_BRAND_RATIO STRING,
-- UNIT_BRAND_START STRING,
-- UNIT_BRAND_END STRING,
-- COUNTINGUNIT_BRAND_RATIO STRING,
-- COUNTINGUNIT_BRAND_START STRING,
-- COUNTINGUNIT_BRAND_END STRING,
-- PDOT_BRAND_RATIO STRING,
-- PDOT_BRAND_START STRING,
-- PDOT_BRAND_END STRING,
-- ETL_INSERT_DT TIMESTAMP,
-- ETL_UPDATE_DT TIMESTAMP
-- )
-- USING delta
-- -- LOCATION 'abfss://master@azcdatalakeprd.dfs.core.chinacloudapi.cn/DM/dm_td_ext_retail_market_pack_mapping';
-- -- 上面是生产环境location下面是测试环境location
-- LOCATION 'abfss://master@retaildlstoragetest.dfs.core.chinacloudapi.cn/DM/dm_td_ext_retail_market_pack_mapping';
-- COMMAND ----------
-- Build the retail market / pack mapping as a session-scoped temporary view.
-- Driving table: pack properties (PROP). Market ratios (MR) and brand ratios
-- (BR) are attached with LEFT JOINs on MARKET + PACK_CODE, so a pack without
-- a matching ratio row is kept with NULL ratio columns.
-- Rows whose upper-cased MARKET contains ALL are excluded; rows with a NULL
-- MARKET are dropped by the same predicate because NOT LIKE then yields NULL.
CREATE OR REPLACE TEMPORARY VIEW RETAIL_MARKET_PACK_MAPPING
AS
SELECT DISTINCT
    PROP.PACK_CODE,
    PROP.MARKET,
    PROP.CLASS,
    PROP.KEY_COMPETITOR,
    MR.MARKET_RATIO,
    MR.STARTTIME,
    MR.ENDTIME,
    BR.VALUE_BRAND_RATIO,
    BR.VALUE_BRAND_RATIO_START,
    BR.VALUE_BRAND_RATIO_END,
    BR.UNIT_BRAND_RATIO,
    BR.UNIT_BRAND_START,
    BR.UNIT_BRAND_END,
    BR.COUNTINGUNIT_BRAND_RATIO,
    BR.COUNTINGUNIT_BRAND_START,
    BR.COUNTINGUNIT_BRAND_END,
    BR.PDOT_BRAND_RATIO,
    BR.PDOT_BRAND_START,
    BR.PDOT_BRAND_END
FROM DM.DM_TD_EXT_RETAIL_PACK_PROPERTY PROP
LEFT JOIN DM.DM_TD_EXT_RETAIL_MARKET_RATIO MR
    ON PROP.MARKET = MR.MARKET
   AND PROP.PACK_CODE = MR.PACK_CODE
LEFT JOIN DM.DM_TD_EXT_RETAIL_MARKET_BRAND_RATIO BR
    ON PROP.MARKET = BR.MARKET
   AND PROP.PACK_CODE = BR.PACK_CODE
-- The pattern is a single-quoted string literal: double quotes delimit
-- identifiers in standard SQL (and in Spark when
-- spark.sql.ansi.doubleQuotedIdentifiers is enabled).
WHERE UPPER(PROP.MARKET) NOT LIKE '%ALL%'
-- COMMAND ----------
-- Full reload of DM.DM_TD_EXT_RETAIL_MARKET_PACK_MAPPING from the temporary
-- view RETAIL_MARKET_PACK_MAPPING. Columns are written by position, followed
-- by the two ETL audit timestamps (current timestamp converted with
-- FROM_UTC_TIMESTAMP and the zone id UTC+8).
insert overwrite table DM.DM_TD_EXT_RETAIL_MARKET_PACK_MAPPING
select
    src.PACK_CODE,
    src.MARKET,
    src.CLASS,
    src.KEY_COMPETITOR,
    src.MARKET_RATIO,
    src.STARTTIME,
    src.ENDTIME,
    src.VALUE_BRAND_RATIO,
    src.VALUE_BRAND_RATIO_START,
    src.VALUE_BRAND_RATIO_END,
    src.UNIT_BRAND_RATIO,
    src.UNIT_BRAND_START,
    src.UNIT_BRAND_END,
    src.COUNTINGUNIT_BRAND_RATIO,
    src.COUNTINGUNIT_BRAND_START,
    src.COUNTINGUNIT_BRAND_END,
    src.PDOT_BRAND_RATIO,
    src.PDOT_BRAND_START,
    src.PDOT_BRAND_END,
    from_utc_timestamp(current_timestamp(), 'UTC+8') as ETL_INSERT_DT,
    from_utc_timestamp(current_timestamp(), 'UTC+8') as ETL_UPDATE_DT
from RETAIL_MARKET_PACK_MAPPING src

View File

@@ -0,0 +1,367 @@
# Databricks notebook source
# MAGIC %md
# MAGIC ### 原本逻辑
# COMMAND ----------
#当更新pack 或品牌 事实数据时需要运行此代码,否则无需运行。
# COMMAND ----------
# MAGIC %sql
# MAGIC -- Point every known source file name at the table that holds its data.
# MAGIC -- One UPDATE per file, executed in the order listed.
# MAGIC UPDATE dwd.dwd_gnd_ext_retail_corresponding_relationship
# MAGIC    SET table_name = 'dwd.dwd_gnd_ext_retail_nataional_oap'
# MAGIC  WHERE file_name = 'pack-CV-抗血栓2通用名-全国.xlsx';
# MAGIC UPDATE dwd.dwd_gnd_ext_retail_corresponding_relationship
# MAGIC    SET table_name = 'dwd.dwd_gnd_ext_retail_htn'
# MAGIC  WHERE file_name = 'pack-CV-高血压-化学药-全国.xlsx';
# MAGIC UPDATE dwd.dwd_gnd_ext_retail_corresponding_relationship
# MAGIC    SET table_name = 'dwd.dwd_gnd_ext_retail_atomizer'
# MAGIC  WHERE file_name = 'pack-雾化器-全国&县域数据.xlsx';
# MAGIC UPDATE dwd.dwd_gnd_ext_retail_corresponding_relationship
# MAGIC    SET table_name = 'dwd.dwd_gnd_ext_retail_anti_asthma_copd'
# MAGIC  WHERE file_name = 'pack-RE-慢阻肺-全国.xlsx';
# MAGIC UPDATE dwd.dwd_gnd_ext_retail_corresponding_relationship
# MAGIC    SET table_name = 'dwd.dwd_gnd_ext_zk_brand'
# MAGIC  WHERE file_name = 'Brand-品牌数据报表.xlsx';
# MAGIC UPDATE dwd.dwd_gnd_ext_retail_corresponding_relationship
# MAGIC    SET table_name = 'dwd.dwd_gnd_ext_retail_statin_xzk'
# MAGIC  WHERE file_name = 'pack-CV-他汀类+血脂康-全国.xlsx';
# MAGIC UPDATE dwd.dwd_gnd_ext_retail_corresponding_relationship
# MAGIC    SET table_name = 'dwd.dwd_gnd_ext_retail_nataional_rd'
# MAGIC  WHERE file_name = 'pack-RD-肾科-全国.xlsx';
# MAGIC UPDATE dwd.dwd_gnd_ext_retail_corresponding_relationship
# MAGIC    SET table_name = 'dwd.dwd_gnd_ext_retail_aagsa_ppi_oral'
# MAGIC  WHERE file_name = 'pack-GI-慢性胃炎胃溃疡-全国.xlsx';
# MAGIC UPDATE dwd.dwd_gnd_ext_retail_corresponding_relationship
# MAGIC    SET table_name = 'dwd.dwd_gnd_ext_retail_nataional_niad'
# MAGIC  WHERE file_name = 'pack-DM-口服降糖化学药.xlsx';
# MAGIC UPDATE dwd.dwd_gnd_ext_retail_corresponding_relationship
# MAGIC    SET table_name = 'dwd.dwd_gnd_ext_retail_metoprolol_tartrat'
# MAGIC  WHERE file_name = 'pack-CV-酒石酸美托洛尔.xlsx';
# MAGIC
# COMMAND ----------
# Automated ingestion of pack data: consolidate all configured pack tables.
# Fetch the configuration rows for every PACK entry: the source table name is
# exposed as `tab` and the source file name as `brand_flag`. collect() returns
# them as a list of rows on the driver, so `df` is a list, not a DataFrame.
df = (
    spark.sql(
        """
SELECT DISTINCT table_name tab ,file_name brand_flag FROM dwd.dwd_gnd_ext_retail_corresponding_relationship
where type_name ='PACK'
"""
    )
    .collect()
)
def get_union_pack_data(df):
    """Stack the pack-level data of every configured source table.

    Args:
        df: Configuration rows (as returned by ``collect()``), each exposing
            ``tab`` (name of the source table to read) and ``brand_flag``
            (written verbatim into the ``brand_flag`` output column).

    Returns:
        The union of the per-table query results, or ``None`` when ``df`` is
        ``None`` or empty.
    """
    # Nothing configured: keep the historical "no rows -> None" contract and
    # accept None as well, as get_union_brand_data does.
    if not df:
        return None

    # Accumulated result; stays None until the first table has been read.
    union_query = None
    # Note kept from the original author: niad_pdot_unit needs a special
    # assignment, its table being tmp.tmp_inc_gnd_ext_retail_nataional_niad.
    # The corresponding flag variable was already disabled in the original.
    for table in df:
        # Source table of this configuration row.
        T = str(table.tab)
        # Source file name, used to tag every output row.
        brand_flag = str(table.brand_flag)
        # `month` is qualified with t1 so that it can never become ambiguous
        # with a column of the joined table t2.
        # NOTE(review): t2 contributes no column to the select list, so this
        # LEFT JOIN can only multiply rows when dws.dws_ext_retail_td_prod
        # holds several rows per zk_product_id - confirm it is still needed.
        sql = f"""
            select
                 cast(t1.month as int)                                                  AS YYYYMM
                ,cast(left(t1.quarter, 4) as int)                                       AS year
                ,right(t1.quarter, 2)                                                   AS quarter
                ,t1.quarter                                                             AS yq
                ,t1.zk_product_id
                ,t1.zk_region
                ,t1.zk_rx_otc
                ,t1.zk_medicine_type
                ,t1.zk_medicine_tier1
                ,t1.zk_medicine_tier2
                ,t1.zk_medicine_tier3
                ,t1.zk_medicine_tier4
                ,t1.zk_common_name
                ,t1.zk_dosage_form
                ,t1.zk_user_type
                ,t1.zk_category_name
                ,t1.zk_product_name
                ,t1.zk_brand_name
                ,t1.zk_manu_des
                ,t1.zk_corp_des
                ,t1.zk_pack_des
                ,t1.price
                ,CAST(replace(t1.sales_unit,',','') AS decimal(30,10))           as sales_unit
                ,CAST(replace(t1.sales_value,',','') AS decimal(30,10))          as sales_value
                ,CAST(replace(t1.digital_spread_rate,',','') AS decimal(30,10))  as digital_spread_rate
                ,CAST(replace(t1.weighted_spread_rate,',','') AS decimal(30,10)) as weighted_spread_rate
                ,CAST(replace(t1.counting_unit,',','') AS decimal(30,10))        as counting_unit
                ,'{brand_flag}'                                                         as brand_flag
                ,from_utc_timestamp(current_timestamp(),'UTC+8')                        AS etl_insert_dt
                ,from_utc_timestamp(current_timestamp(),'UTC+8')                        AS etl_update_dt
            from {T} t1
            left join dws.dws_ext_retail_td_prod t2
                on t1.zk_product_id = t2.zk_product_id
            where t1.month is not null
        """
        # Read this table's data.
        current_query = spark.sql(sql)
        # Append it to the running union (columns are matched by position).
        if union_query is None:
            union_query = current_query
        else:
            union_query = union_query.union(current_query)
    # The caller decides what to do with the combined dataset.
    return union_query
# Build the combined pack dataset and persist it with a full overwrite.
pack_result = get_union_pack_data(df)
if pack_result is None:
    # get_union_pack_data returns None when no source table is configured.
    # Fail with an explicit message instead of an AttributeError on `.write`.
    raise ValueError(
        "get_union_pack_data returned no data: no PACK source tables are configured"
    )
pack_result.write.mode("overwrite").saveAsTable("dwd.dwd_inc_gnd_ext_retail_nataional_pack_union_all")
# COMMAND ----------
# Automated ingestion of brand + province data.
# Fetch the configuration rows for every BRAND entry: the source table name is
# exposed as `tab` and the source file name as `brand_flag`. collect() returns
# them as a list of rows on the driver.
dfband = (
    spark.sql(
        """
SELECT DISTINCT table_name tab ,file_name brand_flag FROM dwd.dwd_gnd_ext_retail_corresponding_relationship
where type_name ='BRAND'
"""
    )
    .collect()
)
def get_union_brand_data(df):
    """Stack the brand-level data of every configured source table.

    Args:
        df: Configuration rows (as returned by ``collect()``), each exposing
            ``tab`` (name of the source table to read) and ``brand_flag``
            (source file name, written into the ``pack_flag`` output column).

    Returns:
        The union of the per-table query results, or ``None`` when ``df`` is
        ``None`` or empty.
    """
    # No configuration rows: nothing to read.
    if not df:
        return None

    # Accumulated result; stays None until the first table has been read.
    union_query = None
    for table in df:
        # Source table of this configuration row.
        T = str(table.tab)
        # Source file name, used to tag every output row.
        pack_flag = str(table.brand_flag)
        # YYYYMM is the last month of the quarter (year * 100 + quarter * 3).
        # Numeric columns use a lone '-' as placeholder; only such a value is
        # turned into 0. The previous replace(col, '-', '0') also rewrote the
        # minus sign of negative numbers ('-1.5' became '01.5'), silently
        # flipping their sign.
        # NOTE(review): the text columns still drop every '-' character,
        # including hyphens inside names - confirm this is intended.
        sql = f"""
            select
                 cast(left(quarter, 4)*100 + right(quarter,1)*3 as int )  AS YYYYMM
                ,cast(left(quarter, 4) as int )                           AS year
                ,right(quarter, 2)                                        AS quarter
                ,quarter                                                  AS yq
                ,type                                                     AS brand_cat_type
                ,case when ta = 'NIAD' then 'DM' else ta end              AS TA
                ,market                                                   AS market
                ,zk_brand_category                                        AS zk_brand_category
                ,zk_common_name                                           AS zk_common_name
                ,zk_manu_des                                              AS zk_manu_des
                ,rc_name_en                                               AS rc_name_en
                ,province_city                                            AS province_city
                ,ytd                                                      AS ytd
                ,cast(sales_value * 1000000 as decimal(30,10))            AS sales_val
                ,cast(sales_volume * 1000000 as decimal(30,10))           AS sales_vol
                ,cast(price as decimal(30,10))                            as price
                ,cast(num_dist_rate as decimal(30,10))                    as num_dist_rate
                ,cast(weig_dist_rate as decimal(30,10))                   as weig_dist_rate
                ,cast(value_share as decimal(30,10))                      as val_share
                ,cast(volume_share as decimal(30,10))                     as vol_share
                ,replace(key_brand_ytd,'-','')                            as key_brand_ytd
                ,cast(regexp_replace(key_brand_rank_ytd, '^ *- *$', '0') as int)                as key_brand_rank_ytd
                ,replace(top_brand_ytd,'-','')                            as top_brand_ytd
                ,cast(regexp_replace(top_brand_ms_ytd, '^ *- *$', '0') as decimal(30,10))       as top_brand_ms_ytd
                ,cast(regexp_replace(top_brand_inc_ms_ytd, '^ *- *$', '0') as decimal(30,10))   as top_brand_inc_ms_ytd
                ,cast(regexp_replace(top_brand_gr_ytd, '^ *- *$', '0') as decimal(30,10))       as top_brand_gr_ytd
                ,replace(key_brand_qtd,'-','')                            as key_brand_qtd
                ,cast(regexp_replace(key_brand_rank_qtd, '^ *- *$', '0') as int)                as key_brand_rank_qtd
                ,replace(top_brand_qtd,'-','')                            as top_brand_qtd
                ,cast(regexp_replace(top_brand_ms_qtd, '^ *- *$', '0') as decimal(30,10))       as top_brand_ms_qtd
                ,cast(regexp_replace(top_brand_inc_ms_qtd, '^ *- *$', '0') as decimal(30,10))   as top_brand_inc_ms_qtd
                ,cast(regexp_replace(top_brand_gr_qtd, '^ *- *$', '0') as decimal(30,10))       as top_brand_gr_qtd
                ,ranked_by                                                as ranked_by
                ,'{pack_flag}'                                            as pack_flag
                ,from_utc_timestamp(current_timestamp(),'UTC+8')          as etl_insert_dt
                ,from_utc_timestamp(current_timestamp(),'UTC+8')          as etl_update_dt
            from {T}
        """
        # Read this table's data.
        current_query = spark.sql(sql)
        # Append it to the running union (columns are matched by position).
        if union_query is None:
            union_query = current_query
        else:
            union_query = union_query.union(current_query)
    # The caller decides what to do with the combined dataset.
    return union_query
# Build the combined brand dataset and persist it with a full overwrite.
brand_result = get_union_brand_data(dfband)
if brand_result is None:
    # get_union_brand_data returns None when no source table is configured.
    # Fail with an explicit message instead of an AttributeError on `.write`.
    raise ValueError(
        "get_union_brand_data returned no data: no BRAND source tables are configured"
    )
brand_result.write.mode("overwrite").saveAsTable("dwd.dwd_inc_gnd_ext_retail_nataional_brand_union_all")
# COMMAND ----------
# MAGIC %sql
# MAGIC -- Background (translated from the original note): the products 多达一, 天一宁, others and
# MAGIC -- 氨氯地平阿托伐他汀钙 are duplicated between the hypertension and the statin + xuezhikang pack files,
# MAGIC -- but have no corresponding values in the brand report. That makes the later split from
# MAGIC -- pack + national to pack + province inconsistent. The pack + national figures are therefore
# MAGIC -- spread evenly over the regions, aggregated to brand level and appended to the brand data,
# MAGIC -- so that the later pack + national to pack + province split has a data basis.
# MAGIC -- The statement rewrites the brand table with its current rows plus the derived rows, so
# MAGIC -- running this cell again without rebuilding the table first appends the derived rows again.
# MAGIC insert overwrite table dwd.dwd_inc_gnd_ext_retail_nataional_brand_union_all
# MAGIC with data_pack as (
# MAGIC -- Base data: pack rows with zk_common_name 氨氯地平阿托伐他汀钙 from the three listed CV files,
# MAGIC -- aggregated per brand_flag, YYYYMM and product description (OTHERS when no property row matches).
# MAGIC -- num is the number of distinct zk_region values other than 全国 in the pack table.
# MAGIC -- NOTE(review): sum(num) equals num multiplied by the number of rows in the group, so the
# MAGIC -- divisor is the plain region count only when a group holds exactly one row - confirm intent.
# MAGIC select brand_flag,a.YYYYMM,nvl(b.prod_des_c,'OTHERS' ) prod_des_c ,sum(a.sales_value)/sum(num) sales_value,sum(a.sales_unit)/sum(num) sales_unit ,sum(a.sales_value) sales ,sum(a.sales_unit) saleu
# MAGIC from dwd.dwd_inc_gnd_ext_retail_nataional_pack_union_all a
# MAGIC left join dwd.dwd_gnd_ext_retail_pack_property b on a.zk_product_id = b.product_id
# MAGIC cross join (select count(distinct zk_region) num from dwd.dwd_inc_gnd_ext_retail_nataional_pack_union_all where zk_region<>'全国')
# MAGIC where a.zk_common_name ='氨氯地平阿托伐他汀钙' and a.brand_flag in ('pack-CV-他汀类+血脂康-全国.xlsx' ,'pack-CV-高血压-化学药-全国.xlsx','pack-CV-抗血栓2通用名-全国.xlsx')
# MAGIC group by 1,2,3
# MAGIC ),city as (
# MAGIC -- Header table: distinct period / province combinations of the brand data
# MAGIC -- (national rows excluded, volume-ranked rows only).
# MAGIC select distinct
# MAGIC YYYYMM
# MAGIC ,year
# MAGIC ,quarter
# MAGIC ,yq
# MAGIC ,province_city
# MAGIC from dwd.dwd_inc_gnd_ext_retail_nataional_brand_union_all
# MAGIC where province_city !='全国' and ranked_by ='volume'
# MAGIC )
# MAGIC , split as (
# MAGIC -- First branch: one row per province with the averaged figures.
# MAGIC -- Second branch: one national (全国) row with the unaveraged totals.
# MAGIC -- Columns are matched to the target table by position. The empty strings and zeros
# MAGIC -- fill the target columns that have no value here.
# MAGIC -- Both branches LEFT JOIN the periods on YYYYMM and select city.YYYYMM, so a data_pack row
# MAGIC -- without a matching period is kept with NULL period columns.
# MAGIC -- NOTE(review): confirm that such NULL-period rows are acceptable downstream.
# MAGIC select
# MAGIC city.YYYYMM
# MAGIC ,year
# MAGIC ,quarter
# MAGIC ,yq
# MAGIC ,'品牌' brand_cat_type
# MAGIC ,'CV' TA
# MAGIC ,brand_flag market
# MAGIC ,prod_des_c
# MAGIC ,''
# MAGIC ,''
# MAGIC ,''
# MAGIC ,province_city
# MAGIC ,''
# MAGIC ,sales_value
# MAGIC ,sales_unit
# MAGIC ,0,0,0,0,0,'' ,0 ,'' ,0,0,0,'',0,'',0,0,0,'volume' ,''
# MAGIC from data_pack left join city on city.YYYYMM=data_pack.YYYYMM
# MAGIC union all
# MAGIC select
# MAGIC city.YYYYMM
# MAGIC ,year
# MAGIC ,quarter
# MAGIC ,yq
# MAGIC ,'品牌' brand_cat_type
# MAGIC ,'CV' TA
# MAGIC ,brand_flag market
# MAGIC ,prod_des_c
# MAGIC ,''
# MAGIC ,''
# MAGIC ,''
# MAGIC ,'全国' province_city
# MAGIC ,''
# MAGIC ,sales sales_value
# MAGIC ,saleu sales_unit
# MAGIC ,0,0,0,0,0,'' ,0 ,'' ,0,0,0,'',0,'',0,0,0,'volume' ,''
# MAGIC from data_pack left join (select distinct year,quarter,yyyymm,yq from city ) city on city.YYYYMM=data_pack.YYYYMM
# MAGIC
# MAGIC )
# MAGIC -- Final result: the existing brand rows unchanged, plus the derived rows
# MAGIC -- with the two ETL audit timestamps appended as the last two columns.
# MAGIC select * from dwd.dwd_inc_gnd_ext_retail_nataional_brand_union_all
# MAGIC union all
# MAGIC select *
# MAGIC ,from_utc_timestamp(current_timestamp(),'UTC+8') AS etl_insert_dt
# MAGIC ,from_utc_timestamp(current_timestamp(),'UTC+8') AS etl_update_dt
# MAGIC from split
# COMMAND ----------
# MAGIC %sql
# MAGIC -- Zero padding: rewrite the pack table in place with zk_product_id left-padded
# MAGIC -- with zeros to 7 characters (ids of 7 or more characters are kept unchanged).
# MAGIC -- Rows of pack-CV-酒石酸美托洛尔.xlsx are excluded because that file no longer delivers data.
# MAGIC INSERT OVERWRITE dwd.dwd_inc_gnd_ext_retail_nataional_pack_union_all
# MAGIC SELECT
# MAGIC     src.YYYYMM,
# MAGIC     src.year,
# MAGIC     src.quarter,
# MAGIC     src.yq,
# MAGIC     CASE
# MAGIC         WHEN length(src.zk_product_id) < 7 THEN lpad(src.zk_product_id, 7, '0')
# MAGIC         ELSE src.zk_product_id
# MAGIC     END AS zk_product_id,
# MAGIC     src.zk_region,
# MAGIC     src.zk_rx_otc,
# MAGIC     src.zk_medicine_type,
# MAGIC     src.zk_medicine_tier1,
# MAGIC     src.zk_medicine_tier2,
# MAGIC     src.zk_medicine_tier3,
# MAGIC     src.zk_medicine_tier4,
# MAGIC     src.zk_common_name,
# MAGIC     src.zk_dosage_form,
# MAGIC     src.zk_user_type,
# MAGIC     src.zk_category_name,
# MAGIC     src.zk_product_name,
# MAGIC     src.zk_brand_name,
# MAGIC     src.zk_manu_des,
# MAGIC     src.zk_corp_des,
# MAGIC     src.zk_pack_des,
# MAGIC     src.price,
# MAGIC     src.sales_unit,
# MAGIC     src.sales_value,
# MAGIC     src.digital_spread_rate,
# MAGIC     src.weighted_spread_rate,
# MAGIC     src.counting_unit,
# MAGIC     src.brand_flag,
# MAGIC     src.etl_insert_dt,
# MAGIC     src.etl_update_dt
# MAGIC FROM dwd.dwd_inc_gnd_ext_retail_nataional_pack_union_all src
# MAGIC WHERE src.brand_flag <> 'pack-CV-酒石酸美托洛尔.xlsx';
# COMMAND ----------
# MAGIC %md
# MAGIC ### 新逻辑
# MAGIC - 修改brand数据先拆分成月维度的数据
# COMMAND ----------
# MAGIC %sql
# MAGIC /*
# MAGIC   Modified on : 20250311
# MAGIC   Modified by : chenwu
# MAGIC   Change      : brand data is delivered quarterly while pack data is delivered monthly,
# MAGIC                 so every quarterly brand row is expanded into three monthly rows whose
# MAGIC                 sales value and volume are the quarterly figures divided by 3.
# MAGIC   NOTE(review): the statement reads and overwrites the same table and the expanded rows keep
# MAGIC                 their market, so running this cell twice divides by 3 and triples the rows again.
# MAGIC   NOTE(review): rows with a NULL market match neither market filter and are not written back.
# MAGIC                 Rows whose yq does not end in Q1..Q4 produce a NULL month array and are
# MAGIC                 presumably dropped by the non-OUTER lateral view - confirm both are intended.
# MAGIC */
# MAGIC insert overwrite table dwd.dwd_inc_gnd_ext_retail_nataional_brand_union_all
# MAGIC with quarterly_table as (
# MAGIC     -- Markets delivered quarterly. Any market delivered monthly must be excluded here.
# MAGIC     select
# MAGIC         *
# MAGIC     from dwd.dwd_inc_gnd_ext_retail_nataional_brand_union_all
# MAGIC     where market not in ('NIAD','Inhaled Extended Market','布地奈德雾化溶液')
# MAGIC )
# MAGIC
# MAGIC ,month_table as (
# MAGIC     -- Convert quarterly rows to monthly rows.
# MAGIC     SELECT
# MAGIC          -- Month key as an integer YYYYMM: year * 100 + month number.
# MAGIC          -- Explicit integer arithmetic replaces the former string arithmetic,
# MAGIC          -- in which the zero padding of the month had no effect on the sum.
# MAGIC          CAST(SUBSTR(q.yq, 1, 4) AS INT) * 100 + m.month_num AS YYYYMM
# MAGIC         ,`year`
# MAGIC         ,`quarter`
# MAGIC         ,yq
# MAGIC         ,brand_cat_type
# MAGIC         ,TA
# MAGIC         ,market
# MAGIC         ,zk_brand_category
# MAGIC         ,zk_common_name
# MAGIC         ,zk_manu_des
# MAGIC         ,rc_name_en
# MAGIC         ,province_city
# MAGIC         ,ytd
# MAGIC         ,sales_val / 3 AS sales_val -- quarterly value spread evenly over 3 months
# MAGIC         ,sales_vol / 3 AS sales_vol -- quarterly volume spread evenly over 3 months
# MAGIC         ,price
# MAGIC         ,num_dist_rate
# MAGIC         ,weig_dist_rate
# MAGIC         ,val_share
# MAGIC         ,vol_share
# MAGIC         ,key_brand_ytd
# MAGIC         ,key_brand_rank_ytd
# MAGIC         ,top_brand_ytd
# MAGIC         ,top_brand_ms_ytd
# MAGIC         ,top_brand_inc_ms_ytd
# MAGIC         ,top_brand_gr_ytd
# MAGIC         ,key_brand_qtd
# MAGIC         ,key_brand_rank_qtd
# MAGIC         ,top_brand_qtd
# MAGIC         ,top_brand_ms_qtd
# MAGIC         ,top_brand_inc_ms_qtd
# MAGIC         ,top_brand_gr_qtd
# MAGIC         ,ranked_by
# MAGIC         ,pack_flag
# MAGIC         ,etl_insert_dt
# MAGIC         ,etl_update_dt
# MAGIC     FROM
# MAGIC         quarterly_table q
# MAGIC         LATERAL VIEW EXPLODE( -- generate the three months of each quarter
# MAGIC             CASE
# MAGIC                 WHEN RIGHT(q.yq, 2) = 'Q1' THEN ARRAY(1, 2, 3)
# MAGIC                 WHEN RIGHT(q.yq, 2) = 'Q2' THEN ARRAY(4, 5, 6)
# MAGIC                 WHEN RIGHT(q.yq, 2) = 'Q3' THEN ARRAY(7, 8, 9)
# MAGIC                 WHEN RIGHT(q.yq, 2) = 'Q4' THEN ARRAY(10, 11, 12)
# MAGIC             END
# MAGIC         ) m AS month_num
# MAGIC )
# MAGIC
# MAGIC ,other_not_quarterly_table as (
# MAGIC     -- Markets delivered monthly. They are written back unchanged.
# MAGIC     select
# MAGIC         *
# MAGIC     from dwd.dwd_inc_gnd_ext_retail_nataional_brand_union_all
# MAGIC     where market in ('NIAD','Inhaled Extended Market','布地奈德雾化溶液')
# MAGIC )
# MAGIC
# MAGIC select * from month_table
# MAGIC union all
# MAGIC select * from other_not_quarterly_table

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff