# Databricks notebook source
### constant
# TA / market labels used by this pipeline.
# The %sql cells below cannot read Python variables, so they repeat these
# literals inline; keep the two in sync when a label changes.
LEVEL_TA_CV = 'CV'
LEVEL_TA_RE = 'RE'
# Misspelled historical name, kept as an alias so existing references keep working.
LEVLE_TA_RE = LEVEL_TA_RE
LEVEL_TA_RE2 = 'RE2'
LEVEL_TA_GI = 'GI'
LEVEL_TA_DM = 'DM'
LEVEL_TA_RD = 'RD'

LEVEL_MARKET_HTN = '高血压用药'
LEVEL_MARKET_STATIN_XZK = '他汀类+血脂康'
LEVEL_MARKET_BRILINTA = 'Brilinta Market'
LEVEL_MARKET_COPD = '小儿咳喘'
LEVEL_MARKET_AAGSA_PPI_ORAL = '慢性胃炎、胃溃疡'
LEVEL_MARKET_ATOMIZER = '慢性阻塞性肺疾病'
LEVEL_MARKET_NIAD = 'NIAD'
LEVEL_MARKET_RD = 'RD Market'

# COMMAND ----------

############################################################START##############################################################
### STEP-1: load rawdata to tmp table

# COMMAND ----------

# MAGIC %md
# MAGIC ## STEP-1: load rawdata to tmp table

# COMMAND ----------

# MAGIC %sql
# MAGIC -------------------------------------------------------------------------------------
# MAGIC -- STEP-1: load rawdata to tmp table
# MAGIC -- 1.1 load brand data to tmp.tmp_retail_brand_rawdata
# MAGIC --     Full refresh (insert overwrite). Only rows with ranked_by = 'value' are kept.
# MAGIC --     Columns are matched to the target table by position.
# MAGIC -------------------------------------------------------------------------------------
# MAGIC insert overwrite table tmp.tmp_retail_brand_rawdata
# MAGIC
# MAGIC select
# MAGIC     type,
# MAGIC     ta,
# MAGIC     market,
# MAGIC     zk_brand_category,
# MAGIC     zk_common_name,
# MAGIC     zk_manu_des,
# MAGIC     rc_name_en,
# MAGIC     province_city,
# MAGIC     quarter,
# MAGIC     ytd,
# MAGIC     '',                       -- empty-string placeholder for the 11th target column
# MAGIC     -- NOTE(review): presumably the source figures are in millions - confirm the unit
# MAGIC     sales_value * 1000000,
# MAGIC     sales_volume * 1000000
# MAGIC from dwd.dwd_gnd_ext_zk_brand
# MAGIC where ranked_by = 'value'

# COMMAND ----------

# MAGIC %sql
# MAGIC -------------------------------------------------------------------------------------
# MAGIC -- STEP-1: load rawdata to tmp table
# MAGIC -- 1.2 load pack data to tmp.tmp_retail_pack_rawdata
# MAGIC --   1.2.1 load from dwd.dwd_gnd_ext_retail_htn (pack-CV-高血压-化学药-全国.xlsx)
# MAGIC --         This is the first pack load: it OVERWRITES the table; cells 1.2.2+ append.
# MAGIC --         Columns are matched to the target table by position.
# MAGIC -------------------------------------------------------------------------------------
# MAGIC insert overwrite table tmp.tmp_retail_pack_rawdata
# MAGIC
# MAGIC select
# MAGIC     -- left-pad ids shorter than 7 characters with zeros; longer ids pass through unchanged
# MAGIC     case when length(zk_product_id) < 7 then right(concat('0000000', zk_product_id), 7) else zk_product_id end as product_id,
# MAGIC     zk_product_id,
# MAGIC     zk_region,
# MAGIC     zk_rx_otc,
# MAGIC     zk_medicine_type,
# MAGIC     zk_medicine_tier1,
# MAGIC     zk_medicine_tier2,
# MAGIC     zk_medicine_tier3,
# MAGIC     zk_medicine_tier4,
# MAGIC     zk_common_name,
# MAGIC     zk_dosage_form,
# MAGIC     zk_user_type,
# MAGIC     zk_category_name,
# MAGIC     zk_product_name,
# MAGIC     zk_brand_name,
# MAGIC     zk_manu_des,
# MAGIC     zk_corp_des,
# MAGIC     zk_pack_des,
# MAGIC     month,
# MAGIC     quarter,
# MAGIC     -- strip thousands separators from the numeric text columns
# MAGIC     replace(price, ',', ''),
# MAGIC     replace(sales_unit, ',', ''),
# MAGIC     replace(sales_value, ',', ''),
# MAGIC     digital_spread_rate,
# MAGIC     weighted_spread_rate,
# MAGIC     counting_unit,
# MAGIC     null as pack_code,
# MAGIC     null as molecule_code,
# MAGIC     null as molecule_desc,
# MAGIC     null as product_code,
# MAGIC     null as product_desc,
# MAGIC     'CV' as level_ta,
# MAGIC     '高血压用药' as level_market,
# MAGIC     null as level_molecule,
# MAGIC     null as level_brand,
# MAGIC     null as ratio_val,
# MAGIC     null as ratio_vol,
# MAGIC     0 as data_flag,
# MAGIC     null as brand_flag
# MAGIC from dwd.dwd_gnd_ext_retail_htn
# MAGIC --------------------------------- *hard_code* ---------------------------------------
# MAGIC -- Rows for Duodayi (多达一), Tianyining (天一宁), "others" and the common name
# MAGIC -- 氨氯地平阿托伐他汀钙 (amlodipine + atorvastatin calcium) appear in BOTH the hypertension
# MAGIC -- file and the statin + Xuezhikang file.
# MAGIC -- Only one copy is needed; prefer the copy that carries a split ratio.
# MAGIC -- The corresponding product_ids are:
# MAGIC --   '-356','5258049','5852881','9167744','9526959','9167556','9279325','8533952'
# MAGIC -- Equivalent filter used here: zk_common_name <> '氨氯地平阿托伐他汀钙'
# MAGIC where zk_common_name <> '氨氯地平阿托伐他汀钙'
# MAGIC --------------------------------- *hard_code* ---------------------------------------

# COMMAND ----------

# MAGIC %sql
# MAGIC -------------------------------------------------------------------------------------
# MAGIC -- STEP-1: load rawdata to tmp table
# MAGIC -- 1.2 load pack data to tmp.tmp_retail_pack_rawdata
# MAGIC --   1.2.2 load from dwd.dwd_gnd_ext_retail_statin_xzk (pack-CV-他汀类+血脂康-全国.xlsx)
# MAGIC --         Appends to the table; columns are matched by position.
# MAGIC -------------------------------------------------------------------------------------
# MAGIC insert into table tmp.tmp_retail_pack_rawdata
# MAGIC
# MAGIC select
# MAGIC     -- left-pad ids shorter than 7 characters with zeros; longer ids pass through unchanged
# MAGIC     case when length(zk_product_id) < 7 then right(concat('0000000', zk_product_id), 7) else zk_product_id end as product_id,
# MAGIC     zk_product_id,
# MAGIC     zk_region,
# MAGIC     zk_rx_otc,
# MAGIC     zk_medicine_type,
# MAGIC     zk_medicine_tier1,
# MAGIC     zk_medicine_tier2,
# MAGIC     zk_medicine_tier3,
# MAGIC     zk_medicine_tier4,
# MAGIC     zk_common_name,
# MAGIC     zk_dosage_form,
# MAGIC     zk_user_type,
# MAGIC     zk_category_name,
# MAGIC     zk_product_name,
# MAGIC     zk_brand_name,
# MAGIC     zk_manu_des,
# MAGIC     zk_corp_des,
# MAGIC     zk_pack_des,
# MAGIC     month,
# MAGIC     quarter,
# MAGIC     -- strip thousands separators from the numeric text columns
# MAGIC     replace(price, ',', ''),
# MAGIC     replace(sales_unit, ',', ''),
# MAGIC     replace(sales_value, ',', ''),
# MAGIC     digital_spread_rate,
# MAGIC     weighted_spread_rate,
# MAGIC     counting_unit,
# MAGIC     null as pack_code,
# MAGIC     null as molecule_code,
# MAGIC     null as molecule_desc,
# MAGIC     null as product_code,
# MAGIC     null as product_desc,
# MAGIC     'CV' as level_ta,
# MAGIC     '他汀类+血脂康' as level_market,
# MAGIC     null as level_molecule,
# MAGIC     null as level_brand,
# MAGIC     null as ratio_val,
# MAGIC     null as ratio_vol,
# MAGIC     0 as data_flag,
# MAGIC     null as brand_flag
# MAGIC from dwd.dwd_gnd_ext_retail_statin_xzk

# COMMAND ----------

# MAGIC %sql
# MAGIC -------------------------------------------------------------------------------------
# MAGIC -- STEP-1: load rawdata to tmp table
# MAGIC -- 1.2 load pack data to tmp.tmp_retail_pack_rawdata
# MAGIC --   1.2.3 load from dwd.dwd_gnd_ext_retail_nataional_oap (pack-CV-抗血栓2通用名-全国.xlsx)
# MAGIC --         Appends to the table; columns are matched by position.
# MAGIC -------------------------------------------------------------------------------------
# MAGIC insert into table tmp.tmp_retail_pack_rawdata
# MAGIC
# MAGIC select
# MAGIC     -- left-pad ids shorter than 7 characters with zeros; longer ids pass through unchanged
# MAGIC     case when length(zk_product_id) < 7 then right(concat('0000000', zk_product_id), 7) else zk_product_id end as product_id,
# MAGIC     zk_product_id,
# MAGIC     zk_region,
# MAGIC     zk_rx_otc,
# MAGIC     zk_medicine_type,
# MAGIC     zk_medicine_tier1,
# MAGIC     zk_medicine_tier2,
# MAGIC     zk_medicine_tier3,
# MAGIC     zk_medicine_tier4,
# MAGIC     zk_common_name,
# MAGIC     zk_dosage_form,
# MAGIC     zk_user_type,
# MAGIC     zk_category_name,
# MAGIC     zk_product_name,
# MAGIC     zk_brand_name,
# MAGIC     zk_manu_des,
# MAGIC     zk_corp_des,
# MAGIC     zk_pack_des,
# MAGIC     month,
# MAGIC     quarter,
# MAGIC     -- strip thousands separators from the numeric text columns
# MAGIC     replace(price, ',', ''),
# MAGIC     replace(sales_unit, ',', ''),
# MAGIC     replace(sales_value, ',', ''),
# MAGIC     digital_spread_rate,
# MAGIC     weighted_spread_rate,
# MAGIC     counting_unit,
# MAGIC     null as pack_code,
# MAGIC     null as molecule_code,
# MAGIC     null as molecule_desc,
# MAGIC     null as product_code,
# MAGIC     null as product_desc,
# MAGIC     'CV' as level_ta,
# MAGIC     'Brilinta Market' as level_market,
# MAGIC     null as level_molecule,
# MAGIC     null as level_brand,
# MAGIC     null as ratio_val,
# MAGIC     null as ratio_vol,
# MAGIC     0 as data_flag,
# MAGIC     null as brand_flag
# MAGIC from dwd.dwd_gnd_ext_retail_nataional_oap

# COMMAND ----------

# MAGIC %sql
# MAGIC -------------------------------------------------------------------------------------
# MAGIC -- STEP-1: load rawdata to tmp table
# MAGIC -- 1.2 load pack data to tmp.tmp_retail_pack_rawdata
# MAGIC --   1.2.4 load from dwd.dwd_gnd_ext_retail_anti_asthma_copd (pack-RE-慢阻肺-全国.xlsx)
# MAGIC --         Appends to the table; columns are matched by position.
# MAGIC --         The TA / market labels differ by year: the 2024 pair is kept below as
# MAGIC --         disabled code, the 2025 pair is the active one.
# MAGIC -------------------------------------------------------------------------------------
# MAGIC insert into table tmp.tmp_retail_pack_rawdata
# MAGIC
# MAGIC select
# MAGIC     -- left-pad ids shorter than 7 characters with zeros; longer ids pass through unchanged
# MAGIC     case when length(zk_product_id) < 7 then right(concat('0000000', zk_product_id), 7) else zk_product_id end as product_id,
# MAGIC     zk_product_id,
# MAGIC     zk_region,
# MAGIC     zk_rx_otc,
# MAGIC     zk_medicine_type,
# MAGIC     zk_medicine_tier1,
# MAGIC     zk_medicine_tier2,
# MAGIC     zk_medicine_tier3,
# MAGIC     zk_medicine_tier4,
# MAGIC     zk_common_name,
# MAGIC     zk_dosage_form,
# MAGIC     zk_user_type,
# MAGIC     zk_category_name,
# MAGIC     zk_product_name,
# MAGIC     zk_brand_name,
# MAGIC     zk_manu_des,
# MAGIC     zk_corp_des,
# MAGIC     zk_pack_des,
# MAGIC     month,
# MAGIC     quarter,
# MAGIC     -- strip thousands separators from the numeric text columns
# MAGIC     replace(price, ',', ''),
# MAGIC     replace(sales_unit, ',', ''),
# MAGIC     replace(sales_value, ',', ''),
# MAGIC     digital_spread_rate,
# MAGIC     weighted_spread_rate,
# MAGIC     counting_unit,
# MAGIC     null as pack_code,
# MAGIC     null as molecule_code,
# MAGIC     null as molecule_desc,
# MAGIC     null as product_code,
# MAGIC     null as product_desc,
# MAGIC     --------------2024-----------
# MAGIC     --'RE',
# MAGIC     --'慢性阻塞性肺疾病',
# MAGIC     --------------2025-----------
# MAGIC     'RE2' as level_ta,
# MAGIC     '小儿咳喘' as level_market,
# MAGIC     -----------------------------
# MAGIC     null as level_molecule,
# MAGIC     null as level_brand,
# MAGIC     null as ratio_val,
# MAGIC     null as ratio_vol,
# MAGIC     0 as data_flag,
# MAGIC     null as brand_flag
# MAGIC from dwd.dwd_gnd_ext_retail_anti_asthma_copd

# COMMAND ----------

# MAGIC %sql
# MAGIC -------------------------------------------------------------------------------------
# MAGIC -- STEP-1: load rawdata to tmp table
# MAGIC -- 1.2 load pack data to tmp.tmp_retail_pack_rawdata
# MAGIC --   1.2.5 load from dwd.dwd_gnd_ext_retail_aagsa_ppi_oral (pack-GI-慢性胃炎胃溃疡-全国.xlsx)
# MAGIC --         Appends to the table; columns are matched by position.
# MAGIC -------------------------------------------------------------------------------------
# MAGIC insert into table tmp.tmp_retail_pack_rawdata
# MAGIC
# MAGIC select
# MAGIC     -- left-pad ids shorter than 7 characters with zeros; longer ids pass through unchanged
# MAGIC     case when length(zk_product_id) < 7 then right(concat('0000000', zk_product_id), 7) else zk_product_id end as product_id,
# MAGIC     zk_product_id,
# MAGIC     zk_region,
# MAGIC     zk_rx_otc,
# MAGIC     zk_medicine_type,
# MAGIC     zk_medicine_tier1,
# MAGIC     zk_medicine_tier2,
# MAGIC     zk_medicine_tier3,
# MAGIC     zk_medicine_tier4,
# MAGIC     zk_common_name,
# MAGIC     zk_dosage_form,
# MAGIC     zk_user_type,
# MAGIC     zk_category_name,
# MAGIC     zk_product_name,
# MAGIC     zk_brand_name,
# MAGIC     zk_manu_des,
# MAGIC     zk_corp_des,
# MAGIC     zk_pack_des,
# MAGIC     month,
# MAGIC     quarter,
# MAGIC     -- strip thousands separators from the numeric text columns
# MAGIC     replace(price, ',', ''),
# MAGIC     replace(sales_unit, ',', ''),
# MAGIC     replace(sales_value, ',', ''),
# MAGIC     digital_spread_rate,
# MAGIC     weighted_spread_rate,
# MAGIC     counting_unit,
# MAGIC     null as pack_code,
# MAGIC     null as molecule_code,
# MAGIC     null as molecule_desc,
# MAGIC     null as product_code,
# MAGIC     null as product_desc,
# MAGIC     'GI' as level_ta,
# MAGIC     '慢性胃炎、胃溃疡' as level_market,
# MAGIC     null as level_molecule,
# MAGIC     null as level_brand,
# MAGIC     null as ratio_val,
# MAGIC     null as ratio_vol,
# MAGIC     0 as data_flag,
# MAGIC     null as brand_flag
# MAGIC from dwd.dwd_gnd_ext_retail_aagsa_ppi_oral

# COMMAND ----------

# MAGIC %sql
# MAGIC -------------------------------------------------------------------------------------
# MAGIC -- STEP-1: load rawdata to tmp table
# MAGIC -- 1.2 load pack data to tmp.tmp_retail_pack_rawdata
# MAGIC --   1.2.6 load from dwd.dwd_gnd_ext_retail_atomizer (pack-雾化器-全国&县域数据.xlsx)
# MAGIC --         Appends to the table; columns are matched by position.
# MAGIC -------------------------------------------------------------------------------------
# MAGIC insert into table tmp.tmp_retail_pack_rawdata
# MAGIC
# MAGIC select
# MAGIC     -- left-pad ids shorter than 7 characters with zeros; longer ids pass through unchanged
# MAGIC     case when length(zk_product_id) < 7 then right(concat('0000000', zk_product_id), 7) else zk_product_id end as product_id,
# MAGIC     zk_product_id,
# MAGIC     zk_region,
# MAGIC     zk_rx_otc,
# MAGIC     zk_medicine_type,
# MAGIC     zk_medicine_tier1,
# MAGIC     zk_medicine_tier2,
# MAGIC     zk_medicine_tier3,
# MAGIC     zk_medicine_tier4,
# MAGIC     zk_common_name,
# MAGIC     zk_dosage_form,
# MAGIC     zk_user_type,
# MAGIC     zk_category_name,
# MAGIC     zk_product_name,
# MAGIC     zk_brand_name,
# MAGIC     zk_manu_des,
# MAGIC     zk_corp_des,
# MAGIC     zk_pack_des,
# MAGIC     month,
# MAGIC     quarter,
# MAGIC     -- strip thousands separators from the numeric text columns
# MAGIC     replace(price, ',', ''),
# MAGIC     replace(sales_unit, ',', ''),
# MAGIC     replace(sales_value, ',', ''),
# MAGIC     digital_spread_rate,
# MAGIC     weighted_spread_rate,
# MAGIC     counting_unit,
# MAGIC     null as pack_code,
# MAGIC     null as molecule_code,
# MAGIC     null as molecule_desc,
# MAGIC     null as product_code,
# MAGIC     null as product_desc,
# MAGIC     'RE' as level_ta,
# MAGIC     '慢性阻塞性肺疾病' as level_market,
# MAGIC     null as level_molecule,
# MAGIC     null as level_brand,
# MAGIC     null as ratio_val,
# MAGIC     null as ratio_vol,
# MAGIC     0 as data_flag,
# MAGIC     null as brand_flag
# MAGIC from dwd.dwd_gnd_ext_retail_atomizer
# MAGIC ------------------------------ *hard_code* ------------------------------------------
# MAGIC -- 2024: for the RE / COPD (慢性阻塞性肺疾病) PACK data, extra 24-province "atomizer"
# MAGIC -- data was purchased, so only the 24-province detail rows were loaded here; the
# MAGIC -- national figures had already been loaded from "pack-GI-慢性胃炎胃溃疡-全国.xlsx".
# MAGIC -- NOTE(review): that file name is the GI workbook and looks like a copy-paste slip;
# MAGIC -- presumably the RE national file was meant - confirm.
# MAGIC -- 2025: all rows are loaded, so the 2024 filter below stays disabled.
# MAGIC -- where zk_region <> '全国'
# MAGIC ------------------------------ *hard_code* ------------------------------------------

# COMMAND ----------

# MAGIC %sql
# MAGIC -------------------------------------------------------------------------------------
# MAGIC -- STEP-1: load rawdata to tmp table
# MAGIC -- 1.2 load pack data to tmp.tmp_retail_pack_rawdata
# MAGIC --   1.2.7 load from dwd.dwd_gnd_ext_retail_nataional_niad (pack-DM-口服降糖化学药.xlsx)
# MAGIC --         Appends to the table; columns are matched by position.
# MAGIC -------------------------------------------------------------------------------------
# MAGIC insert into table tmp.tmp_retail_pack_rawdata
# MAGIC
# MAGIC select
# MAGIC     -- left-pad ids shorter than 7 characters with zeros; longer ids pass through unchanged
# MAGIC     case when length(zk_product_id) < 7 then right(concat('0000000', zk_product_id), 7) else zk_product_id end as product_id,
# MAGIC     zk_product_id,
# MAGIC     zk_region,
# MAGIC     zk_rx_otc,
# MAGIC     zk_medicine_type,
# MAGIC     zk_medicine_tier1,
# MAGIC     zk_medicine_tier2,
# MAGIC     zk_medicine_tier3,
# MAGIC     zk_medicine_tier4,
# MAGIC     zk_common_name,
# MAGIC     zk_dosage_form,
# MAGIC     zk_user_type,
# MAGIC     zk_category_name,
# MAGIC     zk_product_name,
# MAGIC     zk_brand_name,
# MAGIC     zk_manu_des,
# MAGIC     zk_corp_des,
# MAGIC     zk_pack_des,
# MAGIC     month,
# MAGIC     quarter,
# MAGIC     -- strip thousands separators from the numeric text columns
# MAGIC     replace(price, ',', ''),
# MAGIC     replace(sales_unit, ',', ''),
# MAGIC     replace(sales_value, ',', ''),
# MAGIC     digital_spread_rate,
# MAGIC     weighted_spread_rate,
# MAGIC     counting_unit,
# MAGIC     null as pack_code,
# MAGIC     null as molecule_code,
# MAGIC     null as molecule_desc,
# MAGIC     null as product_code,
# MAGIC     null as product_desc,
# MAGIC     'DM' as level_ta,
# MAGIC     'NIAD' as level_market,
# MAGIC     null as level_molecule,
# MAGIC     null as level_brand,
# MAGIC     null as ratio_val,
# MAGIC     null as ratio_vol,
# MAGIC     0 as data_flag,
# MAGIC     null as brand_flag
# MAGIC from dwd.dwd_gnd_ext_retail_nataional_niad

# COMMAND ----------

# MAGIC %sql
# MAGIC -------------------------------------------------------------------------------------
# MAGIC -- STEP-1: load rawdata to tmp table
# MAGIC -- 1.2 load pack data to tmp.tmp_retail_pack_rawdata
# MAGIC --   1.2.8 load from dwd.dwd_gnd_ext_retail_nataional_rd (pack-RD-肾科-全国.xlsx)
# MAGIC --         Appends to the table; columns are matched by position.
# MAGIC -------------------------------------------------------------------------------------
# MAGIC insert into table tmp.tmp_retail_pack_rawdata
# MAGIC
# MAGIC ------------------------------ *hard_code* ------------------------------------------
# MAGIC -- Exclude diuretic data: products whose atc2_cod is 'C03' (compared case-insensitively)
# MAGIC with tmp_exclusion as (
# MAGIC     select distinct product_id
# MAGIC     from dwd.dwd_gnd_ext_retail_pack_property
# MAGIC     where lower(atc2_cod) = 'c03'
# MAGIC )
# MAGIC ------------------------------ *hard_code* ------------------------------------------
# MAGIC
# MAGIC select
# MAGIC     -- left-pad ids shorter than 7 characters with zeros; longer ids pass through unchanged
# MAGIC     case when length(zk_product_id) < 7 then right(concat('0000000', zk_product_id), 7) else zk_product_id end as product_id,
# MAGIC     zk_product_id,
# MAGIC     zk_region,
# MAGIC     zk_rx_otc,
# MAGIC     zk_medicine_type,
# MAGIC     zk_medicine_tier1,
# MAGIC     zk_medicine_tier2,
# MAGIC     zk_medicine_tier3,
# MAGIC     zk_medicine_tier4,
# MAGIC     zk_common_name,
# MAGIC     zk_dosage_form,
# MAGIC     zk_user_type,
# MAGIC     zk_category_name,
# MAGIC     zk_product_name,
# MAGIC     zk_brand_name,
# MAGIC     zk_manu_des,
# MAGIC     zk_corp_des,
# MAGIC     zk_pack_des,
# MAGIC     month,
# MAGIC     quarter,
# MAGIC     -- strip thousands separators from the numeric text columns
# MAGIC     replace(price, ',', ''),
# MAGIC     replace(sales_unit, ',', ''),
# MAGIC     replace(sales_value, ',', ''),
# MAGIC     digital_spread_rate,
# MAGIC     weighted_spread_rate,
# MAGIC     counting_unit,
# MAGIC     null as pack_code,
# MAGIC     null as molecule_code,
# MAGIC     null as molecule_desc,
# MAGIC     null as product_code,
# MAGIC     null as product_desc,
# MAGIC     'RD' as level_ta,
# MAGIC     'RD Market' as level_market,
# MAGIC     null as level_molecule,
# MAGIC     null as level_brand,
# MAGIC     null as ratio_val,
# MAGIC     null as ratio_vol,
# MAGIC     0 as data_flag,
# MAGIC     null as brand_flag
# MAGIC from dwd.dwd_gnd_ext_retail_nataional_rd a
# MAGIC -- anti-join on the padded id; the outer column is qualified with "a." so the
# MAGIC -- correlation to the source table is explicit
# MAGIC where not exists (
# MAGIC     select * from tmp_exclusion b
# MAGIC     where case when length(a.zk_product_id) < 7 then right(concat('0000000', a.zk_product_id), 7) else a.zk_product_id end = b.product_id
# MAGIC )
# MAGIC

# COMMAND ----------

# MAGIC %sql
# MAGIC -------------------------------------------------------------------------------------
# MAGIC -- STEP-1: load rawdata to tmp table
# MAGIC -- 1.2 load pack data to tmp.tmp_retail_pack_rawdata
# MAGIC --   1.2.9 load from dwd.dwd_gnd_ext_retail_metoprolol_tartrat (pack-CV-酒石酸美托洛尔.xlsx)
# MAGIC --         Metoprolol tartrate PACK data was purchased in 2024 but not in 2025,
# MAGIC --         so the insert below is disabled.
# MAGIC --         NOTE(review): with the insert commented out this cell still runs the select
# MAGIC --         and only displays its result; nothing is written. Consider disabling the
# MAGIC --         whole cell if the preview is not needed.
# MAGIC -------------------------------------------------------------------------------------
# MAGIC --insert into table tmp.tmp_retail_pack_rawdata
# MAGIC
# MAGIC select
# MAGIC     -- left-pad ids shorter than 7 characters with zeros; longer ids pass through unchanged
# MAGIC     case when length(zk_product_id) < 7 then right(concat('0000000', zk_product_id), 7) else zk_product_id end as product_id,
# MAGIC     zk_product_id,
# MAGIC     zk_region,
# MAGIC     zk_rx_otc,
# MAGIC     zk_medicine_type,
# MAGIC     zk_medicine_tier1,
# MAGIC     zk_medicine_tier2,
# MAGIC     zk_medicine_tier3,
# MAGIC     zk_medicine_tier4,
# MAGIC     zk_common_name,
# MAGIC     zk_dosage_form,
# MAGIC     zk_user_type,
# MAGIC     zk_category_name,
# MAGIC     zk_product_name,
# MAGIC     zk_brand_name,
# MAGIC     zk_manu_des,
# MAGIC     zk_corp_des,
# MAGIC     zk_pack_des,
# MAGIC     month,
# MAGIC     quarter,
# MAGIC     -- strip thousands separators from the numeric text columns
# MAGIC     replace(price, ',', ''),
# MAGIC     replace(sales_unit, ',', ''),
# MAGIC     replace(sales_value, ',', ''),
# MAGIC     digital_spread_rate,
# MAGIC     weighted_spread_rate,
# MAGIC     counting_unit,
# MAGIC     null as pack_code,
# MAGIC     null as molecule_code,
# MAGIC     null as molecule_desc,
# MAGIC     null as product_code,
# MAGIC     null as product_desc,
# MAGIC     'CV' as level_ta,
# MAGIC     '高血压用药' as level_market,
# MAGIC     null as level_molecule,
# MAGIC     null as level_brand,
# MAGIC     null as ratio_val,
# MAGIC     null as ratio_vol,
# MAGIC     0 as data_flag,
# MAGIC     null as brand_flag
# MAGIC from dwd.dwd_gnd_ext_retail_metoprolol_tartrat a
# MAGIC ------------------------------ *hard_code* ------------------------------------------
# MAGIC -- 2024: for the CV / hypertension PACK data, extra 24-province metoprolol tartrate
# MAGIC -- data was purchased, so only the 24-province detail rows are taken here; the
# MAGIC -- national figures were already loaded from "pack-CV-高血压-化学药-全国.xlsx".
# MAGIC where zk_region <> '全国'
# MAGIC ------------------------------ *hard_code* ------------------------------------------

# COMMAND ----------

# %sql
# 2026-03-02 chenwu: this cell is disabled. The pediatric cough/asthma (小儿咳喘) template
# is not used for now; Fasenra Market data is uploaded via DTP RAW DATA instead.
# -------------------------------------------------------------------------------------
# -- STEP-1: load rawdata to tmp table
# -- 1.2 load pack data to tmp.tmp_retail_pack_rawdata
# --   1.2.10 load from dwd.dwd_gnd_ext_retail_asthma (pack - manually supplemented)
# --          Manually supplemented pediatric cough/asthma data; these rows need no splitting.
# -------------------------------------------------------------------------------------
# insert into table tmp.tmp_retail_pack_rawdata
# select
#     case when length(zk_product_id) < 7 then right(concat('0000000',zk_product_id),7) else zk_product_id end as product_id,
#     zk_product_id,
#     zk_region,
#     zk_rx_otc,
#     zk_medicine_type,
#     zk_medicine_tier1,
#     zk_medicine_tier2,
#     zk_medicine_tier3,
#     zk_medicine_tier4,
#     zk_common_name,
#     zk_dosage_form,
#     zk_user_type,
#     zk_category_name,
#     zk_product_name,
#     zk_brand_name,
#     zk_manu_des,
#     zk_corp_des,
#     zk_pack_des,
#     month,
#     quarter,
#     price,
#     sales_unit,
#     sales_value,
#     digital_spread_rate,
#     weighted_spread_rate,
#     counting_unit,
#     null,
#     null,
#     null,
#     null,
#     null,
#     'RE_NO_SPLIT',
#     '小儿咳喘',
#     null,
#     null,
#     null,
#     null,
#     0,
#     null
# from dwd.dwd_gnd_ext_retail_asthma

# COMMAND ----------

############################################################END##############################################################