Files
MarketAnalysis-ETL/EXTERNAL/Retail/05 load_dtp_temp_data.py
2026-04-20 14:55:25 +08:00

158 lines
5.7 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# Databricks notebook source
# MAGIC %sql
# MAGIC -- CREATE or REPLACE TABLE tmp.tmp_retail_dtp_pack_rawdata (
# MAGIC -- -- product_id STRING,
# MAGIC -- iqvia_pack_code string,
# MAGIC -- -- region_type STRING,
# MAGIC -- year STRING,
# MAGIC -- time STRING,
# MAGIC -- -- higher_level_region STRING,
# MAGIC -- region STRING,
# MAGIC -- -- prescription_nature STRING,
# MAGIC -- -- medicine_attribute STRING,
# MAGIC -- -- dosage_form STRING,
# MAGIC -- -- object STRING,
# MAGIC -- -- zk_classify1 STRING,
# MAGIC -- -- zk_classify2 STRING,
# MAGIC -- -- zk_classify3 STRING,
# MAGIC -- target_points STRING,
# MAGIC -- -- common_name STRING,
# MAGIC -- -- brand_name STRING,
# MAGIC -- -- product_name STRING,
# MAGIC -- -- category_name STRING,
# MAGIC -- -- pack_des STRING,
# MAGIC -- counting_unit DECIMAL(20,8),
# MAGIC -- -- factory STRING,
# MAGIC -- -- corp_des STRING,
# MAGIC -- average_price DECIMAL(20,8),
# MAGIC -- sales_amount DECIMAL(20,8),
# MAGIC -- sales_volume DECIMAL(20,8),
# MAGIC -- counting_units_obversion DECIMAL(20,8),
# MAGIC -- pack_code STRING,
# MAGIC -- molecule_code STRING,
# MAGIC -- molecule_desc STRING,
# MAGIC -- product_code STRING,
# MAGIC -- product_desc STRING,
# MAGIC -- level_ta STRING,
# MAGIC -- level_market STRING,
# MAGIC -- level_molecule STRING,
# MAGIC -- level_brand STRING,
# MAGIC -- ratio_val DECIMAL(20,10),
# MAGIC -- ratio_vol DECIMAL(20,10),
# MAGIC -- data_flag INT,
# MAGIC -- brand_flag INT)
# MAGIC -- USING delta
# MAGIC -- LOCATION 'abfss://master@retaildlstoragetest.dfs.core.chinacloudapi.cn/TMP/tmp_retail_dtp_pack_rawdata'
# MAGIC -- ;
# COMMAND ----------
### Constants
# String labels for the therapeutic-area / market levels used by this load.
# NOTE(review): the SQL cells visible in this notebook write the literal 'ONC'
# directly instead of reading LEVEL_TA_ONC -- keep the two in sync.
LEVEL_TA_ONC: str = "ONC"
LEVEL_MARKET_EGFR_TKI: str = "EGFR TKI"
# COMMAND ----------
############################################################START##############################################################
### STEP-1: load rawdata to tmp table
# COMMAND ----------
# MAGIC %md
# MAGIC ## STEP-1: load rawdata to tmp table
# COMMAND ----------
# MAGIC %sql
# MAGIC -------------------------------------------------------------------------------------
# MAGIC -- STEP-1: load rawdata to tmp table
# MAGIC -- 1.1 load brand data to tmp_retail_dtp_brand_rawdata
# MAGIC --
# MAGIC -- Overwrites tmp.tmp_retail_dtp_brand_rawdata with the rows of
# MAGIC -- dwd.dwd_gnd_ext_dtp_zk_brand whose ranked_by is 'value'.
# MAGIC -- The INSERT has no column list, so the select list is matched to the target
# MAGIC -- table by position: keep the select order in sync with the target table DDL.
# MAGIC -------------------------------------------------------------------------------------
# MAGIC insert overwrite table tmp.tmp_retail_dtp_brand_rawdata
# MAGIC
# MAGIC select
# MAGIC type,
# MAGIC -- ta is not read from the source table; every row is stamped with the literal 'ONC'
# MAGIC 'ONC' as ta,
# MAGIC market,
# MAGIC zk_brand_category,
# MAGIC zk_common_name,
# MAGIC zk_manu_des,
# MAGIC rc_name_en,
# MAGIC province_city,
# MAGIC yyyymm,
# MAGIC ytd,
# MAGIC sales_value,
# MAGIC sales_volume,
# MAGIC price,
# MAGIC pdot_counting_unit
# MAGIC from dwd.dwd_gnd_ext_dtp_zk_brand
# MAGIC -- only rows with ranked_by = 'value' are loaded; all other ranked_by values are dropped
# MAGIC where ranked_by = 'value'
# MAGIC
# COMMAND ----------
# MAGIC %sql
# MAGIC -------------------------------------------------------------------------------------
# MAGIC -- STEP-1: load rawdata to tmp table
# MAGIC -- 1.2 load pack data to tmp_retail_dtp_pack_rawdata
# MAGIC --
# MAGIC -- Overwrites tmp.tmp_retail_dtp_pack_rawdata with dwd.dwd_gnd_ext_retail_dtp_datasource
# MAGIC -- aggregated to one row per (iqvia_pack_code, year, time, region, target_points).
# MAGIC -- iqvia_pack_code comes from dwd.dwd_gnd_ext_dtp_pack_property via a left join on product_id.
# MAGIC -- The INSERT has no column list, so the select list is matched to the target
# MAGIC -- table by position: keep the select order in sync with the target table DDL.
# MAGIC -------------------------------------------------------------------------------------
# MAGIC insert overwrite table tmp.tmp_retail_dtp_pack_rawdata
# MAGIC
# MAGIC select
# MAGIC -- dtp_raw_data.product_id,
# MAGIC dtp_pack_data.iqvia_pack_code,
# MAGIC -- dtp_raw_data.region_type,
# MAGIC dtp_raw_data.year,
# MAGIC dtp_raw_data.time,
# MAGIC -- dtp_raw_data.higher_level_region,
# MAGIC dtp_raw_data.region,
# MAGIC -- dtp_raw_data.prescription_nature,
# MAGIC -- dtp_raw_data.medicine_attribute,
# MAGIC -- dtp_raw_data.dosage_form,
# MAGIC -- dtp_raw_data.object,
# MAGIC -- dtp_raw_data.zk_classify1,
# MAGIC -- dtp_raw_data.zk_classify2,
# MAGIC -- dtp_raw_data.zk_classify3,
# MAGIC dtp_raw_data.target_points,
# MAGIC -- dtp_raw_data.common_name,
# MAGIC -- dtp_raw_data.brand_name,
# MAGIC -- dtp_raw_data.product_name,
# MAGIC -- dtp_raw_data.category_name,
# MAGIC -------------------------* hard_code *-------------------------
# MAGIC -- For the pack_des field, the raw pack table uses '*' while the manually maintained pack_property table uses 'x'.
# MAGIC -- pack_des is involved in the join that derives pack_code, and a mismatch makes that join fail, so the difference must be normalized by hand.
# MAGIC -- NOTE: the replace() below is currently commented out, so this statement does not apply the normalization.
# MAGIC -- replace(dtp_raw_data.pack_des,'*','x'),
# MAGIC -------------------------* hard_code *-------------------------
# MAGIC -- Per group: counting_unit, average_price and counting_units_obversion take the maximum;
# MAGIC -- sales_amount and sales_volume are summed.
# MAGIC max(dtp_raw_data.counting_unit) counting_unit,
# MAGIC -- dtp_raw_data.factory,
# MAGIC -- dtp_raw_data.corp_des,
# MAGIC max(dtp_raw_data.average_price) average_price,
# MAGIC sum(dtp_raw_data.sales_amount) sales_amount,
# MAGIC sum(dtp_raw_data.sales_volume) sales_volume,
# MAGIC max(dtp_raw_data.counting_units_obversion) counting_units_obversion,
# MAGIC -- The columns below are loaded as NULL / constant placeholders by this statement.
# MAGIC -- NOTE(review): presumably they are populated by later steps of the pipeline -- confirm.
# MAGIC null as pack_code,
# MAGIC null as molecule_code,
# MAGIC null as molecule_desc,
# MAGIC null as product_code,
# MAGIC null as product_desc,
# MAGIC 'ONC' as level_ta,
# MAGIC null as level_market, --- the raw pack file contains several markets; this is filled in later through a join
# MAGIC null as level_molecule,
# MAGIC null as level_brand,
# MAGIC null as ratio_val,
# MAGIC null as ratio_vol,
# MAGIC 0 as data_flag,
# MAGIC null as brand_flag
# MAGIC from dwd.dwd_gnd_ext_retail_dtp_datasource as dtp_raw_data
# MAGIC -- Left join: raw rows without a matching product_id are kept with a NULL iqvia_pack_code
# MAGIC -- and are therefore aggregated together under the NULL key for each year/time/region/target_points.
# MAGIC -- NOTE(review): if product_id is not unique in dwd_gnd_ext_dtp_pack_property the join fans out
# MAGIC -- and the sums above are inflated -- confirm uniqueness.
# MAGIC left join dwd.dwd_gnd_ext_dtp_pack_property as dtp_pack_data
# MAGIC on dtp_raw_data.product_id = dtp_pack_data.product_id
# MAGIC group by
# MAGIC dtp_pack_data.iqvia_pack_code,
# MAGIC dtp_raw_data.year,
# MAGIC dtp_raw_data.time,
# MAGIC dtp_raw_data.region,
# MAGIC dtp_raw_data.target_points
# COMMAND ----------
############################################################END##############################################################