Add EXTERNAL SQL scripts

This commit is contained in:
2026-04-20 14:55:25 +08:00
parent c05ba7ec7e
commit b4979eed82
344 changed files with 61619 additions and 0 deletions

View File

@@ -0,0 +1,194 @@
# Databricks notebook source
# MAGIC %md
# MAGIC ### 从blob读取csv文件作为xiehe的事实表
# COMMAND ----------
# MAGIC %run ../../../Common/config
# COMMAND ----------
from datetime import datetime, timedelta
import pandas as pd
# COMMAND ----------
# Pick the upload landing folder for the current deployment environment.
# ENVIRONMENT and the *_ENVIRONMENT_VALUE constants are not defined in this
# notebook; presumably they come from the Common/config notebook run above.
if ENVIRONMENT == PRD_ENVIRONMENT_VALUE:
    factsales_file_path_template = "abfss://master@azcdatalakeprd.dfs.core.chinacloudapi.cn/ODS/GND/UserUpload/"
elif ENVIRONMENT == TEST_ENVIRONMENT_VALUE:
    factsales_file_path_template = "abfss://master@retaildlstoragetest.dfs.core.chinacloudapi.cn/ODS/GND/UserUpload/"
else:
    # Without this branch the template stays undefined and the notebook only
    # fails later with an unrelated-looking NameError.
    raise ValueError(f"Unsupported ENVIRONMENT value: {ENVIRONMENT!r}")
# COMMAND ----------
# Build today's upload folder (yyyy/MM/dd/) from the current time at UTC+8.
# datetime.utcnow() is deprecated since Python 3.12; asking for "now" in a
# fixed +08:00 offset formats to the same date path. Note that current_date is
# now timezone-aware instead of naive.
from datetime import timezone

current_date = datetime.now(timezone(timedelta(hours=8)))
date_path = current_date.strftime("%Y/%m/%d/")
base_path = factsales_file_path_template + date_path
# COMMAND ----------
def path_exists(path):
    """Report whether ``path`` can be listed with ``dbutils.fs.ls``.

    Args:
        path: Storage path to probe.

    Returns:
        bool: True when the listing succeeds, False when the failure message
        contains ``java.io.FileNotFoundException``.

    Raises:
        Exception: Any other listing failure is logged and re-raised as is.
    """
    try:
        dbutils.fs.ls(path)
    except Exception as e:
        if "java.io.FileNotFoundException" not in str(e):
            # anything other than "not found" is a real problem for the caller
            print(f"检查路径 {path} 时出错: {e}")
            raise
        return False
    return True
# COMMAND ----------
def list_file_name(path):
    """Return the first entry found inside each child of ``path``.

    Every child returned by ``dbutils.fs.ls(path)`` is listed in turn and only
    its first entry is kept. A child whose listing is empty is skipped rather
    than raising ``IndexError``.

    Args:
        path: Folder to scan.

    Returns:
        list: One ``dbutils.fs.ls`` entry per non-empty child, in listing order.
    """
    first_entries = []
    for child in dbutils.fs.ls(path):
        entries = dbutils.fs.ls(child.path)
        if entries:
            first_entries.append(entries[0])
    return first_entries
# COMMAND ----------
def download_file(file_path, local_path):
    """Copy ``file_path`` to ``local_path`` with ``dbutils.fs.cp`` and log it.

    Args:
        file_path: Source path of the file to copy.
        local_path: Destination path handed to ``dbutils.fs.cp``.

    Returns:
        The ``local_path`` argument, unchanged.
    """
    dbutils.fs.cp(file_path, local_path)
    # Keep source and destination apart in the log line; they used to be
    # printed back to back, which made the message unreadable.
    print(f"已下载 {file_path} 到 {local_path}")
    return local_path
# COMMAND ----------
# MAGIC %md
# MAGIC ### 获取路径下的文件名称
# MAGIC - 并挑出符合条件的文件路径
# COMMAND ----------
# Collect the candidate files under today's folder into a pandas DataFrame.
# files_df is created up front with its final columns so that later cells
# always find it defined and correctly shaped, even when the folder is
# missing, the listing is empty, or an error is logged below.
_file_columns = ['path', 'modificationtime', 'name']
files_df = pd.DataFrame(columns=_file_columns)
try:
    if path_exists(base_path):
        all_file_list = list_file_name(base_path)
        # one row per listed entry; used afterwards to filter and de-duplicate
        files_df = pd.DataFrame(
            [
                {
                    'path': f.path,
                    'modificationtime': f.modificationTime,
                    'name': f.name,
                }
                for f in all_file_list
            ],
            columns=_file_columns,
        )
        print(f"{base_path} 路径存在")
    else:
        print(f"{base_path} 路径不存在")
except Exception as e:
    # best-effort cell: log the problem and continue with the empty frame
    print(e)
# COMMAND ----------
# Keep only the most recently modified copy of each file name, then keep the
# names that look like Dept_Fact*.csv.
try:
    newest_first = files_df.sort_values('modificationtime', ascending=False)
    # drop_duplicates keeps the first (newest) row per name; sort_index
    # restores the original listing order
    files_df = newest_first.drop_duplicates('name').sort_index()
    is_fact_csv = files_df['name'].str.match(r'^Dept_Fact.*\.csv$')
    files_df = files_df[is_fact_csv]
except Exception as e:
    print(e)
# COMMAND ----------
# MAGIC %md
# MAGIC ### 读取文件内容
# COMMAND ----------
import os
# COMMAND ----------
# Download every selected file and read it into a Spark DataFrame.
# Both outputs are initialised before the try block so the next cell always
# finds them defined (and never sees values left over from an earlier run)
# when this cell fails part-way through.
df_all = []
df_ifexists = False
try:
    file_paths = files_df['path'].tolist()
    if file_paths:
        for file in file_paths:
            # stage the file under the catalog's tmp volume, keeping its base name
            local_path = download_file(file, f"/Volumes/{NGBI_CATALOG}/tmp/volume_tmp/tmp/{os.path.basename(file)}")
            file_df = (
                spark.read.option("header", "true")
                .option("quote", '"')
                .option("escape", '"')
                .option("multiLine", "true")
                .option("mode", "PERMISSIVE")
                .csv(local_path)
            )
            print(f'已读取{local_path}')
            df_all.append(file_df)
        # only flag success once every file has been read
        df_ifexists = True
    else:
        print('没有符合条件的文件')
except Exception as e:
    # best-effort cell: log and leave df_ifexists False so nothing is loaded
    print(e)
# COMMAND ----------
# Stage every downloaded file into tmp.tmp_xiehe_raw_data: the table is
# truncated once, then each DataFrame is renamed to the target column names
# and appended through a temporary view.
try:
    if df_ifexists:
        # (source CSV header, target column) pairs, applied in this order
        header_to_column = [
            ('区域', 'area'),
            ('城市', 'city'),
            ('年&季度', 'yyyyqq'),
            # NOTE(review): the source header mapped to yyyymm is an empty
            # string here - confirm against the real CSV header.
            ('', 'yyyymm'),
            ('医保类型', 'reimburse'),
            ('处方来源', 'prescription_source'),
            ('处方科室_lv1', 'prescription_dept_lv1'),
            ('处方科室_lv2', 'prescription_dept_lv2'),
            ('处方科室_lv3', 'prescription_dept_lv3'),
            ('ATC', 'ATC'),
            ('PHCD标准码', 'new_code'),
            ('药品通用名', 'common_name'),
            ('药品商品名', 'product_name'),
            ('规格', 'pack_des'),
            ('PackSize', 'PackSize'),
            ('PackageType', 'PackageType'),
            ('给药途径', 'nfc'),
            ('药品厂家', 'manu_des'),
            ('药品剂型', 'drug_delivery_route'),
            ('处方张数', 'prescription'),
            ('取药数量', 'sales_vol'),
            ('处方金额', 'sales_value'),
        ]
        insert_sql = "INSERT into tmp.tmp_xiehe_raw_data SELECT area,city,yyyyqq,yyyymm,reimburse,prescription_source,prescription_dept_lv1,prescription_dept_lv2,prescription_dept_lv3,ATC,new_code,common_name,product_name,pack_des,PackSize,PackageType,nfc,manu_des,drug_delivery_route,prescription,sales_vol,sales_value FROM fact_sales"

        spark.sql("TRUNCATE table tmp.tmp_xiehe_raw_data")
        for file_no, source_df in enumerate(df_all, start=1):
            renamed_df = source_df
            for header, column in header_to_column:
                renamed_df = renamed_df.withColumnRenamed(header, column)
            # the INSERT statement reads from this view name
            renamed_df.createOrReplaceTempView('fact_sales')
            spark.sql(insert_sql)
            # progress marker: number of files loaded so far
            print(f'{file_no}')
except Exception as e:
    print(e)
# COMMAND ----------
# MAGIC %md
# MAGIC ### 将读取到的dataframe写入表中
# COMMAND ----------
# MAGIC %sql
# MAGIC -- Full overwrite of dwd.dwd_gnd_ext_xiehe_raw_data from the staging table tmp.tmp_xiehe_raw_data.
# MAGIC -- h_level is loaded as NULL; PackSize and prescription are cast to BIGINT,
# MAGIC -- sales_vol and sales_value to DECIMAL(38,8). Columns are matched by position.
# MAGIC insert overwrite dwd.dwd_gnd_ext_xiehe_raw_data
# MAGIC select
# MAGIC area ,
# MAGIC city ,
# MAGIC yyyyqq ,
# MAGIC yyyymm ,
# MAGIC null h_level ,
# MAGIC reimburse ,
# MAGIC prescription_source ,
# MAGIC prescription_dept_lv1 ,
# MAGIC prescription_dept_lv2 ,
# MAGIC prescription_dept_lv3 ,
# MAGIC ATC ,
# MAGIC new_code ,
# MAGIC common_name ,
# MAGIC product_name ,
# MAGIC pack_des ,
# MAGIC cast ( PackSize as BIGINT) PackSize ,
# MAGIC PackageType ,
# MAGIC nfc ,
# MAGIC manu_des ,
# MAGIC drug_delivery_route,
# MAGIC cast ( prescription as BIGINT) prescription ,
# MAGIC cast ( sales_vol as DECIMAL(38,8)) sales_vol,
# MAGIC cast ( sales_value as DECIMAL(38,8)) sales_value ,
# MAGIC from_utc_timestamp(current_timestamp(),'UTC+8') etl_insert_dt,
# MAGIC from_utc_timestamp(current_timestamp(),'UTC+8') etl_update_dt
# MAGIC from tmp.tmp_xiehe_raw_data

Binary file not shown.

View File

@@ -0,0 +1,232 @@
-- Databricks notebook source
-- MAGIC %md
-- MAGIC ### xiehe的 pack property
-- COMMAND ----------
-- CREATE OR REPLACE TABLE dm.dm_xiehe_pack_property (
-- PACK_COD STRING,
-- pack_des STRING,
-- stgh_des STRING,
-- pack_lch STRING,
-- PROD_COD STRING,
-- prod_des STRING,
-- prod_des_c STRING,
-- Family_Code STRING,
-- Family_Name STRING,
-- cmps_cod STRING,
-- cmps_des STRING,
-- cmps_des_c STRING,
-- atc1_cod STRING,
-- atc1_des STRING,
-- atc1_des_c STRING,
-- atc2_cod STRING,
-- atc2_des STRING,
-- atc2_des_c STRING,
-- atc3_cod STRING,
-- atc3_des STRING,
-- atc3_des_c STRING,
-- atc4_cod STRING,
-- atc4_des STRING,
-- atc4_des_c STRING,
-- app1_cod STRING,
-- app1_des STRING,
-- app1_des_c STRING,
-- app2_cod STRING,
-- app2_des STRING,
-- app2_des_c STRING,
-- app3_cod STRING,
-- app3_des STRING,
-- app3_des_c STRING,
-- bio_desc STRING,
-- gene_orig_desc STRING,
-- eth_otc_desc STRING,
-- nrdl_desc STRING,
-- NRDL_Entry_Date STRING,
-- edl_desc STRING,
-- tcm_desc STRING,
-- paed_desc STRING,
-- gqce_desc STRING,
-- vbp_desc STRING,
-- manu_cod STRING,
-- manu_des STRING,
-- manu_des_c STRING,
-- mnfl_cod STRING,
-- mnfl_des STRING,
-- corp_cod STRING,
-- corp_des STRING,
-- corp_des_c STRING,
-- BrandType STRING,
-- IS_AZ STRING,
-- AZ_MAIN STRING,
-- AZ_Related STRING,
-- countingunit STRING,
-- ETL_INSERT_DT TIMESTAMP,
-- ETL_UPDATE_DT TIMESTAMP)
-- USING delta
-- -- LOCATION 'abfss://master@azcdatalakeprd.dfs.core.chinacloudapi.cn/DM/dm_xiehe_pack_property';
-- -- 上面是生产环境location下面是测试环境location
-- LOCATION 'abfss://master@retaildlstoragetest.dfs.core.chinacloudapi.cn/DM/dm_xiehe_pack_property';
-- COMMAND ----------
-- Temporary view over dwd.dwd_gnd_dept_pack_property.
-- Columns the source does not supply here (Family_*, NRDL_Entry_Date,
-- BrandType, IS_AZ, AZ_MAIN, AZ_Related) are exposed as NULL placeholders.
CREATE OR REPLACE TEMPORARY VIEW dwd_pack_property AS
SELECT
    -- pack
    src.iqvia_pack_code, src.PACK_DES, src.STGH_DES, src.PACK_LCH,
    NULL AS Family_Code,
    NULL AS Family_Name,
    -- product / molecule
    src.iqvia_prod_code, src.PROD_DES, src.PROD_DES_C,
    src.CMPS_COD, src.CMPS_DES, src.CMPS_DES_C,
    -- ATC / APP codes
    src.ATC1_COD, src.ATC2_COD, src.ATC3_COD, src.ATC4_COD,
    src.APP1_COD, src.APP2_COD, src.APP3_COD,
    -- flags
    src.BIO_DESC, src.GENE_ORIG_DESC, src.ETH_OTC_DESC, src.NRDL_DESC,
    NULL AS NRDL_Entry_Date,
    src.EDL_DESC, src.TCM_DESC, src.PAED_DESC, src.GQCE_DESC, src.VBP_DESC,
    -- manufacturer / corporation
    src.MANU_COD, src.MANU_DES, src.MANU_DES_C,
    src.MNFL_COD, src.MNFL_DES,
    src.CORP_COD, src.CORP_DES, src.CORP_DES_C,
    NULL AS BrandType,
    NULL AS IS_AZ,
    NULL AS AZ_MAIN,
    NULL AS AZ_Related,
    -- ATC / APP descriptions
    src.atc1_des, src.atc1_des_c,
    src.atc2_des, src.atc2_des_c,
    src.atc3_des, src.atc3_des_c,
    src.atc4_des, src.atc4_des_c,
    src.app1_des, src.app1_des_c,
    src.app2_des, src.app2_des_c,
    src.app3_des, src.app3_des_c,
    src.countingunit
FROM dwd.dwd_gnd_dept_pack_property AS src
-- COMMAND ----------
-- create or replace table dm.dm_xiehe_pack_property
-- LOCATION 'abfss://master@retaildlstoragetest.dfs.core.chinacloudapi.cn/DM/dm_xiehe_pack_property'
-- as
-- Rebuild dm.dm_xiehe_pack_property: one row per iqvia_pack_code from the
-- dwd_pack_property view (l), enriched by tmp.tmp_ext_chpa_pack_propetry (r)
-- matched on PACK_COD. For shared attributes the right-side value wins when it
-- is not NULL. Columns are matched to the target table by position.
-- NOTE(review): both ROW_NUMBER() calls order by their own partition key, so
-- which duplicate survives is arbitrary - confirm that is acceptable.
insert overwrite table dm.dm_xiehe_pack_property
WITH dedup_l AS (
SELECT *,
ROW_NUMBER() OVER (PARTITION BY iqvia_pack_code ORDER BY iqvia_pack_code) AS rn
FROM dwd_pack_property
QUALIFY rn = 1
),
dedup_r AS (
SELECT *,
ROW_NUMBER() OVER (PARTITION BY PACK_COD ORDER BY PACK_COD) AS rn
FROM tmp.tmp_ext_chpa_pack_propetry
QUALIFY rn = 1
)
SELECT
l.iqvia_pack_code AS PACK_COD,
-- Pack / Product descriptions
COALESCE(r.PACK_DES, l.pack_des) AS pack_des,
COALESCE(r.STGH_DES, l.stgh_des) AS stgh_des,
COALESCE(r.PACK_LCH, l.pack_lch) AS pack_lch,
COALESCE(r.PROD_COD, l.iqvia_prod_code) AS PROD_COD,
COALESCE(r.PROD_DES, l.prod_des) AS prod_des,
COALESCE(r.PROD_DES_C, l.prod_des_c) AS prod_des_c,
-- Right side only: Family
r.Family_Code,
r.Family_Name,
-- Molecule / composition
COALESCE(r.CMPS_COD, l.cmps_cod) AS cmps_cod,
COALESCE(r.CMPS_DES, l.cmps_des) AS cmps_des,
COALESCE(r.CMPS_DES_C, l.cmps_des_c) AS cmps_des_c,
-- ATC
COALESCE(r.ATC1_COD, l.atc1_cod) AS atc1_cod,
COALESCE(r.atc1_des, l.atc1_des) AS atc1_des,
COALESCE(r.atc1_des_c, l.atc1_des_c) AS atc1_des_c,
COALESCE(r.ATC2_COD, l.atc2_cod) AS atc2_cod,
COALESCE(r.atc2_des, l.atc2_des) AS atc2_des,
COALESCE(r.atc2_des_c, l.atc2_des_c) AS atc2_des_c,
COALESCE(r.ATC3_COD, l.atc3_cod) AS atc3_cod,
COALESCE(r.atc3_des, l.atc3_des) AS atc3_des,
COALESCE(r.atc3_des_c, l.atc3_des_c) AS atc3_des_c,
COALESCE(r.ATC4_COD, l.atc4_cod) AS atc4_cod,
COALESCE(r.atc4_des, l.atc4_des) AS atc4_des,
COALESCE(r.atc4_des_c, l.atc4_des_c) AS atc4_des_c,
-- APP
COALESCE(r.APP1_COD, l.app1_cod) AS app1_cod,
COALESCE(r.app1_des, l.app1_des) AS app1_des,
COALESCE(r.app1_des_c, l.app1_des_c) AS app1_des_c,
COALESCE(r.APP2_COD, l.app2_cod) AS app2_cod,
COALESCE(r.app2_des, l.app2_des) AS app2_des,
COALESCE(r.app2_des_c, l.app2_des_c) AS app2_des_c,
COALESCE(r.APP3_COD, l.app3_cod) AS app3_cod,
COALESCE(r.app3_des, l.app3_des) AS app3_des,
COALESCE(r.app3_des_c, l.app3_des_c) AS app3_des_c,
-- Flags
COALESCE(r.BIO_DESC, l.bio_desc) AS bio_desc,
COALESCE(r.GENE_ORIG_DESC, l.gene_orig_desc) AS gene_orig_desc,
COALESCE(r.ETH_OTC_DESC, l.eth_otc_desc) AS eth_otc_desc,
COALESCE(r.NRDL_DESC, l.nrdl_desc) AS nrdl_desc,
r.NRDL_Entry_Date,
COALESCE(r.EDL_DESC, l.edl_desc) AS edl_desc,
COALESCE(r.TCM_DESC, l.tcm_desc) AS tcm_desc,
COALESCE(r.PAED_DESC, l.paed_desc) AS paed_desc,
COALESCE(r.GQCE_DESC, l.gqce_desc) AS gqce_desc,
COALESCE(r.VBP_DESC, l.vbp_desc) AS vbp_desc,
-- Manufacturer
COALESCE(r.MANU_COD, l.manu_cod) AS manu_cod,
COALESCE(r.MANU_DES, l.manu_des) AS manu_des,
COALESCE(r.MANU_DES_C, l.manu_des_c) AS manu_des_c,
COALESCE(r.MNFL_COD, l.mnfl_cod) AS mnfl_cod,
COALESCE(r.MNFL_DES, l.mnfl_des) AS mnfl_des,
-- Corporation
COALESCE(r.CORP_COD, l.corp_cod) AS corp_cod,
COALESCE(r.CORP_DES, l.corp_des) AS corp_des,
COALESCE(r.CORP_DES_C, l.corp_des_c) AS corp_des_c,
-- Right side only: brand type / AZ flags
r.BrandType,
r.IS_AZ,
r.AZ_MAIN,
r.AZ_Related,
-- Left side only
l.countingunit,
from_utc_timestamp(current_timestamp(),'UTC+8') as ETL_INSERT_DT,
from_utc_timestamp(current_timestamp(),'UTC+8') as ETL_UPDATE_DT
FROM dedup_l l
LEFT JOIN dedup_r r
ON l.iqvia_pack_code = r.PACK_COD

View File

@@ -0,0 +1,76 @@
-- Databricks notebook source
-- MAGIC %md
-- MAGIC ### dws层xiehegeo
-- COMMAND ----------
-- Rebuild dws.dws_ext_xiehe_geo from the distinct geography combinations in
-- dwd.dwd_gnd_xiehe_geo, looked up against dm.dm_ims_td_geo by Chinese city name.
-- The two branches differ only in AZ_CITY_TIER and REGION_CENTER.
INSERT OVERWRITE dws.dws_ext_xiehe_geo
WITH geo_xiehe AS (
    -- DWD layer table; a NULL geo_province is replaced by ''
    SELECT
        fact_province,
        fact_city,
        geo_city,
        nvl(geo_province, '') geo_province
    FROM dwd.dwd_gnd_xiehe_geo
    GROUP BY
        fact_province,
        fact_city,
        geo_city,
        nvl(geo_province, '')
)
-- Special regions: tier is fixed to '4'; REGION_CENTER falls back to
-- fact_province when geo_province is ''
SELECT
    concat('xiehe_', nvl(b.AUDIT_COD, concat(a.geo_province, a.fact_province, a.fact_city))) AS AUDIT_COD,
    nvl(b.CITY, a.geo_city) CITY,
    nvl(b.CITY_C, a.geo_city) CITY_C,
    'City' CITY_TYPE,
    '4' AZ_CITY_TIER,
    a.geo_province PROVINCE,
    a.geo_province PROVINCE_C,
    concat('xiehe_', nvl(b.PROVINCE_CODE, a.geo_province)) PROVINCE_CODE,
    a.fact_city AS CITY_MAP,
    a.fact_province AS PROVINCE_MAP,
    CASE
        WHEN a.geo_province = '' THEN a.fact_province
        ELSE a.geo_province
    END AS REGION_CENTER,
    from_utc_timestamp(current_timestamp(), 'UTC+8') ETL_INSERT_DT,
    from_utc_timestamp(current_timestamp(), 'UTC+8') ETL_UPDATE_DT
FROM geo_xiehe a
LEFT JOIN dm.dm_ims_td_geo b
    ON a.geo_city = b.CITY_C
WHERE fact_province IN ('黑吉辽','云贵川青','湖北','甘肃','其他地区')

UNION ALL

-- All other regions: tier comes from the lookup (default '4'); REGION_CENTER
-- is always geo_province
SELECT
    concat('xiehe_', nvl(b.AUDIT_COD, concat(a.geo_province, a.fact_province, a.fact_city))) AS AUDIT_COD,
    nvl(b.CITY, a.geo_city) CITY,
    nvl(b.CITY_C, a.geo_city) CITY_C,
    'City' CITY_TYPE,
    nvl(b.AZ_CITY_TIER, '4') AZ_CITY_TIER,
    a.geo_province PROVINCE,
    a.geo_province PROVINCE_C,
    concat('xiehe_', nvl(b.PROVINCE_CODE, a.geo_province)) PROVINCE_CODE,
    a.fact_city AS CITY_MAP,
    a.fact_province AS PROVINCE_MAP,
    a.geo_province AS REGION_CENTER,
    from_utc_timestamp(current_timestamp(), 'UTC+8') ETL_INSERT_DT,
    from_utc_timestamp(current_timestamp(), 'UTC+8') ETL_UPDATE_DT
FROM geo_xiehe a
LEFT JOIN dm.dm_ims_td_geo b
    ON a.geo_city = b.CITY_C
WHERE fact_province NOT IN ('黑吉辽','云贵川青','湖北','甘肃','其他地区')
-- COMMAND ----------
-- MAGIC %md
-- MAGIC ### dm
-- COMMAND ----------
-- Publish the DWS geography rows to dm.dm_ext_xiehe_geo (ETL timestamps are not carried over).
INSERT OVERWRITE dm.dm_ext_xiehe_geo
SELECT
    AUDIT_COD,
    CITY, CITY_C, CITY_TYPE, AZ_CITY_TIER,
    PROVINCE, PROVINCE_C, PROVINCE_CODE,
    CITY_MAP, PROVINCE_MAP,
    REGION_CENTER
FROM dws.dws_ext_xiehe_geo

Binary file not shown.

View File

@@ -0,0 +1,219 @@
-- Databricks notebook source
-- MAGIC %md
-- MAGIC ### dws
-- COMMAND ----------
/*
Modified: 2025-07-07
Author: chenwu
Change: logic adjusted for the xiehe data-source change
*/
-- Rebuild dws.dws_ext_xiehe_raw_data: the DWD raw rows are mapped to an IQVIA pack
-- code and a geography, stacked with a copy of themselves shifted one year forward
-- (to provide the *_ly columns), and then summed per dimension combination.
-- Columns are matched to the target table by position.
insert overwrite dws.dws_ext_xiehe_raw_data
with
-- maps pack_cod to a single IQVIA_PACK_CODE (max per pack_cod)
pack_cod_2_iqvia_pack_code(
select
pack_cod
,max(IQVIA_PACK_CODE) IQVIA_PACK_CODE
from dwd.dwd_gnd_dept_pack_property
group by 1
)
-- one city / province / audit code per (province_map, city_map) pair
,geo_mapping(
select
province_map fact_province
,city_map fact_city
,max(city_c) city
,max(province_c) province
,max(audit_cod) audit_cod
from dm.dm_ext_xiehe_geo
group by 1,2
)
-- one countingunit per pack_cod (max per pack_cod)
,pack_countingunit(
select
pack_cod
,max(countingunit) countingunit
from dm.dm_xiehe_pack_property
group by 1
)
,underlying_ly_data(
-- current-period rows: measures filled, *_ly measures set to 0
select
c.audit_cod
,b.iqvia_pack_code
,c.city
,c.province
,a.yyyymm
,a.h_level
,a.reimburse
,a.prescription_source
,a.prescription_dept_lv1
,a.prescription_dept_lv2
,a.prescription_dept_lv3
,a.new_code
,a.atc
,a.common_name
,a.product_name
,a.manu_des
,a.pack_des
,a.drug_delivery_route
,a.nfc
,a.PackSize
,a.PackageType
,a.prescription
-- NOTE(review): a missing countingunit turns the volume into 0 - confirm this is intended
,a.sales_vol*nvl(d.countingunit,0) as sales_vol
-- ,a.price
,a.sales_value
,0 as prescription_ly
,0 as sales_vol_ly
-- ,0 as price_ly
,0 as sales_value_ly
from dwd.dwd_gnd_ext_xiehe_raw_data a
left join pack_cod_2_iqvia_pack_code b
on upper(a.new_code) = upper(b.pack_cod)
left join geo_mapping c
on a.area = c.fact_province
and a.city = c.fact_city
left join pack_countingunit d
on b.iqvia_pack_code = d.pack_cod
union all
-- last-year rows: the same data with yyyymm + 100 (same month, next year),
-- measures moved into the *_ly columns and current measures set to 0
select
c.audit_cod
,b.iqvia_pack_code
,c.city
,c.province
-- NOTE(review): the result type of yyyymm + 100 depends on the column type of yyyymm;
-- verify it still unions cleanly with the plain a.yyyymm of the first branch
,(a.yyyymm + 100) yyyymm
,a.h_level
,a.reimburse
,a.prescription_source
,a.prescription_dept_lv1
,a.prescription_dept_lv2
,a.prescription_dept_lv3
,a.new_code
,a.atc
,a.common_name
,a.product_name
,a.manu_des
,a.pack_des
,a.drug_delivery_route
,a.nfc
,a.PackSize
,a.PackageType
,0 as prescription
,0 as sales_vol
-- ,0 as price
,0 as sales_value
,a.prescription as prescription_ly
,a.sales_vol*nvl(d.countingunit,0) as sales_vol_ly
-- ,a.price as price_ly
,a.sales_value as sales_value_ly
from dwd.dwd_gnd_ext_xiehe_raw_data a
left join pack_cod_2_iqvia_pack_code b
on upper(a.new_code) = upper(b.pack_cod)
left join geo_mapping c
on a.area = c.fact_province
and a.city = c.fact_city
left join pack_countingunit d
on b.iqvia_pack_code = d.pack_cod
-- do not create shifted rows beyond the latest period present in the data
where (a.yyyymm + 100) <= (select max(yyyymm) from dwd.dwd_gnd_ext_xiehe_raw_data)
)
select
yyyymm
,iqvia_pack_code
,AUDIT_COD
,city
,province
,replace(h_level,'.0','') h_level
,reimburse
,null as reimburse_type
,prescription_source
,prescription_dept_lv1
,prescription_dept_lv2
,prescription_dept_lv3
,prescription_dept_lv3 dept_name
,new_code
,atc
,common_name
,product_name
,manu_des
,pack_des
,drug_delivery_route
,nfc
,PackSize
,PackageType
,sum(prescription) prescription
,SUM(sales_vol) counting_unit
,null as price
,SUM(sales_value) sales_value
,sum(prescription_ly) prescription_ly
,SUM(sales_vol_ly) counting_unit_ly
,null as price_ly
,SUM(sales_value_ly) sales_value_ly
,from_utc_timestamp(current_timestamp(),'UTC+8') etl_insert_dt
,from_utc_timestamp(current_timestamp(),'UTC+8') etl_update_dt
from underlying_ly_data
group by
yyyymm
,iqvia_pack_code
,AUDIT_COD
,city
,province
,h_level
,reimburse
-- ,null as reimburse_type
,prescription_source
,prescription_dept_lv1
,prescription_dept_lv2
,prescription_dept_lv3
-- listed a second time for dept_name; redundant but harmless
,prescription_dept_lv3
,new_code
,atc
,common_name
,product_name
,manu_des
,pack_des
,drug_delivery_route
,nfc
,PackSize
,PackageType
-- COMMAND ----------
-- MAGIC %md
-- MAGIC ### dm
-- COMMAND ----------
-- Publish the DWS sales rows to dm.dm_ext_xiehe_sales.
-- PackSize, PackageType and the ETL timestamps are not carried over.
INSERT OVERWRITE dm.dm_ext_xiehe_sales
SELECT
    -- keys
    yyyymm, iqvia_pack_code, AUDIT_COD,
    -- geography
    city, province,
    -- prescription context
    h_level, reimburse, reimburse_type, prescription_source,
    prescription_dept_lv1, prescription_dept_lv2, prescription_dept_lv3, dept_name,
    -- product attributes
    new_code, atc, common_name, product_name, manu_des, pack_des,
    drug_delivery_route, nfc,
    -- current-period measures
    prescription, counting_unit, price, sales_value,
    -- last-year measures
    prescription_ly, counting_unit_ly, price_ly, sales_value_ly
FROM dws.dws_ext_xiehe_raw_data

View File

@@ -0,0 +1,8 @@
-- Databricks notebook source
-- Refresh dm.dm_td_xiehe_core_dept with the distinct market / department /
-- core-department combinations, tagged with a fixed data-source label.
INSERT OVERWRITE TABLE dm.dm_td_xiehe_core_dept
SELECT DISTINCT
    MarketName           AS MARKET,
    Dept_NAME            AS DEPT_NAME,
    Defined_Dept         AS CORE_DEPT,
    'XH Data(Quarterly)' AS DATA_SOURCE
FROM dwd.dwd_gnd_xiehe_core_dept

View File

@@ -0,0 +1,170 @@
-- Databricks notebook source
-- CREATE OR REPLACE TABLE DM.DM_TF_EXT_XIEHE_SALES (
-- YYYYMM STRING,
-- PACK_CODE STRING,
-- CORP_CODE STRING,
-- AUDIT_CODE STRING,
-- PLATFORM_TYPE STRING,
-- STORE_NAME STRING,
-- STORE_TYPE STRING,
-- REGION_TYPE STRING,
-- PACK_FLAG INT,
-- PROD_FLAG INT,
-- DTP_FLAG INT,
-- SALES_UNIT_CAL DECIMAL(38,10),
-- SALES_UNIT_CAL_LY DECIMAL(38,10),
-- SALES_VALUE_CAL DECIMAL(38,10),
-- SALES_VALUE_CAL_LY DECIMAL(38,10),
-- CONUTING_UNIT DECIMAL(38,10),
-- CONUTING_UNIT_LY DECIMAL(38,10),
-- DATA_SOURCE STRING,
-- INST_CODE STRING COMMENT '内部机构编码',
-- CMPS_FLAG STRING COMMENT '分子式标签',
-- DEPT_NAME STRING COMMENT '科室名称',
-- PRESCRIPTION DECIMAL(38,10) COMMENT '处方张数',
-- PRESCRIPTION_LY DECIMAL(38,10) COMMENT '去年同期处方张数',
-- NEW_CODE STRING COMMENT '主数据关联CODE',
-- AREA STRING COMMENT '城市',
-- H_LEVEL STRING COMMENT '医院类型',
-- REIMBURSE STRING COMMENT '报销情况',
-- REIMBURSE_TYPE STRING COMMENT '报销类型',
-- PRESCRIPTION_SOURCE STRING COMMENT '处方来源',
-- ETL_INSERT_DT TIMESTAMP,
-- ETL_UPDATE_DT TIMESTAMP)
-- USING delta
-- -- LOCATION 'abfss://master@azcdatalakeprd.dfs.core.chinacloudapi.cn/DM/dm_tf_ext_xiehe_sales';
-- -- 上面是生产环境location下面是测试环境location
-- LOCATION 'abfss://master@retaildlstoragetest.dfs.core.chinacloudapi.cn/DM/dm_tf_ext_xiehe_sales';
-- COMMAND ----------
-- Temporary view that aggregates dm.dm_ext_xiehe_sales (YYYYMM >= '202001') per
-- period, pack, audit code and the XIE HE tag columns. Tag columns that this
-- source does not use are emitted as constants so the result has every column
-- the INSERT into DM.DM_TF_EXT_XIEHE_SALES lists.
CREATE OR REPLACE TEMPORARY VIEW TF_XIEHE
AS
SELECT
A.YYYYMM,
A.iqvia_pack_code AS PACK_CODE,
A.AUDIT_COD AS AUDIT_CODE,
'XH Data(Quarterly)' DATA_SOURCE,
-------------------------------------
0 SALES_UNIT_CAL,
0 SALES_UNIT_CAL_LY,
SUM(sales_value) SALES_VALUE_CAL,
SUM(sales_value_LY) SALES_VALUE_CAL_LY,
SUM(A.counting_unit) CONUTING_UNIT,
SUM(A.counting_unit_ly) CONUTING_UNIT_LY,
-- NOTE(review): prescription counts are emitted as NULL here rather than aggregated - confirm this is intentional
null as prescription,
null as prescription_ly,
-------------------------------------
--Retail data-masking logic flags----
1 PACK_FLAG,
1 PROD_FLAG,
0 DTP_FLAG,
null as cmps_flag,
-------------------------------------
--EC data tags-----------------------
'' PLATFORM_TYPE,
'' STORE_NAME,
'' STORE_TYPE,
-------------------------------------
--COUNTY data tags-------------------
'' REGION_TYPE,
-------------------------------------
--AIA data tags----------------------
null inst_code, -- not populated for this source
-------------------------------------
--XIE HE data tags-------------------
NVL(A.dept_name, '') dept_name,
NVL(A.new_code, '') new_code,
NVL(A.province, '') area,
NVL(A.h_level, '') h_level,
NVL(A.reimburse, '') reimburse,
NVL(A.reimburse_type, '') reimburse_type,
NVL(A.prescription_source, '') prescription_source,
-------------------------------------
'' as CORP_CODE -- can be removed; not used by the report
-------------------------------------
FROM
dm.dm_ext_xiehe_sales A
WHERE
A.YYYYMM >= '202001'
GROUP BY
A.YYYYMM,
A.iqvia_pack_code,
A.AUDIT_COD,
A.dept_name,
A.new_code,
A.province,
A.h_level,
A.reimburse,
A.reimburse_type,
A.prescription_source
-- COMMAND ----------
-- Load DM.DM_TF_EXT_XIEHE_SALES from the TF_XIEHE view.
-- An empty or NULL PACK_CODE becomes 'PACK_CODE_' + DATA_SOURCE and an empty or
-- NULL AUDIT_CODE becomes 'ROC'; every other column is passed through.
INSERT OVERWRITE TABLE DM.DM_TF_EXT_XIEHE_SALES (
    YYYYMM, PACK_CODE, AUDIT_CODE, DATA_SOURCE,
    SALES_UNIT_CAL, SALES_UNIT_CAL_LY,
    SALES_VALUE_CAL, SALES_VALUE_CAL_LY,
    CONUTING_UNIT, CONUTING_UNIT_LY,
    PRESCRIPTION, PRESCRIPTION_LY,
    PACK_FLAG, PROD_FLAG, DTP_FLAG, CMPS_FLAG,
    PLATFORM_TYPE, STORE_NAME, STORE_TYPE,
    REGION_TYPE,
    INST_CODE,
    DEPT_NAME, NEW_CODE, AREA, H_LEVEL,
    REIMBURSE, REIMBURSE_TYPE, PRESCRIPTION_SOURCE,
    CORP_CODE,
    ETL_INSERT_DT, ETL_UPDATE_DT
)
SELECT
    YYYYMM,
    NVL(NULLIF(PACK_CODE, ''), CONCAT('PACK_CODE_', DATA_SOURCE)) AS PACK_CODE,
    NVL(NULLIF(AUDIT_CODE, ''), 'ROC') AS AUDIT_CODE,
    DATA_SOURCE,
    SALES_UNIT_CAL, SALES_UNIT_CAL_LY,
    SALES_VALUE_CAL, SALES_VALUE_CAL_LY,
    CONUTING_UNIT, CONUTING_UNIT_LY,
    PRESCRIPTION, PRESCRIPTION_LY,
    PACK_FLAG, PROD_FLAG, DTP_FLAG, CMPS_FLAG,
    PLATFORM_TYPE, STORE_NAME, STORE_TYPE,
    REGION_TYPE,
    INST_CODE,
    DEPT_NAME, NEW_CODE, AREA, H_LEVEL,
    REIMBURSE, REIMBURSE_TYPE, PRESCRIPTION_SOURCE,
    CORP_CODE,
    FROM_UTC_TIMESTAMP(CURRENT_TIMESTAMP(), 'UTC+8') AS ETL_INSERT_DT,
    FROM_UTC_TIMESTAMP(CURRENT_TIMESTAMP(), 'UTC+8') AS ETL_UPDATE_DT
FROM TF_XIEHE;

View File

@@ -0,0 +1,146 @@
-- Databricks notebook source
-- CREATE OR REPLACE TABLE DM.DM_TD_EXT_XIEHE_PACK_PROPERTY
-- (
-- -- -- MARKET_PACK_KEY STRING,
-- PACK_CODE STRING,
-- PACK_DESC STRING,
-- STGH_DESC STRING,
-- PACK_LCH STRING,
-- FAMILY_CODE STRING,
-- FAMILY_NAME STRING,
-- PROD_CODE STRING,
-- PROD_DESC STRING,
-- PROD_DESC_C STRING,
-- CMPS_CODE STRING,
-- CMPS_DESC STRING,
-- CMPS_DESC_C STRING,
-- ATC1_CODE STRING,
-- ATC2_CODE STRING,
-- ATC3_CODE STRING,
-- ATC4_CODE STRING,
-- APP1_CODE STRING,
-- APP2_CODE STRING,
-- APP3_CODE STRING,
-- BIO_DESC STRING,
-- GENE_ORIG_DESC STRING,
-- ETH_OTC_DESC STRING,
-- NRDL_DESC STRING,
-- NRDL_ENTRY_DATE STRING,
-- EDL_DESC STRING,
-- TCM_DESC STRING,
-- PAED_DESC STRING,
-- GQCE_DESC STRING,
-- VBP_DESC_V STRING,
-- VBP_DESC STRING,
-- MANU_CODE STRING,
-- MANU_DESC STRING,
-- MANU_DESC_C STRING,
-- MNFL_CODE STRING,
-- MNFL_DESC STRING,
-- CORP_CODE STRING,
-- CORP_DESC STRING,
-- CORP_DESC_C STRING,
-- BRANDTYPE STRING,
-- -- MARKET STRING,
-- -- KEY_COMPETITOR STRING,
-- IS_AZ STRING,
-- AZ_MAIN STRING,
-- -- AZ_RELATED STRING,
-- ATC1_DESC STRING,
-- ATC1_DESC_C STRING,
-- ATC2_DESC STRING,
-- ATC2_DESC_C STRING,
-- ATC3_DESC STRING,
-- ATC3_DESC_C STRING,
-- ATC4_DESC STRING,
-- ATC4_DESC_C STRING,
-- APP1_DESC STRING,
-- APP1_DESC_C STRING,
-- APP2_DESC STRING,
-- APP2_DESC_C STRING,
-- APP3_DESC STRING,
-- APP3_DESC_C STRING,
-- -- CLASS STRING,
-- -- MARKET_RATIO STRING,
-- COUNTINGUNIT STRING,
-- VBP_BRAND STRING,
-- REPLENISH_FALG STRING,
-- ETL_INSERT_DT TIMESTAMP,
-- ETL_UPDATE_DT TIMESTAMP
-- )
-- USING delta
-- -- LOCATION 'abfss://master@azcdatalakeprd.dfs.core.chinacloudapi.cn/DM/dm_td_ext_xiehe_pack_property';
-- -- 上面是生产环境location下面是测试环境location
-- LOCATION 'abfss://master@retaildlstoragetest.dfs.core.chinacloudapi.cn/DM/dm_td_ext_xiehe_pack_property';
-- COMMAND ----------
-- Rebuild DM.DM_TD_EXT_XIEHE_PACK_PROPERTY from DM.DM_XIEHE_PACK_PROPERTY,
-- renaming the *_COD / *_DES columns to *_CODE / *_DESC.
-- VBP_DESC_V, VBP_BRAND and REPLENISH_FALG are loaded as NULL.
-- Columns are matched to the target table by position, so the order below matters.
INSERT OVERWRITE TABLE DM.DM_TD_EXT_XIEHE_PACK_PROPERTY
SELECT
-- MARKET_PACK_KEY,
PACK_COD AS PACK_CODE,
PACK_DES AS PACK_DESC,
STGH_DES AS STGH_DESC,
PACK_LCH,
FAMILY_CODE,
FAMILY_NAME,
PROD_COD AS PROD_CODE,
PROD_DES AS PROD_DESC,
PROD_DES_C AS PROD_DESC_C,
CMPS_COD AS CMPS_CODE,
CMPS_DES AS CMPS_DESC,
CMPS_DES_C AS CMPS_DESC_C,
ATC1_COD AS ATC1_CODE,
ATC2_COD AS ATC2_CODE,
ATC3_COD AS ATC3_CODE,
ATC4_COD AS ATC4_CODE,
APP1_COD AS APP1_CODE,
APP2_COD AS APP2_CODE,
APP3_COD AS APP3_CODE,
BIO_DESC,
GENE_ORIG_DESC,
ETH_OTC_DESC,
NRDL_DESC,
NRDL_ENTRY_DATE,
EDL_DESC,
TCM_DESC,
PAED_DESC,
GQCE_DESC,
null as VBP_DESC_V,
VBP_DESC,
MANU_COD AS MANU_CODE,
MANU_DES AS MANU_DESC,
MANU_DES_C AS MANU_DESC_C,
MNFL_COD AS MNFL_CODE,
MNFL_DES AS MNFL_DESC,
CORP_COD AS CORP_CODE,
CORP_DES AS CORP_DESC,
CORP_DES_C AS CORP_DESC_C,
BRANDTYPE,
-- MARKET,
-- KEY_COMPETITOR,
IS_AZ,
AZ_MAIN,
-- AZ_RELATED,
ATC1_DES AS ATC1_DESC,
ATC1_DES_C AS ATC1_DESC_C,
ATC2_DES AS ATC2_DESC,
ATC2_DES_C AS ATC2_DESC_C,
ATC3_DES AS ATC3_DESC,
ATC3_DES_C AS ATC3_DESC_C,
ATC4_DES AS ATC4_DESC,
ATC4_DES_C AS ATC4_DESC_C,
APP1_DES AS APP1_DESC,
APP1_DES_C AS APP1_DESC_C,
APP2_DES AS APP2_DESC,
APP2_DES_C AS APP2_DESC_C,
APP3_DES AS APP3_DESC,
APP3_DES_C AS APP3_DESC_C,
-- CLASS,
-- NULL AS MARKET_RATIO,
COUNTINGUNIT,
NULL AS VBP_BRAND,
NULL AS REPLENISH_FALG,
FROM_UTC_TIMESTAMP(CURRENT_TIMESTAMP(),'UTC+8') AS ETL_INSERT_DT,
FROM_UTC_TIMESTAMP(CURRENT_TIMESTAMP(),'UTC+8') AS ETL_UPDATE_DT
FROM DM.DM_XIEHE_PACK_PROPERTY;

View File

@@ -0,0 +1,326 @@
-- Databricks notebook source
-- CREATE OR REPLACE TABLE DM.DM_TD_EXT_XIEHE_PACK2MARKET (
-- MARKET STRING,
-- PACK_CODE STRING,
-- PACK_DESC STRING,
-- STGH_DESC STRING,
-- PACK_LCH STRING,
-- PROD_CODE STRING,
-- CMPS_CODE STRING,
-- CMPS_DESC STRING,
-- ATC1_CODE STRING,
-- ATC2_CODE STRING,
-- ATC3_CODE STRING,
-- ATC4_CODE STRING,
-- APP1_CODE STRING,
-- APP2_CODE STRING,
-- APP3_CODE STRING,
-- BIO_DESC STRING,
-- GENE_ORIG_DESC STRING,
-- ETH_OTC_DESC STRING,
-- NRDL_DESC STRING,
-- NRDL_ENTRY_DATE STRING,
-- EDL_DESC STRING,
-- TCM_DESC STRING,
-- PAED_DESC STRING,
-- GQCE_DESC STRING,
-- VBP_DESC STRING,
-- MANU_CODE STRING,
-- MANU_DESC STRING,
-- MNFL_CODE STRING,
-- MNFL_DESC STRING,
-- CORP_CODE STRING,
-- CORP_DESC STRING,
-- BRANDTYPE STRING,
-- BU STRING,
-- STARTTIME STRING,
-- ENDTIME STRING,
-- MARKET_RATIO STRING,
-- ETL_INSERT_DT TIMESTAMP,
-- ETL_UPDATE_DT TIMESTAMP)
-- USING delta
-- -- LOCATION 'abfss://master@azcdatalakeprd.dfs.core.chinacloudapi.cn/DM/dm_td_ext_xiehe_pack2market';
-- -- 上面是生产环境location下面是测试环境location
-- LOCATION 'abfss://master@retaildlstoragetest.dfs.core.chinacloudapi.cn/DM/dm_td_ext_xiehe_pack2market';
-- COMMAND ----------
-- CREATE OR REPLACE TABLE DM.DM_TD_EXT_xiehe_PACK2MARKET_KEYCOMPETITOR (
-- KEY_COMPETITOR STRING,
-- MARKET STRING,
-- PACK_CODE STRING)
-- USING delta
-- -- LOCATION 'abfss://master@azcdatalakeprd.dfs.core.chinacloudapi.cn/DM/dm_td_ext_xiehe_pack2market_keycompetitor';
-- -- 上面是生产环境location下面是测试环境location
-- LOCATION 'abfss://master@retaildlstoragetest.dfs.core.chinacloudapi.cn/DM/dm_td_ext_xiehe_pack2market_keycompetitor';
-- COMMAND ----------
-- CREATE or REPLACE TABLE DM.DM_TD_EXT_xiehe_PACK2MARKET_CLASS (
-- MARKET STRING,
-- PACK_CODE STRING,
-- CLASS STRING)
-- USING delta
-- -- LOCATION 'abfss://master@azcdatalakeprd.dfs.core.chinacloudapi.cn/DM/dm_td_ext_xiehe_pack2market_class';
-- -- 上面是生产环境location下面是测试环境location
-- LOCATION 'abfss://master@retaildlstoragetest.dfs.core.chinacloudapi.cn/DM/dm_td_ext_xiehe_pack2market_class';
-- COMMAND ----------
-- Uses EXCEPT (set difference) to keep the logic simple:
-- rows produced by the inclusion rules minus rows produced by the exclusion rules.
-- A rule from DWD.DWD_GND_IMS_TBLMARKET_BYMONTH matches a pack when every rule
-- column that is populated equals the pack's value; NULL rule columns act as wildcards.
-- The LEFT JOIN plus "WHERE T2.MARKET IS NOT NULL" keeps only packs that matched a rule.
-- NOTE(review): EXCEPT compares every selected column, including BU, STARTTIME,
-- ENDTIME and MARKET_RATIO taken from the rule row, so an exclusion rule only
-- removes an inclusion row when those values are identical too - confirm this is intended.
INSERT OVERWRITE TABLE DM.DM_TD_EXT_XIEHE_PACK2MARKET
-- Inclusion rules (NOT_IN_FLAG is NULL or '1')
SELECT DISTINCT
T2.MARKET,
T1.PACK_CODE,
T1.PACK_DESC,
T1.STGH_DESC,
T1.PACK_LCH,
T1.PROD_CODE,
T1.CMPS_CODE,
T1.CMPS_DESC,
T1.ATC1_CODE,
T1.ATC2_CODE,
T1.ATC3_CODE,
T1.ATC4_CODE,
T1.APP1_CODE,
T1.APP2_CODE,
T1.APP3_CODE,
T1.BIO_DESC,
T1.GENE_ORIG_DESC AS GENE_ORIG,
T1.ETH_OTC_DESC AS ETH_OTC_D,
T1.NRDL_DESC,
T1.NRDL_ENTRY_DATE AS NRDL_ENTR,
T1.EDL_DESC,
T1.TCM_DESC,
T1.PAED_DESC,
T1.GQCE_DESC,
T1.VBP_DESC,
T1.MANU_CODE,
T1.MANU_DESC,
T1.MNFL_CODE,
T1.MNFL_DESC,
T1.CORP_CODE,
T1.CORP_DESC,
T1.BRANDTYPE,
T2.BU,
T2.STARTTIME,
T2.ENDTIME,
COALESCE(T2.EXTEND_MARKET_RATIO, '1') AS MARKET_RATIO,
FROM_UTC_TIMESTAMP(CURRENT_TIMESTAMP(),'UTC+8') AS ETL_INSERT_DT,
FROM_UTC_TIMESTAMP(CURRENT_TIMESTAMP(),'UTC+8') AS ETL_UPDATE_DT
FROM DM.DM_TD_EXT_XIEHE_PACK_PROPERTY AS T1
LEFT JOIN (
SELECT
MARKET,
BU,
STARTTIME,
ENDTIME,
EXTEND_MARKET_RATIO,
ATC1_CODE,
ATC2_CODE,
ATC3_CODE,
ATC4_CODE,
NFC1_CODE,
NFC2_CODE,
NFC3_CODE,
CORPORATION_CODE,
MANUFACTURER_CODE,
PRODUCT_CODE,
PACK_CODE,
STRENGTH,
MOLECULE_CODE
FROM DWD.DWD_GND_IMS_TBLMARKET_BYMONTH
WHERE EXTEND_MARKET IS NULL AND (NOT_IN_FLAG IS NULL OR NOT_IN_FLAG = '1')
) AS T2
ON (T2.ATC1_CODE IS NULL OR T1.ATC1_CODE = T2.ATC1_CODE)
AND (T2.ATC2_CODE IS NULL OR T1.ATC2_CODE = T2.ATC2_CODE)
AND (T2.ATC3_CODE IS NULL OR T1.ATC3_CODE = T2.ATC3_CODE)
AND (T2.ATC4_CODE IS NULL OR T1.ATC4_CODE = T2.ATC4_CODE)
AND (T2.NFC1_CODE IS NULL OR T1.APP1_CODE = T2.NFC1_CODE)
AND (T2.NFC2_CODE IS NULL OR T1.APP2_CODE = T2.NFC2_CODE)
AND (T2.NFC3_CODE IS NULL OR T1.APP3_CODE = T2.NFC3_CODE)
AND (T2.CORPORATION_CODE IS NULL OR T1.CORP_CODE = T2.CORPORATION_CODE)
AND (T2.MANUFACTURER_CODE IS NULL OR T1.MANU_CODE = T2.MANUFACTURER_CODE)
AND (T2.PRODUCT_CODE IS NULL OR T1.PROD_CODE = T2.PRODUCT_CODE)
AND (T2.PACK_CODE IS NULL OR T1.PACK_CODE = T2.PACK_CODE)
AND (T2.STRENGTH IS NULL OR T1.STGH_DESC = T2.STRENGTH)
AND (T2.MOLECULE_CODE IS NULL OR T1.CMPS_CODE = T2.MOLECULE_CODE)
WHERE T2.MARKET IS NOT NULL
EXCEPT
-- Exclusion rules (NOT_IN_FLAG = '0'); same projection and matching as above
SELECT DISTINCT
T2.MARKET,
T1.PACK_CODE,
T1.PACK_DESC,
T1.STGH_DESC,
T1.PACK_LCH,
T1.PROD_CODE,
T1.CMPS_CODE,
T1.CMPS_DESC,
T1.ATC1_CODE,
T1.ATC2_CODE,
T1.ATC3_CODE,
T1.ATC4_CODE,
T1.APP1_CODE,
T1.APP2_CODE,
T1.APP3_CODE,
T1.BIO_DESC,
T1.GENE_ORIG_DESC AS GENE_ORIG,
T1.ETH_OTC_DESC AS ETH_OTC_D,
T1.NRDL_DESC,
T1.NRDL_ENTRY_DATE AS NRDL_ENTR,
T1.EDL_DESC,
T1.TCM_DESC,
T1.PAED_DESC,
T1.GQCE_DESC,
T1.VBP_DESC,
T1.MANU_CODE,
T1.MANU_DESC,
T1.MNFL_CODE,
T1.MNFL_DESC,
T1.CORP_CODE,
T1.CORP_DESC,
T1.BRANDTYPE,
T2.BU,
T2.STARTTIME,
T2.ENDTIME,
COALESCE(T2.EXTEND_MARKET_RATIO, '1') AS MARKET_RATIO,
FROM_UTC_TIMESTAMP(CURRENT_TIMESTAMP(),'UTC+8') AS ETL_INSERT_DT,
FROM_UTC_TIMESTAMP(CURRENT_TIMESTAMP(),'UTC+8') AS ETL_UPDATE_DT
FROM DM.DM_TD_EXT_XIEHE_PACK_PROPERTY AS T1
LEFT JOIN (
SELECT
MARKET,
BU,
STARTTIME,
ENDTIME,
EXTEND_MARKET_RATIO,
ATC1_CODE,
ATC2_CODE,
ATC3_CODE,
ATC4_CODE,
NFC1_CODE,
NFC2_CODE,
NFC3_CODE,
CORPORATION_CODE,
MANUFACTURER_CODE,
PRODUCT_CODE,
PACK_CODE,
STRENGTH,
MOLECULE_CODE
FROM DWD.DWD_GND_IMS_TBLMARKET_BYMONTH
WHERE EXTEND_MARKET IS NULL AND NOT_IN_FLAG = '0'
) AS T2
ON (T2.ATC1_CODE IS NULL OR T1.ATC1_CODE = T2.ATC1_CODE)
AND (T2.ATC2_CODE IS NULL OR T1.ATC2_CODE = T2.ATC2_CODE)
AND (T2.ATC3_CODE IS NULL OR T1.ATC3_CODE = T2.ATC3_CODE)
AND (T2.ATC4_CODE IS NULL OR T1.ATC4_CODE = T2.ATC4_CODE)
AND (T2.NFC1_CODE IS NULL OR T1.APP1_CODE = T2.NFC1_CODE)
AND (T2.NFC2_CODE IS NULL OR T1.APP2_CODE = T2.NFC2_CODE)
AND (T2.NFC3_CODE IS NULL OR T1.APP3_CODE = T2.NFC3_CODE)
AND (T2.CORPORATION_CODE IS NULL OR T1.CORP_CODE = T2.CORPORATION_CODE)
AND (T2.MANUFACTURER_CODE IS NULL OR T1.MANU_CODE = T2.MANUFACTURER_CODE)
AND (T2.PRODUCT_CODE IS NULL OR T1.PROD_CODE = T2.PRODUCT_CODE)
AND (T2.PACK_CODE IS NULL OR T1.PACK_CODE = T2.PACK_CODE)
AND (T2.STRENGTH IS NULL OR T1.STGH_DESC = T2.STRENGTH)
AND (T2.MOLECULE_CODE IS NULL OR T1.CMPS_CODE = T2.MOLECULE_CODE)
WHERE T2.MARKET IS NOT NULL;
-- COMMAND ----------
-- Assign one key competitor to every (market, pack) row of DM.DM_TD_EXT_xiehe_PACK2MARKET.
-- kc:     the key-competitor rules, each given a rank no1.
-- joined: every pack matched against every rule (NULL rule columns act as wildcards);
--         per (market, pack, product) the match with the highest no1, then highest no, is kept.
-- Packs without any matching rule get 'OTHERS'.
INSERT OVERWRITE TABLE DM.DM_TD_EXT_xiehe_PACK2MARKET_KEYCOMPETITOR
WITH kc AS (
SELECT
keycompetitor, no,
-- no1 numbers the rules ordered by the first populated key in the list
-- ATC1 .. ATC4, Molecule, Product, Pack (rules with none of them last),
-- then by whether NFC1 / NFC2 / NFC3 are populated
ROW_NUMBER() OVER (ORDER BY
CASE
WHEN ATC1_Code IS NOT NULL THEN 1
WHEN ATC2_Code IS NOT NULL THEN 2
WHEN ATC3_Code IS NOT NULL THEN 3
WHEN ATC4_Code IS NOT NULL THEN 4
WHEN Molecule_Code IS NOT NULL THEN 5
WHEN Product_Code IS NOT NULL THEN 6
WHEN Pack_Code IS NOT NULL THEN 7
ELSE 999
END,
CASE WHEN NFC1_CODE IS NULL THEN 0 ELSE 1 END,
CASE WHEN NFC2_CODE IS NULL THEN 0 ELSE 1 END,
CASE WHEN NFC3_CODE IS NULL THEN 0 ELSE 1 END
) AS no1,
ATC1_Code, ATC2_Code, ATC3_Code, ATC4_Code,
NFC1_Code, NFC2_Code, NFC3_Code,
corporation_code, Manufacturer_Code,
Product_Code, Pack_Code, Strength, Molecule_Code, market
FROM dwd.dwd_gnd_ims_tblkeycompetitor
),
joined AS (
SELECT
/*+ BROADCAST(kc) */
COALESCE(kc.keycompetitor, 'OTHERS') AS Key_Competitor, -- former UPDATE logic inlined here
t1.market, t1.PACK_CODE
/* debug area (extra columns kept for troubleshooting)
,
kc.no,
kc.no1,
t1.PROD_COD, t1.CMPS_COD, t1.CMPS_DES,
t1.ATC1_COD, t1.ATC2_COD, t1.ATC3_COD, t1.ATC4_COD,
t1.APP1_COD, t1.APP2_COD, t1.APP3_COD,
t1.BIO_DESC, t1.GENE_ORIG_DESC, t1.ETH_OTC_DESC,
t1.NRDL_DESC, t1.NRDL_Entry_Date, t1.EDL_DESC, t1.TCM_DESC,
t1.PAED_DESC, t1.GQCE_DESC, t1.VBP_DESC,
t1.MANU_COD, t1.MANU_DES, t1.MNFL_COD, t1.MNFL_DES,
t1.CORP_COD, t1.CORP_DES, t1.BrandType, t1.bu, t1.Market_Ratio
*/
-- _rn = 1 marks the winning rule for each (market, pack, product)
,ROW_NUMBER() OVER (
PARTITION BY t1.market, t1.PACK_CODE, t1.PROD_CODE
ORDER BY kc.no1 DESC, kc.no DESC
) AS _rn
FROM DM.DM_TD_EXT_xiehe_PACK2MARKET t1
LEFT JOIN kc
ON (kc.ATC1_Code IS NULL OR t1.ATC1_CODE = kc.ATC1_Code)
AND (kc.ATC2_Code IS NULL OR t1.ATC2_CODE = kc.ATC2_Code)
AND (kc.ATC3_Code IS NULL OR t1.ATC3_CODE = kc.ATC3_Code)
AND (kc.ATC4_Code IS NULL OR t1.ATC4_CODE = kc.ATC4_Code)
AND (kc.NFC1_Code IS NULL OR t1.APP1_CODE = kc.NFC1_Code)
AND (kc.NFC2_Code IS NULL OR t1.APP2_CODE = kc.NFC2_Code)
AND (kc.NFC3_Code IS NULL OR t1.APP3_CODE = kc.NFC3_Code)
AND (kc.corporation_code IS NULL OR t1.CORP_CODE = kc.corporation_code)
AND (kc.Manufacturer_Code IS NULL OR t1.MANU_CODE = kc.Manufacturer_Code)
AND (kc.Product_Code IS NULL OR t1.PROD_CODE = kc.Product_Code)
AND (kc.Pack_Code IS NULL OR t1.PACK_CODE = kc.Pack_Code)
AND (kc.Strength IS NULL OR t1.STGH_DESC = kc.Strength)
AND (kc.Molecule_Code IS NULL OR t1.CMPS_CODE = kc.Molecule_Code)
AND (kc.market IS NULL OR t1.market = kc.market)
)
SELECT distinct Key_Competitor,market,pack_code
FROM joined
WHERE _rn = 1;
-- COMMAND ----------
-- Assign a CLASS to every (market, pack) of DM.DM_TD_EXT_xiehe_PACK2MARKET.
-- A rule in DWD.DWD_GND_TBLCLASS matches when each of its populated columns
-- equals the pack's value (NULL rule columns act as wildcards); packs with no
-- matching rule get 'OTHERS'.
INSERT OVERWRITE TABLE DM.DM_TD_EXT_xiehe_PACK2MARKET_CLASS
SELECT
    /*+ BROADCAST(T2) */
    DISTINCT
    T1.MARKET,
    T1.PACK_CODE,
    COALESCE(T2.CLASS, 'OTHERS') AS CLASS
FROM DM.DM_TD_EXT_xiehe_PACK2MARKET T1
LEFT JOIN DWD.DWD_GND_TBLCLASS T2
    ON  (T2.MARKET            IS NULL OR T1.MARKET     = T2.MARKET)
    AND (T2.PACK_CODE         IS NULL OR T1.PACK_CODE  = T2.PACK_CODE)
    AND (T2.PRODUCT_CODE      IS NULL OR T1.PROD_CODE  = T2.PRODUCT_CODE)
    AND (T2.MOLECULE_CODE     IS NULL OR T1.CMPS_CODE  = T2.MOLECULE_CODE)
    AND (T2.CORPORATION_CODE  IS NULL OR T1.CORP_CODE  = T2.CORPORATION_CODE)
    AND (T2.MANUFACTURER_CODE IS NULL OR T1.MANU_CODE  = T2.MANUFACTURER_CODE)
    AND (T2.ATC1_CODE         IS NULL OR T1.ATC1_CODE  = T2.ATC1_CODE)
    AND (T2.ATC2_CODE         IS NULL OR T1.ATC2_CODE  = T2.ATC2_CODE)
    AND (T2.ATC3_CODE         IS NULL OR T1.ATC3_CODE  = T2.ATC3_CODE)
    AND (T2.ATC4_CODE         IS NULL OR T1.ATC4_CODE  = T2.ATC4_CODE)
    AND (T2.NFC1_CODE         IS NULL OR T1.APP1_CODE  = T2.NFC1_CODE)
    AND (T2.NFC2_CODE         IS NULL OR T1.APP2_CODE  = T2.NFC2_CODE)
    AND (T2.NFC3_CODE         IS NULL OR T1.APP3_CODE  = T2.NFC3_CODE)
    AND (T2.STRENGTH          IS NULL OR T1.STGH_DESC  = T2.STRENGTH)

View File

@@ -0,0 +1,30 @@
-- Databricks notebook source
-- CREATE OR REPLACE TABLE DM.DM_TD_EXT_XIEHE_MARKET_RATIO (
-- MARKET STRING,
-- PACK_CODE STRING,
-- CMPS_CODE STRING,
-- STARTTIME STRING,
-- ENDTIME STRING,
-- MARKET_RATIO STRING,
-- ETL_INSERT_DT TIMESTAMP,
-- ETL_UPDATE_DT TIMESTAMP)
-- USING delta
-- -- LOCATION 'abfss://master@azcdatalakeprd.dfs.core.chinacloudapi.cn/DM/dm_td_ext_xiehe_market_ratio';
-- -- 上面是生产环境location下面是测试环境location
-- LOCATION 'abfss://master@retaildlstoragetest.dfs.core.chinacloudapi.cn/DM/dm_td_ext_xiehe_market_ratio';
-- COMMAND ----------
-- Rebuild the market-ratio table from PACK2MARKET.
-- The distinct business rows are selected first (markets whose upper-cased name
-- contains 'ALL' are dropped), then both ETL timestamps are stamped with the
-- current time shifted to UTC+8.
INSERT OVERWRITE TABLE DM.DM_TD_EXT_XIEHE_MARKET_RATIO
SELECT
    SRC.MARKET,
    SRC.PACK_CODE,
    SRC.CMPS_CODE,
    SRC.STARTTIME,
    SRC.ENDTIME,
    SRC.MARKET_RATIO,
    FROM_UTC_TIMESTAMP(CURRENT_TIMESTAMP(),'UTC+8') AS ETL_INSERT_DT,
    FROM_UTC_TIMESTAMP(CURRENT_TIMESTAMP(),'UTC+8') AS ETL_UPDATE_DT
FROM (
    SELECT DISTINCT
        MARKET,
        PACK_CODE,
        CMPS_CODE,
        STARTTIME,
        ENDTIME,
        MARKET_RATIO
    FROM DM.DM_TD_EXT_XIEHE_PACK2MARKET
    WHERE UPPER(MARKET) NOT LIKE '%ALL%'
) SRC

View File

@@ -0,0 +1,163 @@
-- Databricks notebook source
-- CREATE OR REPLACE TABLE DM.DM_TD_EXT_XIEHE_MARKET_BRAND_RATIO (
-- MARKET STRING,
-- PACK_CODE STRING,
-- VALUE_BRAND_RATIO FLOAT,
-- VALUE_BRAND_RATIO_START STRING,
-- VALUE_BRAND_RATIO_END STRING,
-- UNIT_BRAND_RATIO FLOAT,
-- UNIT_BRAND_START STRING,
-- UNIT_BRAND_END STRING,
-- COUNTINGUNIT_BRAND_RATIO FLOAT,
-- COUNTINGUNIT_BRAND_START STRING,
-- COUNTINGUNIT_BRAND_END STRING,
-- PDOT_BRAND_RATIO FLOAT,
-- PDOT_BRAND_START STRING,
-- PDOT_BRAND_END STRING,
-- ETL_INSERT_DT TIMESTAMP,
-- ETL_UPDATE_DT TIMESTAMP)
-- USING delta
-- -- LOCATION 'abfss://master@azcdatalakeprd.dfs.core.chinacloudapi.cn/DM/dm_td_ext_xiehe_market_brand_ratio';
-- -- 上面是生产环境location下面是测试环境location
-- LOCATION 'abfss://master@retaildlstoragetest.dfs.core.chinacloudapi.cn/DM/dm_td_ext_xiehe_market_brand_ratio';
-- COMMAND ----------
-- Rebuild the brand-ratio table: one set of VALUE / UNIT / COUNTINGUNIT / PDOT
-- ratios (with their validity window) per (MARKET, PACK_CODE) of MARKET_RATIO.
-- For each measure a PACK-level ratio wins; otherwise the MOLECULE-level ratio is
-- used; otherwise the default ratio 1 with the window 200001-299912 applies.
-- The window columns follow whichever level supplied the ratio.
-- COUNTINGUNIT_* columns are always the defaults.
-- NOTE(review): the PDOT_* columns are fed from the TYPE = 'COUNTINGUNIT' rows
-- (T4 / T7) - confirm that this is the intended source for PDOT.
INSERT OVERWRITE TABLE DM.DM_TD_EXT_XIEHE_MARKET_BRAND_RATIO
WITH BRAND_RATIO AS (
    -- Single typed, de-duplicated view of the ratio source shared by all six joins.
    SELECT DISTINCT
        MARKET,
        CMPS_COD,
        CMPS_DES,
        PACK_COD,
        PACK_DES,
        LEVEL,
        TYPE,
        CAST(RATIO AS FLOAT) AS RATIO,
        CAST(STARTTIME AS INT) AS STARTTIME,
        CAST(ENDTIME AS INT) AS ENDTIME
    FROM DWD.DWD_GND_IMS_TBLBRANDRATIO_BYMONTH
)
SELECT DISTINCT
    T1.MARKET,
    T1.PACK_CODE,
    -- VALUE
    COALESCE(T2.RATIO, T5.RATIO, 1) AS VALUE_BRAND_RATIO,
    IF(T2.RATIO IS NOT NULL, T2.STARTTIME, COALESCE(T5.STARTTIME, 200001)) AS VALUE_BRAND_RATIO_START,
    IF(T2.RATIO IS NOT NULL, T2.ENDTIME, COALESCE(T5.ENDTIME, 299912)) AS VALUE_BRAND_RATIO_END,
    -- UNIT
    COALESCE(T3.RATIO, T6.RATIO, 1) AS UNIT_BRAND_RATIO,
    IF(T3.RATIO IS NOT NULL, T3.STARTTIME, COALESCE(T6.STARTTIME, 200001)) AS UNIT_BRAND_START,
    IF(T3.RATIO IS NOT NULL, T3.ENDTIME, COALESCE(T6.ENDTIME, 299912)) AS UNIT_BRAND_END,
    -- COUNTINGUNIT (fixed defaults)
    1 AS COUNTINGUNIT_BRAND_RATIO,
    200001 AS COUNTINGUNIT_BRAND_START,
    299912 AS COUNTINGUNIT_BRAND_END,
    -- PDOT
    COALESCE(T4.RATIO, T7.RATIO, 1) AS PDOT_BRAND_RATIO,
    IF(T4.RATIO IS NOT NULL, T4.STARTTIME, COALESCE(T7.STARTTIME, 200001)) AS PDOT_BRAND_START,
    IF(T4.RATIO IS NOT NULL, T4.ENDTIME, COALESCE(T7.ENDTIME, 299912)) AS PDOT_BRAND_END,
    FROM_UTC_TIMESTAMP(CURRENT_TIMESTAMP(),'UTC+8') AS ETL_INSERT_DT,
    FROM_UTC_TIMESTAMP(CURRENT_TIMESTAMP(),'UTC+8') AS ETL_UPDATE_DT
FROM DM.DM_TD_EXT_XIEHE_MARKET_RATIO T1
LEFT JOIN BRAND_RATIO T2 --VALUE--PACK
    ON T1.MARKET = T2.MARKET AND T1.PACK_CODE = T2.PACK_COD
    AND UPPER(T2.TYPE) = 'VALUE' AND UPPER(T2.LEVEL) = 'PACK'
LEFT JOIN BRAND_RATIO T3 --UNIT--PACK
    ON T1.MARKET = T3.MARKET AND T1.PACK_CODE = T3.PACK_COD
    AND UPPER(T3.TYPE) = 'UNIT' AND UPPER(T3.LEVEL) = 'PACK'
LEFT JOIN BRAND_RATIO T4 --COUNTINGUNIT--PACK
    ON T1.MARKET = T4.MARKET AND T1.PACK_CODE = T4.PACK_COD
    AND UPPER(T4.TYPE) = 'COUNTINGUNIT' AND UPPER(T4.LEVEL) = 'PACK'
LEFT JOIN BRAND_RATIO T5 --VALUE--MOLECULE
    ON T1.MARKET = T5.MARKET AND T1.CMPS_CODE = T5.CMPS_COD
    AND UPPER(T5.TYPE) = 'VALUE' AND UPPER(T5.LEVEL) = 'MOLECULE'
LEFT JOIN BRAND_RATIO T6 --UNIT--MOLECULE
    ON T1.MARKET = T6.MARKET AND T1.CMPS_CODE = T6.CMPS_COD
    AND UPPER(T6.TYPE) = 'UNIT' AND UPPER(T6.LEVEL) = 'MOLECULE'
LEFT JOIN BRAND_RATIO T7 --COUNTINGUNIT--MOLECULE
    ON T1.MARKET = T7.MARKET AND T1.CMPS_CODE = T7.CMPS_COD
    AND UPPER(T7.TYPE) = 'COUNTINGUNIT' AND UPPER(T7.LEVEL) = 'MOLECULE'
-- COMMAND ----------

View File

@@ -0,0 +1,71 @@
-- Databricks notebook source
-- CREATE OR REPLACE TABLE DM.DM_TD_EXT_XIEHE_MARKET_PACK_MAPPING
-- (
-- PACK_CODE STRING,
-- MARKET STRING,
-- CLASS STRING,
-- KEY_COMPETITOR STRING,
-- MARKET_RATIO STRING,
-- STARTTIME STRING,
-- ENDTIME STRING,
-- VALUE_BRAND_RATIO STRING,
-- VALUE_BRAND_RATIO_START STRING,
-- VALUE_BRAND_RATIO_END STRING,
-- UNIT_BRAND_RATIO STRING,
-- UNIT_BRAND_START STRING,
-- UNIT_BRAND_END STRING,
-- COUNTINGUNIT_BRAND_RATIO STRING,
-- COUNTINGUNIT_BRAND_START STRING,
-- COUNTINGUNIT_BRAND_END STRING,
-- PDOT_BRAND_RATIO STRING,
-- PDOT_BRAND_START STRING,
-- PDOT_BRAND_END STRING,
-- ETL_INSERT_DT TIMESTAMP,
-- ETL_UPDATE_DT TIMESTAMP
-- )
-- USING delta
-- -- LOCATION 'abfss://master@azcdatalakeprd.dfs.core.chinacloudapi.cn/DM/dm_td_ext_xiehe_market_pack_mapping';
-- -- 上面是生产环境location下面是测试环境location
-- LOCATION 'abfss://master@retaildlstoragetest.dfs.core.chinacloudapi.cn/DM/dm_td_ext_xiehe_market_pack_mapping';
-- COMMAND ----------
-- Rebuild the consolidated market/pack mapping: every (PACK_CODE, MARKET) of
-- PACK2MARKET is enriched with its market ratio (T2), brand ratios (T3),
-- class (T4) and key competitor (T5). Markets whose upper-cased name contains
-- 'ALL' are excluded.
-- NOTE(review): each lookup is joined on (MARKET, PACK_CODE) only; if a lookup
-- holds several rows for the same key (e.g. several validity windows) the
-- output is multiplied accordingly - confirm this is intended.
INSERT OVERWRITE TABLE DM.DM_TD_EXT_XIEHE_MARKET_PACK_MAPPING
SELECT DISTINCT
T1.PACK_CODE,
T1.MARKET,
T4.CLASS,
T5.KEY_COMPETITOR,
T2.MARKET_RATIO,
T2.STARTTIME,
T2.ENDTIME,
T3.VALUE_BRAND_RATIO,
T3.VALUE_BRAND_RATIO_START,
T3.VALUE_BRAND_RATIO_END,
T3.UNIT_BRAND_RATIO,
T3.UNIT_BRAND_START,
T3.UNIT_BRAND_END,
T3.COUNTINGUNIT_BRAND_RATIO,
T3.COUNTINGUNIT_BRAND_START,
T3.COUNTINGUNIT_BRAND_END,
T3.PDOT_BRAND_RATIO,
T3.PDOT_BRAND_START,
T3.PDOT_BRAND_END,
FROM_UTC_TIMESTAMP(CURRENT_TIMESTAMP(), 'UTC+8') AS ETL_INSERT_DT,
FROM_UTC_TIMESTAMP(CURRENT_TIMESTAMP(), 'UTC+8') AS ETL_UPDATE_DT
FROM
DM.DM_TD_EXT_XIEHE_PACK2MARKET T1
LEFT JOIN DM.DM_TD_EXT_XIEHE_MARKET_RATIO T2
ON T1.MARKET = T2.MARKET
AND T1.PACK_CODE = T2.PACK_CODE
LEFT JOIN DM.DM_TD_EXT_XIEHE_MARKET_BRAND_RATIO T3
ON T1.MARKET = T3.MARKET
AND T1.PACK_CODE = T3.PACK_CODE
LEFT JOIN DM.DM_TD_EXT_XIEHE_PACK2MARKET_CLASS T4
ON T1.PACK_CODE = T4.PACK_CODE
AND T1.MARKET = T4.MARKET
LEFT JOIN DM.DM_TD_EXT_XIEHE_PACK2MARKET_KEYCOMPETITOR T5
ON T1.PACK_CODE = T5.PACK_CODE
AND T1.MARKET = T5.MARKET
WHERE
-- Single-quoted string literal: double quotes are treated as an identifier when
-- ANSI double-quoted identifiers are enabled, and the sibling MARKET_RATIO
-- statement already uses single quotes for the same filter.
UPPER(T1.MARKET) NOT LIKE '%ALL%'

View File

@@ -0,0 +1,478 @@
# Databricks notebook source
# MAGIC %run ../../../Common/config
# COMMAND ----------
# MAGIC %sql
# MAGIC -- Refresh the file -> target-table mapping: each config row keeps its id and
# MAGIC -- file_name and gets the table name 'dwd.dwd_gnd_xiehe_<id>', stamped with the
# MAGIC -- current time shifted to UTC+8.
# MAGIC insert overwrite dwd.dwd_gnd_xiehe_config_table_mapping
# MAGIC select id,file_name,
# MAGIC concat('dwd.dwd_gnd_xiehe_', id) as table_name,
# MAGIC from_utc_timestamp(current_timestamp(),'UTC+8') as etl_insert_dt
# MAGIC from dwd.dwd_gnd_xiehe_config_table
# COMMAND ----------
# MAGIC %md
# MAGIC + 20250415 kzzh331 uc_upgrade uc兼容调整
# MAGIC 1. 将代码中的`DBFS`路径根调整为`Volume`路径
# MAGIC 2. 将`session`中安装的`mdb`工具调整到集群`init-script`中,`uc`集群可能需要`admin`权限才能在集群中安装程序
# MAGIC 3. 清理重复的`access_file_path_template`赋值,根据环境自动设置`Blob Container`
# COMMAND ----------
# %sh
# # 安装 mdbtools
# sudo apt-get update
# sudo apt-get install -y apt-utils
# sudo DEBIAN_FRONTEND=noninteractive apt-get install -y mdbtools
# COMMAND ----------
# import os
# from pyspark.sql import SparkSession
# # 初始化SparkSession
# spark = SparkSession.builder.getOrCreate()
# # 列出最新日期目录下的所有 Blob 文件
# def list_latest_blob_files(base_path_template):
# try:
# current_date = datetime.utcnow() + timedelta(hours=8)
# date_path = current_date.strftime("%Y/%m/%d/")
# base_path = base_path_template + date_path
# if path_exists(base_path):
# all_files = list_files_recursive(base_path)
# access_files = [file for file in all_files if is_access_file(file)]
# if not access_files:
# print("最新日期目录下未找到 Access 文件。")
# return []
# else:
# return access_files
# else:
# print(f"指定路径不存在: {base_path}")
# return []
# except Exception as e:
# print(f"列出路径模板 {base_path_template} 中的最新 Blob 文件时出错: {e}")
# raise
# # 设置原始路径模板
# # 测试路径
# # access_file_path_template = "abfss://master@retaildlstoragetest.dfs.core.chinacloudapi.cn/ODS/GND/UserUpload/"
# # 生产路径
# access_file_path_template = "abfss://master@azcdatalakeprd.dfs.core.chinacloudapi.cn/ODS/GND/UserUpload/"
# # Blob存储的文件夹路径
# blob_folder_path = list_latest_blob_files(access_file_path_template)
# # 遍历Blob存储上的文件夹和子文件夹
# file_list = []
# for root, dirs, files in os.walk(blob_folder_path):
# for file in files:
# file_list.append((root, file))
# # 创建DataFrame
# df = spark.createDataFrame(file_list, ["folder_path", "file_name"])
# # 将DataFrame写入临时表
# df.createOrReplaceTempView("temp_table")
# COMMAND ----------
# # 730 修改在一半版本
# from io import StringIO
# import pandas as pd
# from pyspark.sql import SparkSession
# from subprocess import run, PIPE
# import subprocess
# from pyspark.sql import SparkSession
# from datetime import datetime
# #获取所有blob文件目录地址并复制到 dbfs 的指定目录下
# def get_access_file_path_dbfs_list(access_file_path_blob,target_path):
# access_file_path_dbfs_list =[]
# # 使用 dbutils.fs.ls 遍历目录
# for file_info in dbutils.fs.ls(access_file_path_blob):
# if file_info.isDir() and file_info.path.endswith('/') :
# # 确保是目录
# blob_path = file_info.path
# for file_name in dbutils.fs.ls(file_info.path):
# if file_name.path.lower().endswith('xiehe.accdb') & file_name.name.lower().startswith('database') :
# dbutils.fs.cp(file_name.path,f"{target_path}/{file_name.name}" )
# access_file_path_dbfs_list.append(f"{target_path}/{file_name.name}")
# return access_file_path_dbfs_list
# import subprocess
# # 将access 文件转化成csv
# def access_file_to_csv(access_file_path_blob,target_path,output_dir):
# access_file_path_dbfs_list = get_access_file_path_dbfs_list(access_file_path_blob,target_path)
# print(access_file_path_dbfs_list)
# for access_path in access_file_path_dbfs_list:
# # 首先列出所有表名
# result = subprocess.run(['mdb-tables', '-1', f"/dbfs{access_path}"], stdout=subprocess.PIPE, text=True)
# print(result.returncode)
# if result.returncode != 0:
# print(f"Error listing tables: {result.stderr}")
# return
# table_names = result.stdout.strip().split('\n')
# print(table_names)
# # 遍历表并保存至csv ,以下代码有问题----修需改
# for table_name in table_names:
# csv_path = f"{output_dir}/{table_name}.csv"
# export_result = subprocess.run(['mdb-export', f"/dbfs{access_path}", table_name], stdout=subprocess.PIPE, text=True)
# with open(csv_path, 'w') as file:
# file.write(export_result.stdout)
# if export_result.returncode != 0:
# print(f"Error exporting {table_name} to {csv_path}: {export_result.stderr}")
# # 以上代码有问题----修需改
# # 还需功能,实现 读取csv 文件写入对应DWD 表,并基于数据时间对 结果表数据进行数据覆盖
# def main(access_file_path_blob,target_path,output_dir):
# access_file_to_csv(access_file_path_blob,target_path,output_dir)
# if __name__ == "__main__":
# # 原始路径模板,使用年月日替换
# access_file_path_template = "abfss://master@retaildlstoragetest.dfs.core.chinacloudapi.cn/ODS/GND/UserUpload/{date}/"
# today = datetime.now().strftime("%Y/%m/%d")
# access_file_path_blob = access_file_path_template.format(date=today)
# main(access_file_path_blob,"/dbfs/tmp" ,"/dbfs/tmp")
# COMMAND ----------
import os
import subprocess
import pandas as pd
from datetime import datetime, timedelta
from concurrent.futures import ThreadPoolExecutor
from pyspark.sql.functions import expr, date_format, lit
from queue import Queue
# Module-level queue that collects the paths of the temporary CSV files written
# by the worker threads (read_table_from_access puts one path per exported table).
TEMP_FILES = Queue()
def download_access_file(file_path, local_path):
    """Copy one Access file to ``local_path`` and return that destination path.

    The copy is performed with ``dbutils.fs.cp``; a confirmation line is printed
    once the copy call has returned.
    """
    # Earlier variant stripped the "/dbfs" prefix from the destination:
    # dbutils.fs.cp(file_path, local_path.replace("/dbfs", ""))
    dbutils.fs.cp(file_path, local_path)
    confirmation = f"已下载 {file_path}{local_path}"
    print(confirmation)
    return local_path
def file_exists(local_path):
    """Return True when ``local_path`` exists on the local file system."""
    found = os.path.exists(local_path)
    return found
def list_tables_in_access_file(local_path):
    """Return the table names contained in the Access file at ``local_path``.

    The names come from ``mdb-tables -1`` (one name per output line).

    Raises:
        FileNotFoundError: ``local_path`` does not exist.
        RuntimeError: ``mdb-tables`` exited with a non-zero status.
    """
    if not file_exists(local_path):
        raise FileNotFoundError(f"文件未找到: {local_path}")
    result = subprocess.run(
        ['mdb-tables', '-1', local_path],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    if result.returncode != 0:
        # errors='replace' so an undecodable byte in stderr cannot mask the real failure.
        error_message = result.stderr.decode('utf-8', errors='replace')
        raise RuntimeError(f"列出表名时出错: {error_message}")
    raw_names = result.stdout.decode('utf-8').strip().split('\n')
    # ''.split('\n') yields [''], so a file without tables used to report one
    # empty table name; drop empty entries instead.
    table_names = [name for name in raw_names if name]
    print(f"{local_path} 中的表: {table_names}")
    return table_names
def read_table_from_access(local_path, table_name):
    """Export one Access table to CSV and load it as a Spark DataFrame.

    ``mdb-export`` writes the table to ``mdb_export_<file>_<table>.csv`` next to
    the Access file; the CSV is then read with Spark and characters that are
    awkward in column names (space , ; { } ( ) newline tab =) are replaced by
    underscores.

    The CSV path is always pushed onto ``TEMP_FILES``, even on failure.

    Raises:
        RuntimeError: ``mdb-export`` exited with a non-zero status.
    """
    local_dirname = os.path.dirname(local_path)
    local_filename = os.path.basename(local_path).split('.')[0]
    temp_file_path = f"{local_dirname}/mdb_export_{local_filename}_{table_name}.csv"
    try:
        # Export into the temporary file; the context manager closes the handle
        # (the previous inline open() was never closed).
        with open(temp_file_path, "w") as export_file:
            result = subprocess.run(
                ["mdb-export", local_path, table_name],
                stdout=export_file,
                stderr=subprocess.PIPE,
            )
        # A failed export leaves an empty or partial CSV behind; fail loudly
        # rather than loading it as if it were complete.
        if result.returncode != 0:
            error_message = result.stderr.decode("utf-8", errors="replace")
            raise RuntimeError(f"导出表 {table_name} 时出错: {error_message}")
        df = (
            spark.read.option("header", "true")
            .option("quote", '"')
            .option("escape", '"')
            .option("multiLine", "true")
            .option("mode", "PERMISSIVE")
            .csv(temp_file_path)
        )
        # Sanitize column names in one pass per name.
        to_underscore = str.maketrans({ch: "_" for ch in " ,;{}()\n\t="})
        for col in df.columns:
            clean_col = col.translate(to_underscore)
            if clean_col != col:
                df = df.withColumnRenamed(col, clean_col)
        return df
    finally:
        TEMP_FILES.put(temp_file_path)
def process_table_parallel(local_path, table_name, column_mapping, target_columns):
    """Load one Access table and shape it to the target column layout.

    Steps: read the table, rename source columns per ``column_mapping``, add any
    target column that is still missing as NULL, stamp both ETL columns with the
    current timestamp plus 8 hours (formatted ``yyyy-MM-dd HH:mm:ss``), and
    select ``target_columns`` in order.

    Returns the shaped DataFrame, or ``None`` when anything fails (the error is
    printed, not raised).
    """
    try:
        frame = read_table_from_access(local_path, table_name)
        # Rename source columns to their target names.
        for source_name, target_name in (column_mapping or {}).items():
            frame = frame.withColumnRenamed(source_name, target_name)
        # Pad target columns that the source does not provide.
        absent = [name for name in target_columns if name not in frame.columns]
        for name in absent:
            frame = frame.withColumn(name, lit(None))
        # ETL audit columns.
        etl_stamp = date_format(expr("current_timestamp() + INTERVAL 8 HOURS"), "yyyy-MM-dd HH:mm:ss")
        frame = frame.withColumn("etl_insert_dt", etl_stamp)
        frame = frame.withColumn("etl_update_dt", etl_stamp)
        # Project into the target column order.
        return frame.select(target_columns)
    except Exception as e:
        print(f"处理表 {table_name} 时出错: {e}")
        return None
def process_access_files_with_config(access_files, config_df, column_mapping):
    """Convert the configured Access files into Spark DataFrames.

    For every row of ``config_df`` (columns ``file_name`` and ``table_name``) the
    Access files whose path contains ``file_name`` are downloaded, and each table
    inside them is processed on a thread pool.

    Returns a list of ``(dataframe, table_name)`` tuples, where ``table_name`` is
    the target table of the config row that produced the DataFrame. Tables that
    failed to process are omitted.
    """
    # Target layout: mapped business columns followed by the ETL audit columns.
    target_columns = list(column_mapping.values()) + ['etl_insert_dt', 'etl_update_dt']
    # Each future is stored together with the target table of its config row.
    # Previously the results were all paired with whatever `table_name` the last
    # loop iteration had left behind.
    pending = []
    all_dataframes = []
    with ThreadPoolExecutor(max_workers=8) as executor:
        for _, row in config_df.iterrows():
            file_name = row['file_name']
            table_name = row['table_name']
            # Find the files matching this config row.
            matching_files = [file for file in access_files if file_name in file]
            if not matching_files:
                print(f"未找到匹配的文件: {file_name}")
                continue
            for file in matching_files:
                try:
                    local_path = download_access_file(file, f"/Volumes/{NGBI_CATALOG}/tmp/volume_tmp/tmp/{os.path.basename(file)}")
                    table_names = list_tables_in_access_file(local_path)
                    for table in table_names:
                        future = executor.submit(process_table_parallel, local_path, table, column_mapping, target_columns)
                        pending.append((future, table_name))
                except Exception as e:
                    # Best effort: a broken file must not stop the remaining files.
                    print(f"处理文件 {file} 时出错: {e}")
                    continue
        for future, target_table in pending:
            result = future.result()
            # Explicit None check: a failed table returns None; do not rely on
            # DataFrame truthiness.
            if result is not None:
                all_dataframes.append((result, target_table))
    return all_dataframes
# 设置原始路径模板
# 测试路径
# access_file_path_template = "abfss://master@retaildlstoragetest.dfs.core.chinacloudapi.cn/ODS/GND/UserUpload/"
# 生产路径
# Select the upload container that matches the running environment.
if ENVIRONMENT == PRD_ENVIRONMENT_VALUE:
    access_file_path_template = "abfss://master@azcdatalakeprd.dfs.core.chinacloudapi.cn/ODS/GND/UserUpload/"
elif ENVIRONMENT == TEST_ENVIRONMENT_VALUE:
    access_file_path_template = "abfss://master@retaildlstoragetest.dfs.core.chinacloudapi.cn/ODS/GND/UserUpload/"
else:
    # Without this branch the variable stays undefined and the notebook fails
    # later with an unrelated-looking NameError at first use.
    raise ValueError(f"Unsupported ENVIRONMENT value: {ENVIRONMENT!r}")
def list_latest_blob_files(base_path_template):
    """List the Access files under today's dated folder.

    The folder is ``base_path_template`` + ``YYYY/MM/DD/`` where the date is the
    current UTC time plus 8 hours. Returns an empty list when the folder does
    not exist or holds no Access file; any error is printed and re-raised.
    """
    try:
        now_utc8 = datetime.utcnow() + timedelta(hours=8)
        base_path = base_path_template + now_utc8.strftime("%Y/%m/%d/")
        # Guard: nothing was uploaded for today.
        if not path_exists(base_path):
            print(f"指定路径不存在: {base_path}")
            return []
        access_files = [
            candidate
            for candidate in list_files_recursive(base_path)
            if is_access_file(candidate)
        ]
        if not access_files:
            print("最新日期目录下未找到 Access 文件。")
        return access_files
    except Exception as e:
        print(f"列出路径模板 {base_path_template} 中的最新 Blob 文件时出错: {e}")
        raise
def path_exists(path):
    """Return True when ``dbutils.fs.ls(path)`` succeeds.

    A failure whose message contains ``java.io.FileNotFoundException`` means the
    path is absent and yields False; any other failure is printed and re-raised.
    """
    try:
        dbutils.fs.ls(path)
    except Exception as e:
        if "java.io.FileNotFoundException" not in str(e):
            print(f"检查路径 {path} 时出错: {e}")
            raise
        return False
    return True
def is_access_file(file_path):
    """Return True when ``file_path`` ends with ``.accdb`` (case-insensitive)."""
    access_suffix = '.accdb'
    lowered = file_path.lower()
    return lowered.endswith(access_suffix)
def list_files_recursive(path):
    """Return the paths of all files below ``path``, descending into sub-directories.

    A path that does not exist yields an empty list; any error is printed and
    re-raised.
    """
    try:
        collected = []
        if not path_exists(path):
            return collected
        for entry in dbutils.fs.ls(path):
            if entry.isDir():
                collected += list_files_recursive(entry.path)
            else:
                collected.append(entry.path)
        return collected
    except Exception as e:
        print(f"列出路径 {path} 中的文件时出错: {e}")
        raise
# 设置原始路径模板
# 测试路径
# access_file_path_template = "abfss://master@retaildlstoragetest.dfs.core.chinacloudapi.cn/ODS/GND/UserUpload/"
# 生产路径
# access_file_path_template = "abfss://master@azcdatalakeprd.dfs.core.chinacloudapi.cn/ODS/GND/UserUpload/"
# 列出最新日期目录下的 Blob 文件
# Driver of this cell: load today's Access uploads into their per-file DWD tables
# and into the combined table dwd.dwd_inc_gnd_ext_xiehe_union_all.
latest_access_files = list_latest_blob_files(access_file_path_template)

if latest_access_files:
    try:
        # Read the file -> target-table configuration.
        config_df = spark.table("dwd.dwd_gnd_xiehe_config_table_mapping").toPandas()

        # Source column name (as found in the Access tables) -> target column name.
        column_mapping = {
            'AREA': 'area',
            'Date': 'yq',
            '医院级别': 'h_level',
            'DEPT_NAME': 'dept_name',
            '报销': 'reimburse',
            '报销类型': 'reimburse_type',
            '处方来源': 'prescription_source',
            'ATC': 'atc',
            '新code': 'new_code',
            '通用名': 'common_name',
            '商品名称': 'product_name',
            '厂家': 'manu_des',
            '规格': 'pack_des',
            '给药途径': 'drug_delivery_route',
            '剂型': 'nfc',
            '处方张数': 'prescription',
            '取药数量': 'sales_vol',
            '单价': 'price',
            '金额': 'sales_value'
        }

        # Convert every table of every matching Access file.
        all_dataframes = process_access_files_with_config(latest_access_files, config_df, column_mapping)

        if all_dataframes:
            combined_spark_df = None
            for df, table_name in all_dataframes:
                # Write each DataFrame to its target table (created on first use).
                df.createOrReplaceTempView("temp_access_table")
                spark.sql(f"CREATE TABLE IF NOT EXISTS {table_name} AS SELECT * FROM temp_access_table")
                spark.sql(f"INSERT overwrite {table_name} SELECT * FROM temp_access_table")
                if combined_spark_df is None:
                    combined_spark_df = df
                else:
                    combined_spark_df = combined_spark_df.union(df)

            # Union of everything written above goes into one result table.
            # Explicit None check instead of relying on DataFrame truthiness.
            if combined_spark_df is not None:
                combined_spark_df.createOrReplaceTempView("combined_table")
                spark.sql("create table if not exists dwd.dwd_inc_gnd_ext_xiehe_union_all as select * from combined_table")
                spark.sql("insert overwrite dwd.dwd_inc_gnd_ext_xiehe_union_all select * from combined_table")
        else:
            print("Access 文件中没有找到数据。")
    except Exception as e:
        print(f"处理 Access 文件时出错: {e}")
    finally:
        # The worker threads queue the path of every temporary CSV they export,
        # but nothing ever consumed that queue. All writes that read those CSVs
        # have finished (or failed) by now, so remove the files.
        while not TEMP_FILES.empty():
            temp_csv_path = TEMP_FILES.get()
            try:
                os.remove(temp_csv_path)
            except OSError as cleanup_error:
                # Best effort: a leftover temp file must not fail the load.
                print(f"清理临时文件 {temp_csv_path} 时出错: {cleanup_error}")
else:
    print("没有 Access 文件需要处理。")
# COMMAND ----------
# MAGIC %sql
# MAGIC -- Period-level refresh of the raw-data table:
# MAGIC -- 1) delete every yq that appears in the rows of the union-all staging table
# MAGIC --    whose etl_insert_dt date equals today's date (UTC+8);
# MAGIC -- 2) re-insert those staging rows with fresh ETL timestamps.
# MAGIC delete from dwd.dwd_gnd_ext_xiehe_raw_data
# MAGIC where yq in (
# MAGIC select yq from dwd.dwd_inc_gnd_ext_xiehe_union_all t1
# MAGIC where left(t1.etl_insert_dt, 10) = left(from_utc_timestamp(current_timestamp(),'UTC+8'),10)
# MAGIC );
# MAGIC
# MAGIC -- Explicit column list so the insert does not depend on the column order of either table.
# MAGIC insert into dwd.dwd_gnd_ext_xiehe_raw_data
# MAGIC (
# MAGIC area,
# MAGIC yq,
# MAGIC h_level,
# MAGIC dept_name,
# MAGIC reimburse,
# MAGIC reimburse_type,
# MAGIC prescription_source,
# MAGIC atc,
# MAGIC new_code,
# MAGIC common_name,
# MAGIC product_name,
# MAGIC manu_des,
# MAGIC pack_des,
# MAGIC drug_delivery_route,
# MAGIC nfc,
# MAGIC prescription,
# MAGIC sales_vol,
# MAGIC price,
# MAGIC sales_value,
# MAGIC etl_insert_dt,
# MAGIC etl_update_dt
# MAGIC )
# MAGIC select area,
# MAGIC yq,
# MAGIC h_level,
# MAGIC dept_name,
# MAGIC reimburse,
# MAGIC reimburse_type,
# MAGIC prescription_source,
# MAGIC atc,
# MAGIC new_code,
# MAGIC common_name,
# MAGIC product_name,
# MAGIC manu_des,
# MAGIC pack_des,
# MAGIC drug_delivery_route,
# MAGIC nfc,
# MAGIC prescription,
# MAGIC sales_vol,
# MAGIC price,
# MAGIC sales_value,
# MAGIC from_utc_timestamp(current_timestamp(),'UTC+8') etl_insert_dt,
# MAGIC from_utc_timestamp(current_timestamp(),'UTC+8') etl_update_dt
# MAGIC from dwd.dwd_inc_gnd_ext_xiehe_union_all
# MAGIC -- Same "loaded today (UTC+8)" filter as the delete above.
# MAGIC where left(etl_insert_dt, 10) = left(from_utc_timestamp(current_timestamp(),'UTC+8'),10);

View File

@@ -0,0 +1,115 @@
-- Databricks notebook source
--------------------------------------------------------------------------------
--修改时间20241025
--FanXujia
--
--packinfo表
--------------------------------------------------------------------------------
--------------------------------------对协和raw data 数据挂上pack geo信息,并获取本同期数据---------------------------------------------------
-- Rebuild dws.dws_ext_xiehe_raw_data: raw Xiehe rows enriched with the IQVIA pack
-- code and the geo audit code, carrying current-period measures and
-- same-period-last-year (*_ly) measures side by side, summed per grouping key.
insert overwrite table dws.dws_ext_xiehe_raw_data
select
yyyymm
,IQVIA_PACK_CODE
,AUDIT_COD
,h_level
,reimburse
,reimburse_type
,prescription_source
,area
,dept_name
,new_code
,sum(prescription) prescription
,SUM(sales_vol) counting_unit
,SUM(price) price
,SUM(sales_value) sales_value
,sum(prescription_ly) prescription_ly
,SUM(sales_vol_ly) counting_unit_ly
,SUM(price_ly) price_ly
,SUM(sales_value_ly) sales_value_ly
,from_utc_timestamp(current_timestamp(),'UTC+8') etl_insert_dt
,from_utc_timestamp(current_timestamp(),'UTC+8') etl_update_dt
FROM (
-- Branch 1: current-period rows; the *_ly measures are zero.
select
-- yyyymm = ('20' + first two chars of yq) * 100 + (last char of yq) * 3
-- NOTE(review): presumably yq is a 2-digit year plus a quarter number, making this the quarter-end month - confirm.
cast(concat('20',left(yq,2))*100 + right(yq,1)*3 as int ) yyyymm
-- Pack codes starting with a digit are left-padded with zeros to 12 characters; others are kept as-is.
,if( b.IQVIA_PACK_CODE REGEXP '^[0-9]',right(concat('000000000000',b.IQVIA_PACK_CODE),12),b.IQVIA_PACK_CODE) IQVIA_PACK_CODE --right(concat('0000000',b.IQVIA_PACK_CODE),7 )
,c.AUDIT_COD
,a.h_level
,a.reimburse
,a.reimburse_type
,a.prescription_source
,a.area
,a.dept_name
,a.new_code
,a.prescription
,a.sales_vol
,a.price
,a.sales_value
,0 as prescription_ly
,0 as sales_vol_ly
,0 as price_ly
,0 as sales_value_ly
from dwd.dwd_gnd_ext_xiehe_raw_data a
-- Case-insensitive match of the raw new_code against the pack master.
left join (select DISTINCT pack_code,IQVIA_PACK_CODE from dwd.dwd_gnd_xiehe_pack_info) b on upper(b.pack_code) = upper(a.new_code)
-- NOTE(review): if dm_ims_td_geo holds several rows per CITY_C the sums above are multiplied - confirm uniqueness.
left join dm.dm_ims_td_geo c on c.CITY_C = a.area
union all
-- Branch 2: the same rows shifted forward by one year (+100 on yyyymm); their
-- measures populate the *_ly columns and the current-period measures are zero.
select
cast(concat('20',left(yq,2))*100 + right(yq,1)*3 +100 as int ) yyyymm
,if( b.IQVIA_PACK_CODE REGEXP '^[0-9]',right(concat('000000000000',b.IQVIA_PACK_CODE),12),b.IQVIA_PACK_CODE) IQVIA_PACK_CODE --right(concat('0000000',b.IQVIA_PACK_CODE),7 )
,c.AUDIT_COD
,a.h_level
,a.reimburse
,a.reimburse_type
,a.prescription_source
,a.area
,a.dept_name
,a.new_code
,0 as prescription
,0 as sales_vol
,0 as price
,0 as sales_value
,a.prescription as prescription_ly
,a.sales_vol as sales_vol_ly
,a.price as price_ly
,a.sales_value as sales_value_ly
from dwd.dwd_gnd_ext_xiehe_raw_data a
left join (select DISTINCT pack_code,IQVIA_PACK_CODE from dwd.dwd_gnd_xiehe_pack_info) b on upper(b.pack_code) = upper(a.new_code)
left join dm.dm_ims_td_geo c on c.CITY_C = a.area
-- Keep a shifted row only when its period is not later than the latest period present in the raw data.
where cast(concat('20',left(yq,2))*100 + right(yq,1)*3 +100 as int )
<=(SELECT MAX(cast(concat('20',left(yq,2))*100 + right(yq,1)*3 as int )) from dwd.dwd_gnd_ext_xiehe_raw_data )
)
GROUP BY
yyyymm
,IQVIA_PACK_CODE
,AUDIT_COD
,h_level
,reimburse
,reimburse_type
,prescription_source
,area
,dept_name
,new_code
-- COMMAND ----------
----------raw data dm------------
-- Publish the DWS sales rows to the DM layer: a straight column-for-column copy
-- without the ETL timestamp columns.
insert overwrite table dm.dm_ext_xiehe_sales
select
    s.yyyymm,
    s.IQVIA_PACK_CODE,
    s.AUDIT_COD,
    s.h_level,
    s.reimburse,
    s.reimburse_type,
    s.prescription_source,
    s.area,
    s.dept_name,
    s.new_code,
    s.prescription,
    s.counting_unit,
    s.price,
    s.sales_value,
    s.prescription_ly,
    s.counting_unit_ly,
    s.price_ly,
    s.sales_value_ly
from dws.dws_ext_xiehe_raw_data s

View File

@@ -0,0 +1,776 @@
-- Databricks notebook source
---------------------------------------------------------------------------------
-- Build the normalized pack-property staging table from dwd.dwd_gnd_xiehe_pack_info:
-- codes are zero-padded to fixed widths and corporation / manufacturer attributes
-- are overridden from dwd.dwd_gnd_tbl_corp_change where an override exists.
insert overwrite table tmp.tmp_ext_xiehe_pack_property_tmp
----code
---
select
distinct
--20250428 chenwu: IQVIA pack code widened to 12 digits, brand (product) code to 9
-- Pack codes starting with a digit are left-padded with zeros to 12 characters; others are kept as-is.
if( IQVIA_PACK_CODE REGEXP '^[0-9]',right(concat('000000000000',IQVIA_PACK_CODE),12),IQVIA_PACK_CODE) as IQVIA_PACK_CODE --right(concat('0000000', IQVIA_PACK_CODE),7)
,right(concat('000000000', IQVIA_PROD_CODE),9) as IQVIA_PROD_CODE --right(concat('00000', IQVIA_PROD_CODE),5)
,a.pack_des
,a.APP1_COD
,a.APP1_DES
,a.APP1_DES_C
,a.APP2_COD
,a.APP2_DES
,a.APP2_DES_C
,a.APP3_COD
,a.APP3_DES
,a.APP3_DES_C
,a.ATC1_COD
,a.ATC1_DES
,a.ATC1_DES_C
,a.ATC2_COD
,a.ATC2_DES
,a.ATC2_DES_C
,a.ATC3_COD
,a.ATC3_DES
,a.ATC3_DES_C
,a.ATC4_COD
,a.ATC4_DES
,a.ATC4_DES_C
,a.BIO_DESC
,right(concat('000000', a.CMPS_COD),6) AS CMPS_COD
,a.CMPS_DES
,a.CMPS_DES_C
-- Override precedence for corporation / manufacturer: pack-level override (c),
-- then product-level override (b), then the pack master's own value (a).
,COALESCE(c.corp_cod,b.corp_cod,a.corp_cod) as corp_cod
-- The first ' GROUP...' marker is stripped from the pack master's corporation name.
,COALESCE(c.corp_des,b.corp_des,case when a.CORP_DES like '% GROUP%' then replace(a.CORP_DES,' GROUP','') else a.CORP_DES end) as CORP_DES
,COALESCE(d.corp_des_c,a.corp_des_c ) as corp_des_c
,a.EDL_DESC
,a.ETH_OTC_DESC
,a.GENE_ORIG_DESC
,a.GQCE_DESC
,COALESCE(c.manu_cod,b.manu_cod,a.manu_cod) manu_cod
,COALESCE(c.manu_des,b.manu_des,a.manu_des) manu_des
,COALESCE(e.manu_des_c,a.manu_des_c) manu_des_c
,case when a.MNFL_COD = 'L' then 'L' else 'M' end MNFL_COD --CHPA logic
,case when a.MNFL_DES='Local' then 'LOCAL' else 'MNC' end MNFL_DES --CHPA logic
-- ,a.MNFL_COD
-- ,a.MNFL_DES
,a.NRDL_DESC
,a.STGH_DES
,a.PACK_LCH
,a.PAED_DESC
,a.PROD_DES
,a.PROD_DES_C
,a.TCM_DESC
,CASE WHEN a.VBP_DESC IN ('VBP-IN','VBP-N/A') THEN 'VBP' ELSE 'Non VBP' END VBP_DESC --CHPA logic
-- ,a.VBP_DESC
,a.Unit
,a.Counting_Unit
,a.Dosage_Unit
from dwd.dwd_gnd_xiehe_pack_info a
-- b: product-level overrides (rows of the change table without a pack code), keyed by the 9-digit product code.
left join (select right(concat('000000000',prod_cod ),9) prod_cod,corp_cod
,case when CORP_DES like '% GROUP%' then replace(CORP_DES,' GROUP','') else CORP_DES end corp_des,right(concat('000000',manu_cod ),6) manu_cod,manu_des
from dwd.dwd_gnd_tbl_corp_change
where pack_cod is null
) b on right(concat('000000000', IQVIA_PROD_CODE),9)= b.prod_cod
--right(concat('00000',A.IQVIA_PROD_CODE ),5)= b.prod_cod
-- c: pack-level overrides (rows of the change table with a pack code), keyed by the normalized pack code.
left join (select if( pack_cod REGEXP '^[0-9]',right(concat('000000000000',pack_cod),12),pack_cod) pack_cod,corp_cod
,case when CORP_DES like '% GROUP%' then replace(CORP_DES,' GROUP','') else CORP_DES end corp_des,right(concat('000000',manu_cod ),6) manu_cod,manu_des
from dwd.dwd_gnd_tbl_corp_change
where pack_cod is not null
) c on if( IQVIA_PACK_CODE REGEXP '^[0-9]',right(concat('000000000000',IQVIA_PACK_CODE),12),IQVIA_PACK_CODE) = c.pack_cod
--right(concat('0000000', IQVIA_PACK_CODE),7) = c.pack_cod
-- d / e: Chinese names for the overriding corporation / manufacturer code, looked up in the pack master itself.
left join (select distinct corp_cod,corp_des_c from dwd.dwd_gnd_xiehe_pack_info) d on coalesce(C.corp_cod,B.corp_cod ) = d.corp_cod
Left join (select distinct right(concat('000000',manu_cod ),6) manu_cod,manu_des,manu_des_c from dwd.dwd_gnd_xiehe_pack_info) e On coalesce(C.manu_cod,B.manu_cod ) = e.manu_cod
;
--------tblmarket
-----MARKET
-- Extend_Market NOT_IN_FLAG
-- Assign markets to packs: every staged pack is matched against the market rules of
-- dwd.dwd_gnd_ims_tblmarket that have no Extend_Market and whose NOT_IN_FLAG is NULL or '1'.
-- Market_Ratio is the rule's extend_market_ratio, defaulting to '1'.
insert overwrite table tmp.tmp_ext_xiehe_pack_property
select distinct
t2.market,t1.*,
t2.bu,CASE WHEN t2.extend_market_ratio IS NULL THEN '1' ELSE t2.extend_market_ratio END AS Market_Ratio
from tmp.tmp_ext_xiehe_pack_property_tmp t1
left join (select * from dwd.dwd_gnd_ims_tblmarket WHERE Extend_Market IS NULL AND ( NOT_IN_FLAG IS NULL or NOT_IN_FLAG = '1' )) t2
-- Rule matching: a NULL rule column compares the pack column with itself, so it acts as a
-- wildcard for non-NULL pack values (a NULL pack value never matches these plain comparisons).
on t1.ATC1_COD = case when t2.ATC1_Code is null then t1.ATC1_COD else t2.ATC1_Code end
and t1.ATC2_COD = case when t2.ATC2_Code is null then t1.ATC2_COD else t2.ATC2_Code end
and t1.ATC3_COD = case when t2.ATC3_Code is null then t1.ATC3_COD else t2.ATC3_Code end
and t1.ATC4_COD = case when t2.ATC4_Code is null then t1.ATC4_COD else t2.ATC4_Code end
and t1.APP1_COD = case when t2.NFC1_Code is null then t1.APP1_COD else t2.NFC1_Code end
and t1.APP2_COD = case when t2.NFC2_Code is null then t1.APP2_COD else t2.NFC2_Code end
and t1.APP3_COD = case when t2.NFC3_Code is null then t1.APP3_COD else t2.NFC3_Code end
and t1.CORP_COD = case when t2.corporation_code is null then t1.CORP_COD else t2.corporation_code end
and t1.MANU_COD = case when t2.Manufacturer_Code is null then t1.MANU_COD else t2.Manufacturer_Code end
-- Product, strength, molecule and pack comparisons coalesce NULLs to '' first, so NULL pack
-- values still match wildcard rules; codes are zero-padded on both sides before comparing.
and right(concat('000000000', coalesce(t1.IQVIA_PROD_CODE,'')),9) = case when t2.Product_Code is null then right(concat('000000000', coalesce(t1.IQVIA_PROD_CODE,'')),9) else right(concat('000000000', coalesce(t2.Product_Code,'')),9) end
-- and t1.IQVIA_PACK_CODE = case when t2.Pack_Code is null then t1.IQVIA_PACK_CODE else t2.Pack_Code end
and nvl(t1.STGH_DES,'') = case when t2.Strength is null then nvl(t1.STGH_DES,'') else nvl(t2.Strength,'') end
and right(concat('000000', coalesce(t1.CMPS_COD,'')),6) = case when t2.Molecule_Code is null then right(concat('000000', coalesce(t1.CMPS_COD,'')),6) else right(concat('000000', coalesce( t2.Molecule_Code ,'')),6) end
and if( coalesce(t1.IQVIA_PACK_CODE,'') REGEXP '^[0-9]',right(concat('000000000000', coalesce(t1.IQVIA_PACK_CODE,'')),12),coalesce(t1.IQVIA_PACK_CODE,''))
= case when t2.pack_code is null
then if( coalesce(t1.IQVIA_PACK_CODE,'') REGEXP '^[0-9]',right(concat('000000000000', coalesce(t1.IQVIA_PACK_CODE,'')),12),coalesce(t1.IQVIA_PACK_CODE,''))
else if( coalesce(t2.pack_code,'') REGEXP '^[0-9]',right(concat('000000000000', coalesce(t2.pack_code,'')),12),coalesce(t2.pack_code,'')) end
-- Packs without any matching rule (and rules with a NULL market) are dropped here.
where t2.market is not null
;
-- Build the delete list consumed by the MERGE below: every pack row matched by
-- a market rule that has no Extend_Market and NOT_IN_FLAG = '0'.
-- A NULL rule column acts as a wildcard: it is replaced by the pack's own value
-- before the comparison, so it always matches a non-NULL pack value.
insert overwrite table tmp.tmp_ext_xiehe_pack_property_del
select distinct
    mkt.market,
    src.*,
    mkt.bu,
    -- default the ratio to '1' when the rule does not carry one
    case when mkt.extend_market_ratio is not null then mkt.extend_market_ratio else '1' end as Market_Ratio
from tmp.tmp_ext_xiehe_pack_property_tmp src
inner join (
    select *
    from dwd.dwd_gnd_ims_tblmarket
    where Extend_Market is null
      and NOT_IN_FLAG = '0'
) mkt
    on  src.ATC1_COD = coalesce(mkt.ATC1_Code, src.ATC1_COD)
    and src.ATC2_COD = coalesce(mkt.ATC2_Code, src.ATC2_COD)
    and src.ATC3_COD = coalesce(mkt.ATC3_Code, src.ATC3_COD)
    and src.ATC4_COD = coalesce(mkt.ATC4_Code, src.ATC4_COD)
    and src.APP1_COD = coalesce(mkt.NFC1_Code, src.APP1_COD)
    and src.APP2_COD = coalesce(mkt.NFC2_Code, src.APP2_COD)
    and src.APP3_COD = coalesce(mkt.NFC3_Code, src.APP3_COD)
    and src.CORP_COD = coalesce(mkt.corporation_code, src.CORP_COD)
    and src.MANU_COD = coalesce(mkt.Manufacturer_Code, src.MANU_COD)
    -- product code: both sides compared as zero-prefixed 9-character strings
    and right(concat('000000000', coalesce(src.IQVIA_PROD_CODE,'')),9)
        = case when mkt.Product_Code is not null
               then right(concat('000000000', coalesce(mkt.Product_Code,'')),9)
               else right(concat('000000000', coalesce(src.IQVIA_PROD_CODE,'')),9)
          end
    and nvl(src.STGH_DES,'') = coalesce(mkt.Strength, nvl(src.STGH_DES,''))
    -- molecule code: both sides compared as zero-prefixed 6-character strings
    and right(concat('000000', coalesce(src.CMPS_COD,'')),6)
        = case when mkt.Molecule_Code is not null
               then right(concat('000000', coalesce(mkt.Molecule_Code,'')),6)
               else right(concat('000000', coalesce(src.CMPS_COD,'')),6)
          end
    -- pack code: zero-prefixed to 12 characters only when it starts with a digit
    and if(coalesce(src.IQVIA_PACK_CODE,'') REGEXP '^[0-9]', right(concat('000000000000', coalesce(src.IQVIA_PACK_CODE,'')),12), coalesce(src.IQVIA_PACK_CODE,''))
        = case when mkt.pack_code is not null
               then if(coalesce(mkt.pack_code,'') REGEXP '^[0-9]', right(concat('000000000000', coalesce(mkt.pack_code,'')),12), coalesce(mkt.pack_code,''))
               else if(coalesce(src.IQVIA_PACK_CODE,'') REGEXP '^[0-9]', right(concat('000000000000', coalesce(src.IQVIA_PACK_CODE,'')),12), coalesce(src.IQVIA_PACK_CODE,''))
          end
-- the join is inner, so this only drops rules whose own market is NULL
where mkt.market is not null
;
-- Delete data from TempMKT.
-- (Translated original note) TempMKT column structure: leave the Value column empty.
--
-- Removes from tmp.tmp_ext_xiehe_pack_property every row that also appears, for
-- the same market, in the delete list tmp.tmp_ext_xiehe_pack_property_del.
--
-- IQVIA_PROD_CODE, IQVIA_PACK_CODE, STGH_DES and CMPS_COD are compared with the
-- null-safe operator <=>: the joins that populate both tables wrap exactly these
-- columns in coalesce()/nvl(), so they can be NULL on both sides. With a plain
-- "=" a NULL never equals a NULL and such rows would silently survive the delete.
MERGE INTO tmp.tmp_ext_xiehe_pack_property AS t1
USING tmp.tmp_ext_xiehe_pack_property_del AS t2
on t1.ATC1_COD = t2.ATC1_COD
and t1.ATC2_COD = t2.ATC2_COD
and t1.ATC3_COD = t2.ATC3_COD
and t1.ATC4_COD = t2.ATC4_COD
and t1.APP1_COD = t2.APP1_COD
and t1.APP2_COD = t2.APP2_COD
and t1.APP3_COD = t2.APP3_COD
and t1.CORP_COD = t2.CORP_COD
and t1.MANU_COD = t2.MANU_COD
and t1.IQVIA_PROD_CODE <=> t2.IQVIA_PROD_CODE
and t1.IQVIA_PACK_CODE <=> t2.IQVIA_PACK_CODE
and t1.STGH_DES <=> t2.STGH_DES
and t1.CMPS_COD <=> t2.CMPS_COD
and t1.market = t2.market
WHEN MATCHED THEN DELETE
;
---
-- Extend_Market
-- For every market rule that names an Extend_Market, copy the pack rows already
-- stored under that Extend_Market into the rule's own Market, taking BU from the
-- rule. The select list is positional (no column list on the insert), so its
-- order must stay aligned with the target table.
insert into tmp.tmp_ext_xiehe_pack_property
select distinct
t2.Market,
t1.iqvia_pack_code ,
t1.iqvia_prod_code
,t1.pack_des
,t1.APP1_COD
,t1.APP1_DES
,t1.APP1_DES_C
,t1.APP2_COD
,t1.APP2_DES
,t1.APP2_DES_C
,t1.APP3_COD
,t1.APP3_DES
,t1.APP3_DES_C
,t1.ATC1_COD
,t1.ATC1_DES
,t1.ATC1_DES_C
,t1.ATC2_COD
,t1.ATC2_DES
,t1.ATC2_DES_C
,t1.ATC3_COD
,t1.ATC3_DES
,t1.ATC3_DES_C
,t1.ATC4_COD
,t1.ATC4_DES
,t1.ATC4_DES_C
,t1.BIO_DESC
,t1.CMPS_COD
,t1.CMPS_DES
,t1.CMPS_DES_C
,t1.corp_cod
,t1.CORP_DES
,t1.corp_des_c
,t1.EDL_DESC
,t1.ETH_OTC_DESC
,t1.GENE_ORIG_DESC
,t1.GQCE_DESC
,t1.manu_cod
,t1.manu_des
,t1.manu_des_c
,t1.MNFL_COD
,t1.MNFL_DES
,t1.NRDL_DESC
,t1.STGH_DES
,t1.PACK_LCH
,t1.PAED_DESC
,t1.PROD_DES
,t1.PROD_DES_C
,t1.TCM_DESC
,t1.VBP_DESC
,t1.Unit
,t1.Counting_Unit
,t1.Dosage_Unit
,t2.BU
-- Keep the configured ratio and fall back to '1' only when none is set.
-- The previous CASE had no ELSE branch, so every configured ratio became NULL.
,case when t2.Extend_Market_Ratio is null then '1' else t2.Extend_Market_Ratio end AS Market_Ratio
from tmp.tmp_ext_xiehe_pack_property t1
left join (select Market, BU,Extend_Market,Extend_Market_Ratio from dwd.dwd_gnd_ims_tblmarket WHERE Extend_Market IS NOT NULL) t2 on t1.Market=t2.Extend_Market
-- keep only pack rows that actually matched an extending rule
where t2.Market is not null
;
-- COMMAND ----------
-- Packs without a market: every pack in the staging table whose IQVIA_PACK_CODE
-- is not present in tmp.tmp_ext_xiehe_pack_property is added under the
-- placeholder market 'Non Market', with an empty BU and a ratio of '1'.
insert into tmp.tmp_ext_xiehe_pack_property
(
    Market, IQVIA_PACK_CODE, IQVIA_PROD_CODE, pack_des,
    APP1_COD, APP1_DES, APP1_DES_C,
    APP2_COD, APP2_DES, APP2_DES_C,
    APP3_COD, APP3_DES, APP3_DES_C,
    ATC1_COD, ATC1_DES, ATC1_DES_C,
    ATC2_COD, ATC2_DES, ATC2_DES_C,
    ATC3_COD, ATC3_DES, ATC3_DES_C,
    ATC4_COD, ATC4_DES, ATC4_DES_C,
    BIO_DESC,
    CMPS_COD, CMPS_DES, CMPS_DES_C,
    corp_cod, CORP_DES, corp_des_c,
    EDL_DESC, ETH_OTC_DESC, GENE_ORIG_DESC, GQCE_DESC,
    manu_cod, manu_des, manu_des_c,
    MNFL_COD, MNFL_DES,
    NRDL_DESC, STGH_DES, PACK_LCH, PAED_DESC,
    PROD_DES, PROD_DES_C,
    TCM_DESC, VBP_DESC,
    Unit, Counting_Unit, Dosage_Unit,
    bu, Market_Ratio
)
select distinct
    'Non Market' as Market,
    src.IQVIA_PACK_CODE, src.IQVIA_PROD_CODE, src.pack_des,
    src.APP1_COD, src.APP1_DES, src.APP1_DES_C,
    src.APP2_COD, src.APP2_DES, src.APP2_DES_C,
    src.APP3_COD, src.APP3_DES, src.APP3_DES_C,
    src.ATC1_COD, src.ATC1_DES, src.ATC1_DES_C,
    src.ATC2_COD, src.ATC2_DES, src.ATC2_DES_C,
    src.ATC3_COD, src.ATC3_DES, src.ATC3_DES_C,
    src.ATC4_COD, src.ATC4_DES, src.ATC4_DES_C,
    src.BIO_DESC,
    src.CMPS_COD, src.CMPS_DES, src.CMPS_DES_C,
    src.corp_cod, src.CORP_DES, src.corp_des_c,
    src.EDL_DESC, src.ETH_OTC_DESC, src.GENE_ORIG_DESC, src.GQCE_DESC,
    src.manu_cod, src.manu_des, src.manu_des_c,
    src.MNFL_COD, src.MNFL_DES,
    src.NRDL_DESC, src.STGH_DES, src.PACK_LCH, src.PAED_DESC,
    src.PROD_DES, src.PROD_DES_C,
    src.TCM_DESC, src.VBP_DESC,
    src.Unit, src.Counting_Unit, src.Dosage_Unit,
    '' as bu,
    '1' as Market_Ratio
from tmp.tmp_ext_xiehe_pack_property_tmp src
-- anti join: keep only staging packs with no row in the market-assigned table
left anti join tmp.tmp_ext_xiehe_pack_property assigned
    on src.IQVIA_PACK_CODE = assigned.IQVIA_PACK_CODE
-- COMMAND ----------
-------------------------------------------------------------------------------
-- Flag products that belong to AZ with the IS_AZ indicator.
-- Rebuilds dws.dws_ext_xiehe_pack_property from the working pack table, adding:
--   * MARKET_PACK_KEY  = Market + '_' + IQVIA_PACK_CODE
--   * NRDL_ENTRY_DATE  from dwd.dwd_ims_td_pack_additional_attribute (by pack code)
--   * IS_AZ            'Y' when the product code has at least one row with
--                      corp_cod = 'A5Z' in the working table, else 'N'
--   * AZ_MAIN          always NULL here
--   * BRANDTYPE        from dwd.dwd_gnd_ims_tblbrandtype (by pack code)
insert overwrite table dws.dws_ext_xiehe_pack_property
select
    concat(pk.Market,'_',pk.IQVIA_PACK_CODE) as MARKET_PACK_KEY,
    pk.market, pk.IQVIA_PACK_CODE, pk.IQVIA_PROD_CODE, pk.pack_des,
    pk.APP1_COD, pk.APP1_DES, pk.APP1_DES_C,
    pk.APP2_COD, pk.APP2_DES, pk.APP2_DES_C,
    pk.APP3_COD, pk.APP3_DES, pk.APP3_DES_C,
    pk.ATC1_COD, pk.ATC1_DES, pk.ATC1_DES_C,
    pk.ATC2_COD, pk.ATC2_DES, pk.ATC2_DES_C,
    pk.ATC3_COD, pk.ATC3_DES, pk.ATC3_DES_C,
    pk.ATC4_COD, pk.ATC4_DES, pk.ATC4_DES_C,
    pk.BIO_DESC,
    pk.CMPS_COD, pk.CMPS_DES, pk.CMPS_DES_C,
    pk.corp_cod, pk.CORP_DES, pk.corp_des_c,
    pk.EDL_DESC, pk.ETH_OTC_DESC, pk.GENE_ORIG_DESC, pk.GQCE_DESC,
    pk.manu_cod, pk.manu_des, pk.manu_des_c,
    pk.MNFL_COD, pk.MNFL_DES,
    pk.NRDL_DESC, pk.STGH_DES, pk.PACK_LCH, pk.PAED_DESC,
    pk.PROD_DES, pk.PROD_DES_C,
    pk.TCM_DESC, pk.VBP_DESC,
    pk.Unit, pk.Counting_Unit, pk.Dosage_Unit,
    pk.bu, pk.Market_Ratio,
    attr.NRDL_ENTRY_DATE,
    case when az.prod_code is not null then 'Y' else 'N' end as IS_AZ,
    null as AZ_MAIN,
    bt.brand_type as BRANDTYPE
from tmp.tmp_ext_xiehe_pack_property pk
left join (
    select distinct Pack_Code, NRDL_ENTRY_DATE
    from dwd.dwd_ims_td_pack_additional_attribute
) attr
    on pk.IQVIA_PACK_CODE = attr.Pack_Code
left join (
    -- product codes that have at least one pack owned by corporation 'A5Z'
    select distinct IQVIA_PROD_CODE as prod_code
    from tmp.tmp_ext_xiehe_pack_property
    where corp_cod = 'A5Z'
) az
    on pk.IQVIA_PROD_CODE = az.prod_code
left join dwd.dwd_gnd_ims_tblbrandtype bt
    on pk.IQVIA_PACK_CODE = bt.PACK_COD
;
-- COMMAND ----------
-------------------------------------------------------------------------------
-- Rebuild tmp.tmp_dm_ext_xiehe_pack_property: every dws pack row plus
--   * class : from dwd.dwd_gnd_tblclass, 'Others' when no class rule matches
--   * ta    : from dwd.dwd_gnd_ext_retail_dim_ta, joined on market
-- In the class join a NULL rule column is a wildcard: it is replaced by the
-- pack's own (NULL-defaulted) value before comparing.
-- NOTE(review): the rule-side pack/product/molecule codes are zero-prefixed here
-- but the pack-side codes are compared as stored, whereas the market-rule joins
-- earlier in this file prefix both sides. Confirm the dws codes are already
-- stored at full width.
insert overwrite table tmp.tmp_dm_ext_xiehe_pack_property
select
    pk.MARKET_PACK_KEY,
    pk.market, pk.IQVIA_PACK_CODE, pk.IQVIA_PROD_CODE, pk.pack_des,
    pk.APP1_COD, pk.APP1_DES, pk.APP1_DES_C,
    pk.APP2_COD, pk.APP2_DES, pk.APP2_DES_C,
    pk.APP3_COD, pk.APP3_DES, pk.APP3_DES_C,
    pk.ATC1_COD, pk.ATC1_DES, pk.ATC1_DES_C,
    pk.ATC2_COD, pk.ATC2_DES, pk.ATC2_DES_C,
    pk.ATC3_COD, pk.ATC3_DES, pk.ATC3_DES_C,
    pk.ATC4_COD, pk.ATC4_DES, pk.ATC4_DES_C,
    pk.BIO_DESC,
    pk.CMPS_COD, pk.CMPS_DES, pk.CMPS_DES_C,
    pk.corp_cod, pk.CORP_DES, pk.corp_des_c,
    pk.EDL_DESC, pk.ETH_OTC_DESC, pk.GENE_ORIG_DESC, pk.GQCE_DESC,
    pk.manu_cod, pk.manu_des, pk.manu_des_c,
    pk.MNFL_COD, pk.MNFL_DES,
    pk.NRDL_DESC, pk.STGH_DES, pk.PACK_LCH, pk.PAED_DESC,
    pk.PROD_DES, pk.PROD_DES_C,
    pk.TCM_DESC, pk.VBP_DESC,
    pk.Unit, pk.Counting_Unit, pk.Dosage_Unit,
    pk.bu, pk.Market_Ratio,
    pk.NRDL_ENTRY_DATE, pk.IS_AZ, pk.AZ_MAIN, pk.BRANDTYPE,
    coalesce(cls.class,'Others') as class,
    dim_ta.ta
from dws.dws_ext_xiehe_pack_property pk
left join dwd.dwd_gnd_ext_retail_dim_ta dim_ta
    on pk.market = dim_ta.market
left join dwd.dwd_gnd_tblclass cls
    on  pk.MARKET = coalesce(cls.market, pk.market)
    -- pack code rule: zero-prefixed to 12 characters only when it starts with a digit
    and nvl(pk.iqvia_pack_code,'')
        = case when cls.pack_code is not null
               then if(cls.pack_code REGEXP '^[0-9]', right(concat('000000000000',cls.pack_code),12), cls.pack_code)
               else nvl(pk.iqvia_pack_code,'')
          end
    -- product code rule: zero-prefixed to 9 characters
    and nvl(pk.IQVIA_PROD_CODE,'')
        = case when cls.product_code is not null
               then right(concat('000000000',cls.product_code),9)
               else nvl(pk.IQVIA_PROD_CODE,'')
          end
    -- molecule code rule: zero-prefixed to 6 characters
    and nvl(pk.CMPS_COD,'')
        = case when cls.molecule_code is not null
               then right(concat('000000', cls.molecule_code),6)
               else nvl(pk.cmps_cod,'')
          end
    and nvl(pk.corp_cod,'') = coalesce(cls.Corporation_code, nvl(pk.corp_cod,''))
    and nvl(pk.manu_cod,'') = coalesce(cls.Manufacturer_Code, nvl(pk.manu_cod,''))
    and nvl(pk.atc1_cod,'') = coalesce(cls.ATC1_Code, nvl(pk.atc1_cod,''))
    and nvl(pk.ATC2_COD,'') = coalesce(cls.ATC2_Code, nvl(pk.ATC2_COD,''))
    and nvl(pk.ATC3_COD,'') = coalesce(cls.ATC3_Code, nvl(pk.ATC3_COD,''))
    and nvl(pk.ATC4_COD,'') = coalesce(cls.ATC4_Code, nvl(pk.ATC4_COD,''))
    and nvl(pk.app1_cod,'') = coalesce(cls.NFC1_Code, nvl(pk.app1_cod,''))
    and nvl(pk.APP2_COD,'') = coalesce(cls.NFC2_Code, nvl(pk.APP2_COD,''))
    and nvl(pk.APP3_COD,'') = coalesce(cls.NFC3_Code, nvl(pk.APP3_COD,''))
    and nvl(pk.stgh_des,'') = coalesce(cls.Strength, nvl(pk.stgh_des,''))
;
-- COMMAND ----------
-- Load dm.dm_ext_xiehe_pack_property from tmp.tmp_dm_ext_xiehe_pack_property,
-- preferring attribute values from dm.dm_ims_td_pack_property when the same
-- MARKET_PACK_KEY exists there.
--
-- CTE ims_pack_property: collapses dm.dm_ims_td_pack_property to a single row
-- per MARKET_PACK_KEY by taking max() of every attribute.
with ims_pack_property as (
select
MARKET_PACK_KEY,
max(PACK_COD) as PACK_COD,
max(PACK_DES) as PACK_DES,
max(STGH_DES) as STGH_DES,
max(PACK_LCH) as PACK_LCH,
max(Family_Code) as Family_Code,
max(Family_Name) as Family_Name,
max(PROD_COD) as PROD_COD,
max(PROD_DES) as PROD_DES,
max(PROD_DES_C) as PROD_DES_C,
max(CMPS_COD) as CMPS_COD,
max(CMPS_DES) as CMPS_DES,
max(CMPS_DES_C) as CMPS_DES_C,
max(ATC1_COD) as ATC1_COD,
max(ATC2_COD) as ATC2_COD,
max(ATC3_COD) as ATC3_COD,
max(ATC4_COD) as ATC4_COD,
max(APP1_COD) as APP1_COD,
max(APP2_COD) as APP2_COD,
max(APP3_COD) as APP3_COD,
max(BIO_DESC) as BIO_DESC,
max(GENE_ORIG_DESC) as GENE_ORIG_DESC,
max(ETH_OTC_DESC) as ETH_OTC_DESC,
max(NRDL_DESC) as NRDL_DESC,
max(NRDL_Entry_Date) as NRDL_Entry_Date,
max(EDL_DESC) as EDL_DESC,
max(TCM_DESC) as TCM_DESC,
max(PAED_DESC) as PAED_DESC,
max(GQCE_DESC) as GQCE_DESC,
max(VBP_DESC) as VBP_DESC,
max(MANU_COD) as MANU_COD,
max(MANU_DES) as MANU_DES,
max(MANU_DES_C) as MANU_DES_C,
max(MNFL_COD) as MNFL_COD,
max(MNFL_DES) as MNFL_DES,
max(CORP_COD) as CORP_COD,
max(CORP_DES) as CORP_DES,
max(CORP_DES_C) as CORP_DES_C,
max(BrandType) as BrandType,
max(MARKET) as MARKET,
max(KEY_COMPETITOR) as KEY_COMPETITOR,
max(IS_AZ) as IS_AZ,
max(AZ_MAIN) as AZ_MAIN,
max(AZ_Related) as AZ_Related,
max(atc1_des) as atc1_des,
max(atc1_des_c) as atc1_des_c,
max(atc2_des) as atc2_des,
max(atc2_des_c) as atc2_des_c,
max(atc3_des) as atc3_des,
max(atc3_des_c) as atc3_des_c,
max(atc4_des) as atc4_des,
max(atc4_des_c) as atc4_des_c,
max(app1_des) as app1_des,
max(app1_des_c) as app1_des_c,
max(app2_des) as app2_des,
max(app2_des_c) as app2_des_c,
max(app3_des) as app3_des,
max(app3_des_c) as app3_des_c,
max(Class) as Class
from dm.dm_ims_td_pack_property
group by MARKET_PACK_KEY
)
-- Explicit target column list: both select branches below must produce their
-- columns in exactly this order.
insert overwrite table dm.dm_ext_xiehe_pack_property (
MARKET_PACK_KEY,
PACK_COD,
PACK_DES,
STGH_DES,
PACK_LCH,
Family_Code,
Family_Name,
PROD_COD,
PROD_DES,
PROD_DES_C,
CMPS_COD,
CMPS_DES,
CMPS_DES_C,
ATC1_COD,
ATC2_COD,
ATC3_COD,
ATC4_COD,
APP1_COD,
APP2_COD,
APP3_COD,
BIO_DESC,
GENE_ORIG_DESC,
ETH_OTC_DESC,
NRDL_DESC,
NRDL_Entry_Date,
EDL_DESC,
TCM_DESC,
PAED_DESC,
GQCE_DESC,
VBP_DESC,
MANU_COD,
MANU_DES,
MANU_DES_C,
MNFL_COD,
MNFL_DES,
CORP_COD,
CORP_DES,
CORP_DES_C,
BrandType,
MARKET,
KEY_COMPETITOR,
IS_AZ,
AZ_MAIN,
AZ_Related,
atc1_des,
atc1_des_c,
atc2_des,
atc2_des_c,
atc3_des,
atc3_des_c,
atc4_des,
atc4_des_c,
app1_des,
app1_des_c,
app2_des,
app2_des_c,
app3_des,
app3_des_c,
Class,
ETL_INSERT_DT,
ETL_UPDATE_DT,
Market_Ratio,
counting_unit,
bu,
Dosage_Unit,
ta
)
-- Branch 1: one row per market/pack key for every market except 'Non Market'.
-- For shared attributes the ims_pack_property value (t2) wins and the xiehe
-- value (t1) is the fallback; Family_Code, Family_Name, KEY_COMPETITOR and
-- AZ_Related exist only on the t2 side.
select distinct
t1.market_pack_key,
t1.iqvia_pack_code as PACK_COD,
coalesce(t2.PACK_DES,t1.pack_des) as PACK_DES,
coalesce(t2.STGH_DES,t1.stgh_des) as STGH_DES,
coalesce(t2.PACK_LCH,t1.pack_lch) as PACK_LCH,
t2.Family_Code,
t2.Family_Name,
coalesce(t2.PROD_COD,t1.iqvia_prod_code) as PROD_COD,
coalesce(t2.PROD_DES,t1.prod_des) as PROD_DES,
coalesce(t2.PROD_DES_C,t1.prod_des_c) as PROD_DES_C,
coalesce(t2.CMPS_COD,t1.cmps_cod) as CMPS_COD,
coalesce(t2.CMPS_DES,t1.cmps_des) as CMPS_DES,
coalesce(t2.CMPS_DES_C,t1.cmps_des_c) as CMPS_DES_C,
coalesce(t2.ATC1_COD,t1.atc1_cod) as ATC1_COD,
coalesce(t2.ATC2_COD,t1.atc2_cod) as ATC2_COD,
coalesce(t2.ATC3_COD,t1.atc3_cod) as ATC3_COD,
coalesce(t2.ATC4_COD,t1.atc4_cod) as ATC4_COD,
coalesce(t2.APP1_COD,t1.app1_cod) as APP1_COD,
coalesce(t2.APP2_COD,t1.app2_cod) as APP2_COD,
coalesce(t2.APP3_COD,t1.app3_cod) as APP3_COD,
coalesce(t2.BIO_DESC,t1.bio_desc) as BIO_DESC,
coalesce(t2.GENE_ORIG_DESC,t1.gene_orig_desc) as GENE_ORIG_DESC,
coalesce(t2.ETH_OTC_DESC,t1.eth_otc_desc) as ETH_OTC_DESC,
coalesce(t2.NRDL_DESC,t1.nrdl_desc) as NRDL_DESC,
coalesce(t2.NRDL_Entry_Date,t1.NRDL_Entry_Date) as NRDL_Entry_Date,
coalesce(t2.EDL_DESC,t1.edl_desc) as EDL_DESC,
coalesce(t2.TCM_DESC,t1.tcm_desc) as TCM_DESC,
coalesce(t2.PAED_DESC,t1.paed_desc) as PAED_DESC,
coalesce(t2.GQCE_DESC,t1.gqce_desc) as GQCE_DESC,
coalesce(t2.VBP_DESC,t1.vbp_desc) as VBP_DESC,
coalesce(t2.MANU_COD,t1.manu_cod) as MANU_COD,
coalesce(t2.MANU_DES,t1.manu_des) as MANU_DES,
coalesce(t2.MANU_DES_C,t1.manu_des_c) as MANU_DES_C,
coalesce(t2.MNFL_COD,t1.mnfl_cod) as MNFL_COD,
coalesce(t2.MNFL_DES,t1.mnfl_des) as MNFL_DES,
coalesce(t2.CORP_COD,t1.corp_cod) as CORP_COD,
coalesce(t2.CORP_DES,t1.corp_des) as CORP_DES,
coalesce(t2.CORP_DES_C,t1.corp_des_c) as CORP_DES_C,
coalesce(t2.BrandType,t1.BrandType) as BrandType,
t1.market,
t2.KEY_COMPETITOR,
coalesce(t2.IS_AZ,t1.IS_AZ) as IS_AZ,
coalesce(t2.AZ_MAIN,t1.AZ_MAIN) as AZ_MAIN,
t2.AZ_Related,
coalesce(t2.atc1_des,t1.atc1_des) as atc1_des,
coalesce(t2.atc1_des_c,t1.atc1_des_c) as atc1_des_c,
coalesce(t2.atc2_des,t1.atc2_des) as atc2_des,
coalesce(t2.atc2_des_c,t1.atc2_des_c) as atc2_des_c,
coalesce(t2.atc3_des,t1.atc3_des) as atc3_des,
coalesce(t2.atc3_des_c,t1.atc3_des_c) as atc3_des_c,
coalesce(t2.atc4_des,t1.atc4_des) as atc4_des,
coalesce(t2.atc4_des_c,t1.atc4_des_c) as atc4_des_c,
coalesce(t2.app1_des,t1.app1_des) as app1_des,
coalesce(t2.app1_des_c,t1.app1_des_c) as app1_des_c,
coalesce(t2.app2_des,t1.app2_des) as app2_des,
coalesce(t2.app2_des_c,t1.app2_des_c) as app2_des_c,
coalesce(t2.app3_des,t1.app3_des) as app3_des,
coalesce(t2.app3_des_c,t1.app3_des_c) as app3_des_c,
coalesce(t2.Class,t1.Class) as Class,
-- load timestamps: current_timestamp() shifted from UTC into the 'UTC+8' zone
from_utc_timestamp(current_timestamp(),'UTC+8') as ETL_INSERT_DT,
from_utc_timestamp(current_timestamp(),'UTC+8') as ETL_UPDATE_DT,
cast(t1.market_ratio as decimal(30,10)) as market_ratio,
-- counting_unit divided by unit (a NULL unit counts as 1); a NULL result becomes 1
coalesce(cast(t1.counting_unit / coalesce(t1.unit,1) as decimal(30,10)),1) as counting_unit,
t1.bu,
t1.Dosage_Unit,
t1.ta
from tmp.tmp_dm_ext_xiehe_pack_property t1
left join ims_pack_property t2
on t1.market_pack_key = t2.market_pack_key
where t1.market <> 'Non Market'
-- "union" (not "union all") also removes duplicate rows across the two branches
union
-- Branch 2: one synthetic 'XIEHE ALL Market' row per pack code, keyed
-- 'XIEHE ALL Market_' + pack code. There is no market filter here, so
-- 'Non Market' rows are included; each attribute is the max() over all of the
-- pack's rows.
select distinct
concat('XIEHE ALL Market_',t1.iqvia_pack_code) as market_pack_key,
t1.iqvia_pack_code as PACK_COD,
max(coalesce(t2.PACK_DES,t1.pack_des)) as PACK_DES,
max(coalesce(t2.STGH_DES,t1.stgh_des)) as STGH_DES,
max(coalesce(t2.PACK_LCH,t1.pack_lch)) as PACK_LCH,
max(t2.Family_Code) as Family_Code,
max(t2.Family_Name) as Family_Name,
max(coalesce(t2.PROD_COD,t1.iqvia_prod_code)) as PROD_COD,
max(coalesce(t2.PROD_DES,t1.prod_des)) as PROD_DES,
max(coalesce(t2.PROD_DES_C,t1.prod_des_c)) as PROD_DES_C,
max(coalesce(t2.CMPS_COD,t1.cmps_cod)) as CMPS_COD,
max(coalesce(t2.CMPS_DES,t1.cmps_des)) as CMPS_DES,
max(coalesce(t2.CMPS_DES_C,t1.cmps_des_c)) as CMPS_DES_C,
max(coalesce(t2.ATC1_COD,t1.atc1_cod)) as ATC1_COD,
max(coalesce(t2.ATC2_COD,t1.atc2_cod)) as ATC2_COD,
max(coalesce(t2.ATC3_COD,t1.atc3_cod)) as ATC3_COD,
max(coalesce(t2.ATC4_COD,t1.atc4_cod)) as ATC4_COD,
max(coalesce(t2.APP1_COD,t1.app1_cod)) as APP1_COD,
max(coalesce(t2.APP2_COD,t1.app2_cod)) as APP2_COD,
max(coalesce(t2.APP3_COD,t1.app3_cod)) as APP3_COD,
max(coalesce(t2.BIO_DESC,t1.bio_desc)) as BIO_DESC,
max(coalesce(t2.GENE_ORIG_DESC,t1.gene_orig_desc)) as GENE_ORIG_DESC,
max(coalesce(t2.ETH_OTC_DESC,t1.eth_otc_desc)) as ETH_OTC_DESC,
max(coalesce(t2.NRDL_DESC,t1.nrdl_desc)) as NRDL_DESC,
max(coalesce(t2.NRDL_Entry_Date,t1.NRDL_Entry_Date)) as NRDL_Entry_Date,
max(coalesce(t2.EDL_DESC,t1.edl_desc)) as EDL_DESC,
max(coalesce(t2.TCM_DESC,t1.tcm_desc)) as TCM_DESC,
max(coalesce(t2.PAED_DESC,t1.paed_desc)) as PAED_DESC,
max(coalesce(t2.GQCE_DESC,t1.gqce_desc)) as GQCE_DESC,
max(coalesce(t2.VBP_DESC,t1.vbp_desc)) as VBP_DESC,
max(coalesce(t2.MANU_COD,t1.manu_cod)) as MANU_COD,
max(coalesce(t2.MANU_DES,t1.manu_des)) as MANU_DES,
max(coalesce(t2.MANU_DES_C,t1.manu_des_c)) as MANU_DES_C,
max(coalesce(t2.MNFL_COD,t1.mnfl_cod)) as MNFL_COD,
max(coalesce(t2.MNFL_DES,t1.mnfl_des)) as MNFL_DES,
max(coalesce(t2.CORP_COD,t1.corp_cod)) as CORP_COD,
max(coalesce(t2.CORP_DES,t1.corp_des)) as CORP_DES,
max(coalesce(t2.CORP_DES_C,t1.corp_des_c)) as CORP_DES_C,
max(coalesce(t2.BrandType,t1.BrandType)) as BrandType,
'XIEHE ALL Market' as MARKET,
max(t2.KEY_COMPETITOR) as KEY_COMPETITOR,
max(coalesce(t2.IS_AZ,t1.IS_AZ)) as IS_AZ,
max(coalesce(t2.AZ_MAIN,t1.AZ_MAIN)) as AZ_MAIN,
max(t2.AZ_Related) as AZ_Related,
max(coalesce(t2.atc1_des,t1.atc1_des)) as atc1_des,
max(coalesce(t2.atc1_des_c,t1.atc1_des_c)) as atc1_des_c,
max(coalesce(t2.atc2_des,t1.atc2_des)) as atc2_des,
max(coalesce(t2.atc2_des_c,t1.atc2_des_c)) as atc2_des_c,
max(coalesce(t2.atc3_des,t1.atc3_des)) as atc3_des,
max(coalesce(t2.atc3_des_c,t1.atc3_des_c)) as atc3_des_c,
max(coalesce(t2.atc4_des,t1.atc4_des)) as atc4_des,
max(coalesce(t2.atc4_des_c,t1.atc4_des_c)) as atc4_des_c,
max(coalesce(t2.app1_des,t1.app1_des)) as app1_des,
max(coalesce(t2.app1_des_c,t1.app1_des_c)) as app1_des_c,
max(coalesce(t2.app2_des,t1.app2_des)) as app2_des,
max(coalesce(t2.app2_des_c,t1.app2_des_c)) as app2_des_c,
max(coalesce(t2.app3_des,t1.app3_des)) as app3_des,
max(coalesce(t2.app3_des_c,t1.app3_des_c)) as app3_des_c,
max(coalesce(t2.Class,t1.Class)) as Class,
from_utc_timestamp(current_timestamp(),'UTC+8') as ETL_INSERT_DT,
from_utc_timestamp(current_timestamp(),'UTC+8') as ETL_UPDATE_DT,
-- ratio, counting unit, bu, dosage unit and ta are also reduced with max()
-- across the pack's markets
max(cast(t1.market_ratio as decimal(30,10))) as market_ratio,
coalesce(max(cast(t1.counting_unit / coalesce(t1.unit,1) as decimal(30,10))),1) as counting_unit,
max(t1.bu) as bu,
max(t1.Dosage_Unit) as Dosage_Unit,
max(t1.ta) as ta
from tmp.tmp_dm_ext_xiehe_pack_property t1
left join ims_pack_property t2
on t1.market_pack_key = t2.market_pack_key
group by t1.iqvia_pack_code