删除备份版本notebook

This commit is contained in:
2026-05-11 23:19:31 +08:00
parent db99577102
commit 23755b4e1e
12 changed files with 0 additions and 13599 deletions

View File

@@ -1,195 +0,0 @@
-- Databricks notebook source
--
-- Stage 1: project the raw AIA rows, derive the numeric period key and
-- normalise the source institution code.
CREATE OR REPLACE TEMPORARY VIEW aia_rawdata_1 AS
SELECT
    year,
    qtr,
    -- period key computed as year * 100 + ym
    CAST(year * 100 + ym AS INT) AS ym,
    org_prov,
    org_city,
    ins_level,
    -- non-null codes shorter than 7 characters are left-padded with zeros
    CASE
        WHEN ins_cd IS NOT NULL AND LENGTH(ins_cd) < 7
            THEN RIGHT(CONCAT('0000000', ins_cd), 7)
        ELSE ins_cd
    END AS org_ins_cd,
    atc1,
    atc2,
    atc3,
    atc4,
    atc,
    mole_nm,
    prod_nm,
    org_prd_nm_c,
    org_pk,
    org_prd_str,
    org_pth,
    min_unit,
    value,
    unit,
    CAST(pack_desc AS INT) AS pack_desc,
    org_pk_un,
    manu
FROM dwd.dwd_gnd_aia_rawdata
-- COMMAND ----------
-- Stage 2: derive AUDIT_COD and inst_code for every raw row.
-- The padded source institution code is looked up in
-- dwd.dwd_gnd_hospital_not_provided; when no mapping exists the source code
-- itself is used. AUDIT_COD is the upper-cased form of inst_code.
-- (Earlier joins to dm.dm_td_institution / dm.dm_ims_td_geo are disabled.)
CREATE OR REPLACE TEMPORARY VIEW aia_rawdata_2 AS
WITH hospital_mapping AS (
    -- distinct (zero-padded cpa_hospital_code, ins_cd_nl) pairs
    SELECT DISTINCT
        CASE
            WHEN LENGTH(cpa_hospital_code) < 7
                THEN RIGHT(CONCAT('0000000', cpa_hospital_code), 7)
            ELSE cpa_hospital_code
        END AS cpa_hospital_code,
        ins_cd_nl AS inst_code
    FROM dwd.dwd_gnd_hospital_not_provided
)
SELECT
    UPPER(NVL(hm.inst_code, src.org_ins_cd)) AS AUDIT_COD,
    src.*,
    NVL(hm.inst_code, src.org_ins_cd) AS inst_code
FROM aia_rawdata_1 AS src
LEFT JOIN hospital_mapping AS hm
    ON src.org_ins_cd = hm.cpa_hospital_code
-- COMMAND ----------
/*
Modified by : chenwu
Modified on : 20250513
Change      : bundled ("packaged") molecule rows have no coefficient in either
              the mapping table or the fact table, so the fact table's
              minimum-preparation-unit quantity is used as counting_unit.
*/
-- Stage 3: resolve pack_cod, corp_cod and counting_unit for every raw row.
CREATE OR REPLACE TEMPORARY VIEW aia_rawdata_3 AS
SELECT
    -- codes that start with a digit and are shorter than 12 characters are
    -- left-padded with zeros to 12 characters
    CASE
        WHEN LENGTH(TRIM(cpt.iqvia_pack_code)) < 12
             AND TRIM(cpt.iqvia_pack_code) REGEXP '^[0-9]'
            THEN RIGHT(CONCAT('000000000000', TRIM(cpt.iqvia_pack_code)), 12)
        ELSE TRIM(cpt.iqvia_pack_code)
    END AS pack_cod,
    prop.corp_cod,
    -- 'AZP_' packs: min_unit / org_pk_un from the fact row (change of 20250513);
    -- every other pack takes counting_unit from dm.dm_aia_pack_property
    CASE
        WHEN LEFT(cpt.iqvia_pack_code, 4) = 'AZP_'
            THEN CAST(src.min_unit / src.org_pk_un AS DECIMAL(38,10))
        ELSE prop.counting_unit
    END AS counting_unit,
    src.*
FROM aia_rawdata_2 AS src
LEFT JOIN (
    SELECT DISTINCT
        TRIM(iqvia_pack_code) AS iqvia_pack_code,
        ATC1,
        ATC2,
        ATC3,
        ATC4,
        org_prd_cd,
        org_mole_nm_c,
        org_manu_prd_nm_c,
        org_prd_nm_c,
        org_pk,
        org_prd_str,
        org_pth,
        min_pk_unit,
        org_pk_unit,
        org_manu_nm
    FROM dwd.dwd_gnd_ext_aia_cpt_data
) AS cpt
    -- match on the full descriptive key; NULL on either side is treated as ''
    ON  NVL(src.ATC1, '')         = NVL(cpt.ATC1, '')
    AND NVL(src.ATC2, '')         = NVL(cpt.ATC2, '')
    AND NVL(src.ATC3, '')         = NVL(cpt.ATC3, '')
    AND NVL(src.ATC4, '')         = NVL(cpt.ATC4, '')
    AND NVL(src.ATC, '')          = NVL(cpt.org_prd_cd, '')
    AND NVL(src.MOLE_NM, '')      = NVL(cpt.org_mole_nm_c, '')
    AND NVL(src.PROD_NM, '')      = NVL(cpt.org_manu_prd_nm_c, '')
    AND NVL(src.ORG_PRD_NM_C, '') = NVL(cpt.org_prd_nm_c, '')
    AND NVL(src.ORG_PK, '')       = NVL(cpt.org_pk, '')
    AND NVL(src.ORG_PRD_STR, '')  = NVL(cpt.org_prd_str, '')
    AND NVL(src.ORG_PTH, '')      = NVL(cpt.org_pth, '')
    AND NVL(src.UNIT, '')         = NVL(cpt.min_pk_unit, '')
    AND NVL(src.PACK_DESC, '')    = NVL(cpt.org_pk_unit, '')
    AND NVL(src.MANU, '')         = NVL(cpt.org_manu_nm, '')
LEFT JOIN (
    SELECT DISTINCT
        PACK_COD,
        corp_cod,
        counting_unit
    FROM dm.dm_aia_pack_property
) AS prop
    ON cpt.iqvia_pack_code = prop.PACK_COD
-- COMMAND ----------
-- Stage 4: current-period and last-year ("ly") measures side by side.
-- Every row of aia_rawdata_3 is emitted twice: once under its own ym with the
-- *_ly measures set to 0, and once shifted forward by 100 (one year) with only
-- the *_ly measures filled. Shifted rows beyond the latest ym are dropped.
CREATE OR REPLACE TEMPORARY VIEW aia_rawdata_4 AS
WITH max_ym AS (
    SELECT MAX(ym) AS max_ym
    FROM aia_rawdata_3
),
stacked AS (
    -- current period
    SELECT
        ym,
        org_ins_cd,
        inst_code,
        AUDIT_COD,
        pack_cod,
        corp_cod,
        CAST(value AS DECIMAL(38,10))     AS value,
        CAST(ORG_PK_UN AS DECIMAL(38,10)) AS volume,
        -- a missing counting_unit coefficient counts as 1
        CAST(ORG_PK_UN AS DECIMAL(38,10)) * COALESCE(CAST(counting_unit AS DECIMAL(38,10)), 1) AS counting_unit,
        0 AS value_ly,
        0 AS volume_ly,
        0 AS counting_unit_ly
    FROM aia_rawdata_3

    UNION ALL

    -- same rows moved one year forward, feeding the *_ly columns
    SELECT
        CAST(CAST(ym AS INT) + 100 AS STRING) AS ym,
        org_ins_cd,
        inst_code,
        AUDIT_COD,
        pack_cod,
        corp_cod,
        0 AS value,
        0 AS volume,
        0 AS counting_unit,
        CAST(value AS DECIMAL(38,10))     AS value_ly,
        CAST(ORG_PK_UN AS DECIMAL(38,10)) AS volume_ly,
        CAST(ORG_PK_UN AS DECIMAL(38,10)) * COALESCE(CAST(counting_unit AS DECIMAL(38,10)), 1) AS counting_unit_ly
    FROM aia_rawdata_3 AS prev
    CROSS JOIN max_ym AS lim
    WHERE CAST(CAST(ym AS INT) + 100 AS STRING) <= lim.max_ym
)
SELECT
    YM,
    org_ins_cd,
    inst_code,
    AUDIT_COD,
    pack_cod,
    corp_cod,
    SUM(value)            AS value,
    SUM(volume)           AS volume,
    SUM(counting_unit)    AS counting_unit,
    SUM(value_ly)         AS value_ly,
    SUM(volume_ly)        AS volume_ly,
    SUM(counting_unit_ly) AS counting_unit_ly
FROM stacked
GROUP BY
    ym,
    org_ins_cd,
    inst_code,
    AUDIT_COD,
    pack_cod,
    corp_cod
-- COMMAND ----------
-- Final load: overwrite dm.dm_ext_aia_sales with the measures of
-- aia_rawdata_4, summed per ym / institution / pack / corporation.
INSERT OVERWRITE TABLE dm.dm_ext_aia_sales (
    ym,
    org_ins_cd,
    inst_code,
    AUDIT_COD,
    pack_cod,
    corp_cod,
    value,
    volume,
    counting_unit,
    value_ly,
    volume_ly,
    counting_unit_ly
)
SELECT
    ym,
    org_ins_cd,
    inst_code,
    AUDIT_COD,
    pack_cod,
    corp_cod,
    SUM(value)            AS value,
    SUM(volume)           AS volume,
    SUM(counting_unit)    AS counting_unit,
    SUM(value_ly)         AS value_ly,
    SUM(volume_ly)        AS volume_ly,
    SUM(counting_unit_ly) AS counting_unit_ly
FROM aia_rawdata_4
GROUP BY
    ym,
    org_ins_cd,
    inst_code,
    AUDIT_COD,
    pack_cod,
    corp_cod

View File

@@ -1,643 +0,0 @@
-- Databricks notebook source
-- MAGIC %run ../../../Common/config
-- COMMAND ----------
-- MAGIC %python
-- MAGIC # Register two `dwd` tables from the catalog named by CDW_CATALOG as temp views,
-- MAGIC # so the SQL cells below can reference them by a fixed name.
-- MAGIC # NOTE(review): CDW_CATALOG is presumably defined by the %run of Common/config above - confirm.
-- MAGIC spark.read.table(f'`{CDW_CATALOG}`.`dwd`.`dim_product_wide`').createOrReplaceTempView('cdw_dwd_dim_product_wide')
-- MAGIC spark.read.table(f'`{CDW_CATALOG}`.`dwd`.`dwd_hospital_classification`').createOrReplaceTempView('cdw_dwd_dwd_hospital_classification')
-- COMMAND ----------
-----------------------------------------------------------------------------------
-- Modified: 20241021
-- Author:   FanXujia
-----------------------------------------------------------------------------------
-- Institutions in common.
-- Rebuilds dm.dm_aia_hp_flag with one row per inst_code found in
-- dm.dm_ext_aia_sales (more if the join to `flag` matches several rows).
-- FLAG is 'Y' for institutions matched in `flag`, otherwise 'N'.
--
-- This statement used to sit in two consecutive, identical cells. It reads only
-- dwd.dwd_gnd_hospital_not_provided and dm.dm_ext_aia_sales, never its own
-- target, so the second run rewrote exactly the same rows; one copy is kept.
insert overwrite table dm.dm_aia_hp_flag
with max_year as (
-- latest year in the mapping table and the year before it
select
max(cast(year as int)) as max_year,
max(cast(year as int) - 1) as max_year_ly
from dwd.dwd_gnd_hospital_not_provided
)
,hp_flag as (
-- one '<year>-Y' marker per hospital code and year, limited to years >= max_year_ly;
-- codes shorter than 7 characters are left-padded with zeros
select
distinct concat(t1.year, '-Y') as hp_his_flag,
case when length(t1.cpa_hospital_code) < 7 then right(concat('0000000',t1.cpa_hospital_code),7) else t1.cpa_hospital_code end as cpa_hospital_code
from
dwd.dwd_gnd_hospital_not_provided t1
cross join max_year t2
where cast(t1.year as int) >= t2.max_year_ly
)
,hp_flag2 as (
-- sorted markers concatenated per hospital code
SELECT
cpa_hospital_code,
CONCAT_WS('', SORT_ARRAY(COLLECT_LIST(hp_his_flag))) AS FLAG
FROM
hp_flag
GROUP BY
cpa_hospital_code
)
,inst_mapping as (
-- padded hospital code -> ins_cd_nl
-- (replaces the former lookup in dwd.dwd_gnd_hospitalmapping)
select distinct
case when length(cpa_hospital_code) < 7 then right(concat('0000000',cpa_hospital_code),7) else cpa_hospital_code end as cpa_hospital_code,
ins_cd_nl as inst_code
from dwd.dwd_gnd_hospital_not_provided
)
,ALL_INS as (
-- every institution that has AIA sales rows
select distinct inst_code
from dm.dm_ext_aia_sales
)
,flag as (
-- length(flag) > 6 keeps codes carrying more than one marker
-- (assumes four-character years, i.e. 6 characters per marker - TODO confirm)
select distinct
coalesce(t2.inst_code,t1.cpa_hospital_code) as inst_code,
t1.flag as aia_hp_flag,
'Y' as FLAG,
'AIA(Monthly)' as DATA_SOURCE
from hp_flag2 t1
left join inst_mapping t2 on t1.cpa_hospital_code = t2.cpa_hospital_code
where
length(t1.flag) > 6
-- NOTE(review): the placeholder rows below only reach the output if ALL_INS
-- contains an empty inst_code, and the final select then overwrites their
-- DATA_SOURCE with 'AIA(Monthly)'. Confirm whether they were meant to be
-- unioned into the final result instead.
union all
select '' as inst_code,'' as aia_hp_flag,'' as FLAG,'IQVIA-CHPA(Monthly)' as DATA_SOURCE
union all
select '' as inst_code,'' as aia_hp_flag,'' as FLAG,'XH Data(Quarterly)' as DATA_SOURCE
union all
select '' as inst_code,'' as aia_hp_flag,'' as FLAG,'EC(Monthly)' as DATA_SOURCE
union all
select '' as inst_code,'' as aia_hp_flag,'' as FLAG,'Retail(Quarterly)' as DATA_SOURCE
union all
select '' as inst_code,'' as aia_hp_flag,'' as FLAG,'CHC(Quarterly)' as DATA_SOURCE
union all
select '' as inst_code,'' as aia_hp_flag,'' as FLAG,'THC(Quarterly)' as DATA_SOURCE
union all
select '' as inst_code,'' as aia_hp_flag,'' as FLAG,'IQVIA-COUNTY(Quarterly)' as DATA_SOURCE
)
select t1.inst_code,
NVL(t2.aia_hp_flag,'') as aia_hp_flag,
NVL(t2.FLAG,'N') as FLAG,
'AIA(Monthly)' as DATA_SOURCE
from ALL_INS t1
left join flag t2
on t1.inst_code = t2.inst_code
-- COMMAND ----------
----------------------------------------------------------------------------------------------------------------------
-- Modified: 20241029
-- Author:   FanXujia
-- Rebuilds dm.dm_aia_provided_flag.
-- An institution receives the rolling-one-year (or rolling-two-year) label when
-- the number of months in which its summed value is non-zero, counted inside
-- the window [ym_*_before, max_ym], equals the required month count ym_12m
-- (or ym_24m). One empty placeholder row per non-AIA data source is appended.
----------------------------------------------------------------------------------------------------------------------
INSERT OVERWRITE TABLE dm.dm_aia_provided_flag
WITH max_ym AS (
    SELECT MAX(ym) AS max_ym
    FROM dm.dm_ext_aia_sales
),
ym_range AS (
    SELECT
        max_ym,
        -- months required for the one-year label
        CASE
            WHEN max_ym <= '202412' THEN CAST(RIGHT(max_ym, 2) AS INT)
            ELSE CAST(12 AS INT)
        END AS ym_12m,
        -- first ym of the one-year window
        CASE
            WHEN max_ym <= '202312' THEN '202401'
            WHEN RIGHT(max_ym, 2) = '12' THEN CONCAT(LEFT(max_ym, 4), '01')
            ELSE CONCAT(
                CAST(LEFT(max_ym, 4) - 1 AS INT),
                RIGHT(CONCAT('0', CAST(RIGHT(max_ym, 2) + 1 AS INT)), 2)
            )
        END AS ym_12m_before,
        -- months required for the two-year label
        CASE
            WHEN max_ym <= '202312' THEN CAST(RIGHT(max_ym, 2) AS INT)
            WHEN max_ym <= '202512' THEN CAST(RIGHT(max_ym, 2) AS INT) + 12
            ELSE CAST(24 AS INT)
        END AS ym_24m,
        -- first ym of the two-year window
        CASE
            WHEN max_ym <= '202512' THEN '202401'
            WHEN RIGHT(max_ym, 2) = '12' THEN CONCAT(CAST(LEFT(max_ym, 4) - 1 AS INT), '01')
            ELSE CONCAT(
                CAST(LEFT(max_ym, 4) - 2 AS INT),
                RIGHT(CONCAT('0', CAST(RIGHT(max_ym, 2) + 1 AS INT)), 2)
            )
        END AS ym_24m_before
    FROM max_ym
),
months_with_sales AS (
    -- one row per institution and month whose summed value is non-zero
    SELECT
        inst_code,
        ym
    FROM dm.dm_ext_aia_sales
    GROUP BY
        ym,
        inst_code
    HAVING SUM(value) <> 0
),
months_in_range AS (
    -- attach the single ym_range row to every institution-month
    SELECT
        m.inst_code,
        m.ym,
        r.max_ym,
        r.ym_12m,
        r.ym_12m_before,
        r.ym_24m,
        r.ym_24m_before
    FROM months_with_sales AS m
    CROSS JOIN ym_range AS r
),
flag_12m AS (
    SELECT
        inst_code,
        '滚动一年有数' AS aia_provided_flag,
        'AIA(Monthly)' AS DATA_SOURCE
    FROM months_in_range
    WHERE ym >= ym_12m_before
      AND ym <= max_ym
    GROUP BY inst_code
    HAVING COUNT(1) = (SELECT ym_12m FROM ym_range)
),
flag_24m AS (
    SELECT
        inst_code,
        '滚动两年有数' AS aia_provided_flag,
        'AIA(Monthly)' AS DATA_SOURCE
    FROM months_in_range
    WHERE ym >= ym_24m_before
      AND ym <= max_ym
    GROUP BY inst_code
    HAVING COUNT(1) = (SELECT ym_24m FROM ym_range)
)
SELECT inst_code, aia_provided_flag, DATA_SOURCE
FROM flag_12m
UNION ALL
SELECT inst_code, aia_provided_flag, DATA_SOURCE
FROM flag_24m
-- empty placeholder row for each of the other data sources
UNION ALL
SELECT '' AS inst_code, '' AS aia_provided_flag, 'IQVIA-CHPA(Monthly)' AS data_source
UNION ALL
SELECT '' AS inst_code, '' AS aia_provided_flag, 'XH Data(Quarterly)' AS data_source
UNION ALL
SELECT '' AS inst_code, '' AS aia_provided_flag, 'EC(Monthly)' AS data_source
UNION ALL
SELECT '' AS inst_code, '' AS aia_provided_flag, 'Retail(Quarterly)' AS data_source
UNION ALL
SELECT '' AS inst_code, '' AS aia_provided_flag, 'CHC(Quarterly)' AS data_source
UNION ALL
SELECT '' AS inst_code, '' AS aia_provided_flag, 'THC(Quarterly)' AS data_source
UNION ALL
SELECT '' AS inst_code, '' AS aia_provided_flag, 'IQVIA-COUNTY(Quarterly)' AS data_source
-- COMMAND ----------
---------------------------------------------------------------------------
-- Modified: 20241108, Fanxujia
--   for AIA Dashboard: sub_channel, dragon_flag, ka_flag, target_ins_level columns
-- Modified: 20241209, Fanxujia
--   ordering of sub channel
---------------------------------------------------------------------------
-- Target hospitals.
-- Rebuilds dm.dm_aia_targethp_flag: one row per (inst_code, MARKET) seen in
-- dm.dm_ext_aia_sales with target flag, sub channel, dragon flag, KA flag and
-- target level, plus one empty placeholder row per non-AIA data source.
insert overwrite table dm.dm_aia_targethp_flag
with sales as (
-- institution / market pairs present in the AIA sales
select distinct t1.inst_code,t2.MARKET
from dm.dm_ext_aia_sales t1
left join dm.dm_aia_pack_property t2
on t1.pack_cod = t2.PACK_COD
)
-- pack -> market lookup
,pack_market as (
select distinct
PACK_COD,
MARKET
from dm.dm_aia_pack_property
)
-- KA flag, target_ins_level and target-institution rows from the territory table
,td_territory_inst as (
select distinct
inst_code,
trty_code,
family_code,
ka_flag,
TARGET_LEVEL as target_ins_level
from dm.dm_td_sd_territory_indication -- Eagle's non-key hospitals are also added as target institutions
where yyyymm in (
select
key_value
from
dm.dm_td_date_config
where
key_code = 'pfmc_cvh'
)
and bu not in ('CUBE','KA')
and key_hp<>3 --chenwu 20250904
)
-- territory -> channel, derived from the BU / sub-BU codes
,td_org as (
select distinct
t1.mr_trty_code,
case when t1.bu_code in ('PC14','PC16','PC18','PC19') then 'BU'
when t1.bu_code = 'SC01' and t1.sub_bu_code = 'NA29' then sub_bu_name
when t1.bu_code = 'SC01' then 'BU'
when t1.bu_code <> 'SC01' and t2.bm_name = 'BCBH' then 'BU'
when t1.bu_code <> 'SC01' then t1.bu_name
end as channel
from dm.dm_td_org t1
left join dws.dws_td_bu_active t2
on t1.bu_name = t2.bu_name
where t1.yyyymm in (
select
key_value
from
dm.dm_td_date_config
where
key_code = 'pfmc_cvh'
)
)
-- mapping between Family level and Brand level
,product_wide as (
select distinct
prd_family_cd,
prd_brand_cd
from cdw_dwd_dim_product_wide
)
-- mapping between Brand level and the external pack_cod;
-- codes that start with a digit and are shorter than 12 characters are zero-padded to 12
,imscode_azcode as (
select distinct
brandcode,
case when length(ims_pack_cod) < 12 and ims_pack_cod REGEXP '^[0-9]' then right(concat('00000000000',ims_pack_cod),12) else ims_pack_cod end as ims_pack_cod
from dwd.dwd_td_imscode_azcode
)
-- KA flag, target_ins_level and channel per target institution and market
,target_hp_all as (
select
distinct
t1.inst_code,
t1.ka_flag,
t1.target_ins_level,
t2.channel,
t5.MARKET
from
td_territory_inst t1
inner join td_org t2 on t1.trty_code = t2.mr_trty_code
left join product_wide t3 on t3.prd_family_cd = t1.family_code
left join imscode_azcode t4 on t3.prd_brand_cd = t4.brandcode
left join pack_market t5 on t4.ims_pack_cod = t5.pack_cod
where t5.MARKET is not null
)
-- in target_hp_all one institution / market can carry several attribute values
-- ka_flag: 'KA' wins over any other value
,ka_flag1 as (
select distinct
inst_code,
MARKET,
ka_flag
from target_hp_all
)
,ka_flag2 as (
select
inst_code,
MARKET,
ka_flag,
row_number() over(partition by inst_code,market
order by case when ka_flag = 'KA' then 1
ELSE 2 end asc) as RN
from ka_flag1
)
,ka_flag_final as (
select
inst_code,
MARKET,
ka_flag
from ka_flag2
where RN = 1
)
-- target_ins_level is not reduced to a single value
-- hard-coded: 'ONC Market' and 'AIA ALL Market' are set to 'NA'
,target_ins_level1 as (
select distinct
inst_code,
MARKET,
case when MARKET in ('AIA ALL Market','ONC Market') then 'NA'
else target_ins_level
end as target_ins_level
from target_hp_all
)
,target_ins_level_final as (
select distinct
inst_code,
MARKET,
target_ins_level
from target_ins_level1
)
-- channel: a single value per institution / market, picked by fixed priority
,Channel1 as (
select distinct
inst_code,
MARKET,
channel
from target_hp_all
)
,channel2 as (
select
inst_code,
MARKET,
channel,
row_number() over(partition by inst_code,market
order by case when channel = 'BU' then 1
when channel = 'BBU_County' then 2
when channel = 'CHC' then 3
when channel = 'Eagle' then 4
ELSE 5 end asc) as RN
from channel1
)
,channel_final as (
select
inst_code,
MARKET,
channel
from channel2
where RN = 1
)
-- target institution / market pairs with their resolved attributes
,target_hp1 as (
select distinct
inst_code,
MARKET
from target_hp_all
)
,target_hp as (
select t1.inst_code,
t1.market,
t2.ka_flag,
t3.target_ins_level,
t4.channel
from target_hp1 t1
left join ka_flag_final t2
on t1.inst_code = t2.inst_code
and t1.market = t2.market
left join target_ins_level_final t3
on t1.inst_code = t3.inst_code
and t1.market = t3.market
left join channel_final t4
on t1.inst_code = t4.inst_code
and t1.market = t4.market
)
-- Dragon type
-- county tier of each institution, used to decide whether it is 'County'
,county as (
select distinct inst_code,county_tier_desc
from dm.dm_td_institution
)
-- hospital classification per institution and product family (modified 20250825)
,dragon_type as (
select src_hcc_code as inst_code,
src_prod_code as family_code,
max(is_core) is_core
from cdw_dwd_dwd_hospital_classification
where active_status = 1
-- `in` matches the other two date-config filters of this statement and does not
-- fail when the config holds more than one 'pfmc_cvh' row
and src_sales_cycle in (select key_value from dm.dm_td_date_config where key_code = 'pfmc_cvh')
group by src_hcc_code,
src_prod_code
)
-- family_code converted to market via brand and pack_cod.
-- Goes through the product_wide / imscode_azcode CTEs, exactly like
-- target_hp_all, so the pack code is zero-padded before it is matched against
-- pack_market; joining the raw ims_pack_cod dropped the market of short codes.
,dragon_type_market1 as (
select distinct
t1.inst_code,
t1.is_core,
t4.market
from dragon_type t1
left join product_wide t2
on t1.family_code = t2.prd_family_cd
left join imscode_azcode t3
on t2.prd_brand_cd = t3.brandcode
left join pack_market t4
on t3.ims_pack_cod = t4.pack_cod
where t4.market is not null
)
-- one classification per institution / market: CORE, then EMERGING, then the rest
,dragon_type_market2 as (
select distinct
inst_code,
is_core,
market,
row_number() over(partition by inst_code,market
order by case when is_core = 'CORE' then 1
when is_core = 'EMERGING' then 2
else 3 end asc) as RN
from dragon_type_market1
)
,dragon_type_market as (
select distinct
inst_code,
is_core,
market
from dragon_type_market2
where RN = 1
)
-- dragon_flag: 'County' by county tier, else the classification, else 'Others'
,dragon_flag1 as (
select distinct
t0.inst_code,
t0.market,
case when t1.county_tier_desc in ('County','County level city') then 'County'
when t2.is_core is null or length(t2.is_core) = 0 then 'Others'
else t2.is_core
end as dragon_flag
from target_hp t0
left join county t1
on t0.inst_code = t1.inst_code
left join dragon_type_market t2
on t0.inst_code = t2.inst_code
and t0.MARKET = t2.MARKET
)
-- one dragon_flag per institution / market, picked by fixed priority
,dragon_flag2 as (
select inst_code,
market,
dragon_flag,
row_number() over(partition by inst_code,market
order by case when dragon_flag = 'CORE' then 1
when dragon_flag = 'EMERGING' then 2
when dragon_flag = 'County' then 3
else 4 end asc) as RN
from dragon_flag1
)
,dragon_flag as (
select inst_code,
market,
dragon_flag
from dragon_flag2
where RN = 1
)
-- placeholder rows: one per non-AIA data source, all other columns empty
,other_data_source as (
select explode(array('IQVIA-CHPA(Monthly)','XH Data(Quarterly)','EC(Monthly)','Retail(Quarterly)','CHC(Quarterly)','THC(Quarterly)','IQVIA-COUNTY(Quarterly)'))
)
,col as (
select '' as inst_code,
'' as MARKET,
'' as is_target_hp,
'' as sub_channel,
'' as dragon_flag,
'' as ka_flag,
'' as target_ins_level
)
,temp_1 as (
select distinct
-- sales rows without an institution code are reported as 'ROC'
case when t1.inst_code is null or t1.inst_code = '' then 'ROC'
else t1.inst_code
end as inst_code,
t1.MARKET,
case when t2.inst_code is null then 'N' else 'Y' end as is_target_hp,
-- branch order matters: the county tier is tested before the dragon flag
case when t2.channel = 'BU' and geo.county_tier_desc in ('County','County level city') then 'BU_County'
when t2.channel = 'BU' and geo.county_tier_desc in ('City-3b','City-4a','City-4b') then 'BU_Emerging'
when t2.channel = 'BU' and t3.dragon_flag = 'CORE' then 'BU_Core'
when t2.channel = 'BU' then 'BU_Emerging'
when t2.channel in ('Eagle','CHC','BBU_County') then t2.channel
when t2.inst_code is null then 'NON_TARGET'
else 'NA'
end as sub_channel,
case when t3.dragon_flag is null then 'NON_TARGET'
else t3.dragon_flag
end as dragon_flag,
case when t2.ka_flag is null then 'NON_TARGET'
else t2.ka_flag
end as ka_flag,
case when t2.target_ins_level is null then 'NON_TARGET'
else t2.target_ins_level
end as target_ins_level,
'AIA(Monthly)' as DATA_SOURCE
from sales t1
left join target_hp t2
on t1.inst_code = t2.inst_code
and t1.MARKET = t2.MARKET
left join dragon_flag t3
on t1.inst_code = t3.inst_code
and t1.MARKET = t3.MARKET
left join dm.dm_td_institution inst
on t1.inst_code =inst.inst_code
left join dm.dm_td_geography geo
on inst.county_code = geo.geo_key
union all
-- seven empty columns from `col` plus the exploded data source name as the 8th column
select *
from col
cross join other_data_source
)
-- display order of each sub channel
,sub_channel_rank as (
select distinct SUB_CHANNELSTD,SUB_CHANNEL_ORDER
from dm.dm_td_sd_channel
)
-- sub channels without a configured order sort last (100)
select t1.*,NVL(t2.SUB_CHANNEL_ORDER,100) as SUB_CHANNEL_ORDER
from temp_1 t1
left join sub_channel_rank t2
on t1.sub_channel = t2.SUB_CHANNELSTD

File diff suppressed because it is too large Load Diff

View File

@@ -1,228 +0,0 @@
-- Databricks notebook source
-- Brand configuration: brand (upper-cased), province and configured data source.
CREATE OR REPLACE TEMPORARY VIEW brand_province_config AS
SELECT
    UPPER(`brand`) AS brand,
    `province`     AS province,
    datasource     AS data_source
FROM dwd.dwd_gnd_merge_data_brand_province
-- COMMAND ----------
-- Market configuration: market, province and configured data source.
CREATE OR REPLACE TEMPORARY VIEW market_province_config AS
SELECT
    market,
    province,
    datasource AS data_source
FROM dwd.dwd_gnd_merge_data_brand_market
-- COMMAND ----------
-- Rebuilds dws.dws_external_merge_data_config_special: the (DATA_SOURCE,
-- PACK_COD, AUDIT_COD) combinations for packs whose brand/province
-- configuration has data_source '/', plus the 'EGFR TKI Market' packs for '天津'.
INSERT OVERWRITE dws.dws_external_merge_data_config_special
-- LOCATION 'abfss://master@azcdatalakeprd.dfs.core.chinacloudapi.cn/DWS/dws_external_merge_data_config_special' --prd
-- LOCATION 'abfss://master@retaildlstoragetest.dfs.core.chinacloudapi.cn/DWS/dws_external_merge_data_config_special' --test
WITH pack_and_brand AS (
    -- configured brands -> their packs (pack codes starting with '0' only)
    SELECT DISTINCT
        UPPER(brandnamee) AS brand_name,
        ims_pack_cod      AS PACK_COD   -- pack code, used to look up the market
    FROM dwd.dwd_td_imscode_azcode
    WHERE ims_pack_cod IS NOT NULL
      AND STARTSWITH(ims_pack_cod, '0')
      AND UPPER(brandnamee) IN (SELECT DISTINCT brand FROM brand_province_config)
),
pack_to_market AS (
    -- markets of those packs; AIA markets only (CHPA has too many markets)
    SELECT DISTINCT
        a.PACK_COD,
        a.MARKET
    FROM dm.dm_td_external_market_pack_mapping AS a
    INNER JOIN pack_and_brand AS b
        ON a.PACK_COD = b.pack_cod
    WHERE a.DATA_SOURCE IN ('AIA(Monthly)')
      AND LOWER(a.MARKET) NOT LIKE '%all market%'
      AND a.market NOT IN (
            'RE Extended Market',
            'RE Market',
            'RE Market (Global)',
            'Pulmicort VBP Benchmark Market',
            'Respules Market'
          )
),
market_to_pack AS (
    -- every pack of the markets found above, across AIA and CHPA
    SELECT DISTINCT
        market,
        pack_cod
    FROM dm.dm_td_external_market_pack_mapping AS a
    WHERE DATA_SOURCE IN ('AIA(Monthly)', 'IQVIA-CHPA(Monthly)')
      AND market IN (SELECT DISTINCT market FROM pack_to_market)
),
province_to_audit AS (
    -- province -> audit code per data source, with the short source name
    SELECT DISTINCT
        PROVINCE_C,
        AUDIT_COD,
        DATA_SOURCE,
        CASE
            WHEN DATA_SOURCE = 'AIA(Monthly)'        THEN 'AIA'
            WHEN DATA_SOURCE = 'IQVIA-CHPA(Monthly)' THEN 'CHPA'
            ELSE NULL
        END AS SOURCENAME
    FROM dm.dm_td_external_geo
    WHERE DATA_SOURCE IN ('AIA(Monthly)', 'IQVIA-CHPA(Monthly)')
),
not_selected_datasource AS (
    -- packs of brands configured with data_source '/', per province
    SELECT DISTINCT
        a.ims_pack_cod AS PACK_COD,
        b.province
    FROM dwd.dwd_td_imscode_azcode AS a
    INNER JOIN (
        SELECT DISTINCT
            UPPER(brand) AS brand,
            province
        FROM brand_province_config
        WHERE data_source = '/'
    ) AS b
        ON UPPER(a.brandnamee) = UPPER(b.brand)
    WHERE a.ims_pack_cod IS NOT NULL
      AND STARTSWITH(a.ims_pack_cod, '0')

    UNION

    -- hard-coded addition: every 'EGFR TKI Market' pack for province '天津'
    SELECT DISTINCT
        pack_cod,
        '天津'
    FROM market_to_pack
    WHERE market = 'EGFR TKI Market'
)
SELECT DISTINCT
    DATA_SOURCE,
    PACK_COD,
    AUDIT_COD
FROM not_selected_datasource AS a
LEFT JOIN province_to_audit AS b
    ON a.province = b.province_c
-- COMMAND ----------
/*
Rebuilds dws.dws_external_merge_data_config.
1. Find the packs of each configured brand, find the markets of those packs,
   then take every pack under those markets.
2. Use the configured province to find the matching province / institution
   rows and so obtain the audit code.
*/
INSERT OVERWRITE dws.dws_external_merge_data_config
---- pack-level configuration
WITH pack_and_brand AS (
    -- configured brands -> their packs (pack codes starting with '0' only)
    SELECT DISTINCT
        UPPER(brandnamee) AS brand_name,
        ims_pack_cod      AS PACK_COD   -- pack code, used to look up the market
    FROM dwd.dwd_td_imscode_azcode
    WHERE ims_pack_cod IS NOT NULL
      AND STARTSWITH(ims_pack_cod, '0')
      AND UPPER(brandnamee) IN (SELECT DISTINCT brand FROM brand_province_config)
),
pack_to_market AS (
    -- markets of those packs; AIA markets only (CHPA has too many markets)
    SELECT DISTINCT
        a.PACK_COD,
        a.MARKET
    FROM dm.dm_td_external_market_pack_mapping AS a
    INNER JOIN pack_and_brand AS b
        ON a.PACK_COD = b.pack_cod
    WHERE a.DATA_SOURCE IN ('AIA(Monthly)')
      AND LOWER(a.MARKET) NOT LIKE '%all market%'
      AND a.market NOT IN (
            'RE Extended Market',
            'RE Market',
            'RE Market (Global)',
            'Pulmicort VBP Benchmark Market',
            'Respules Market'
          )
),
market_to_pack AS (
    -- every pack of the markets found above, across AIA and CHPA
    SELECT DISTINCT
        market,
        pack_cod
    FROM dm.dm_td_external_market_pack_mapping AS a
    WHERE DATA_SOURCE IN ('AIA(Monthly)', 'IQVIA-CHPA(Monthly)')
      AND market IN (SELECT DISTINCT market FROM pack_to_market)
),
province_to_audit AS (
    -- province -> audit code per data source, with the short source name
    SELECT DISTINCT
        PROVINCE_C,
        AUDIT_COD,
        DATA_SOURCE,
        CASE
            WHEN DATA_SOURCE = 'AIA(Monthly)'        THEN 'AIA'
            WHEN DATA_SOURCE = 'IQVIA-CHPA(Monthly)' THEN 'CHPA'
            ELSE NULL
        END AS SOURCENAME
    FROM dm.dm_td_external_geo
    WHERE DATA_SOURCE IN ('AIA(Monthly)', 'IQVIA-CHPA(Monthly)')
),
pack_result AS (
    -- brand config -> packs -> markets -> all packs of those markets,
    -- combined with the audit codes of the configured province and source
    SELECT DISTINCT
        D.pack_cod,
        E.audit_cod,
        A.province,
        A.data_source AS SOURCENAME,
        E.data_source
    FROM brand_province_config AS A
    INNER JOIN pack_and_brand AS B
        ON A.brand = B.brand_name
    INNER JOIN pack_to_market AS C
        ON B.pack_cod = C.pack_cod
    INNER JOIN market_to_pack AS D
        ON C.market = D.market
    INNER JOIN province_to_audit AS E
        ON  A.province = E.province_c
        AND A.data_source = E.SOURCENAME
    WHERE A.data_source <> '/'
),
---- market-level configuration
config_market AS (
    SELECT DISTINCT market
    FROM market_province_config
),
market_to_pack_dtp AS (
    -- DTP packs that have DTP-flagged sales
    SELECT
        PACK_COD,
        MARKET
    FROM dm.dm_td_external_market_pack_mapping
    WHERE DATA_SOURCE = 'DTP(Quarterly)'
      AND PACK_COD IN (
            SELECT DISTINCT PACK_COD
            FROM dm.dm_tf_external_sales
            WHERE DTP_FLAG = 1
          )
),
final_market_to_pack AS (
    -- configured markets expanded to packs
    SELECT PACK_COD, a.MARKET
    FROM market_to_pack AS a
    INNER JOIN config_market AS b
        ON a.market = b.market
    UNION
    SELECT PACK_COD, a.MARKET
    FROM market_to_pack_dtp AS a
    INNER JOIN config_market AS b
        ON a.market = b.market
),
market_result AS (
    SELECT DISTINCT
        B.pack_cod,
        C.AUDIT_COD,
        A.province,
        A.data_source AS SOURCENAME,
        C.DATA_SOURCE
    FROM market_province_config AS a
    LEFT JOIN final_market_to_pack AS B
        ON A.market = B.market
    LEFT JOIN province_to_audit AS C
        ON  A.province = C.province_c
        AND A.data_source = C.SOURCENAME
    WHERE a.data_source <> '/'
      AND C.AUDIT_COD IS NOT NULL
),
need_to_be_changed AS (
    -- market-config rows whose pack code also appears in the pack-config result
    SELECT
        pack_cod,
        AUDIT_COD,
        DATA_SOURCE,
        sourcename
    FROM market_result
    WHERE pack_cod IN (SELECT DISTINCT pack_cod FROM pack_result)
)
-- final result: three labelled sets
SELECT
    pack_cod,
    AUDIT_COD,
    DATA_SOURCE,
    sourcename,
    'pack' AS CONFIG_DATASOURCE
FROM pack_result
UNION ALL
-- NOTE(review): this branch reads pack_result (not market_result) and labels it
-- 'market'; it looks like "pack config with market overrides applied" - confirm intent.
SELECT
    pack_cod,
    AUDIT_COD,
    DATA_SOURCE,
    sourcename,
    'market' AS CONFIG_DATASOURCE
FROM pack_result
WHERE pack_cod NOT IN (SELECT DISTINCT pack_cod FROM need_to_be_changed)
UNION ALL
SELECT
    pack_cod,
    AUDIT_COD,
    DATA_SOURCE,
    sourcename,
    'market' AS CONFIG_DATASOURCE
FROM need_to_be_changed

View File

@@ -1,124 +0,0 @@
-- Databricks notebook source
-- Rebuilds dm.dm_tf_exteranl_sales_merged_data_dtp_others: one synthetic
-- 'DTPOTHERS' pack per YYYYMM / AUDIT_COD for the EGFR TKI DTP market.
-- Sales unit / value come from the packs outside the four named brands; the
-- counting unit is the directly supplied figure minus the four brands' share.
insert overwrite table dm.dm_tf_exteranl_sales_merged_data_dtp_others
with all_egfr_pack as (
-- every DTP pack of the EGFR TKI market
select * from DM.dm_td_external_market_pack_mapping
where DATA_SOURCE = 'DTP(Quarterly)' and MARKET = 'EGFR TKI Market'
)
,`4brand` as (
-- the four named brands
select * from all_egfr_pack
where PROD_DES_C in ('阿美乐','泰瑞沙','凯美纳','艾弗沙')
)
,otherbrand as (
-- every other pack of the market
select PACK_COD from all_egfr_pack
where PACK_COD not in ( select PACK_COD from `4brand` group by 1)
group by 1
)
-- directly supplied counting units:
-- direct cu - dtp 4brand cu = otherbrand cu
,dirct_cu as (
select
yyyymm
,replace(audit_key,'DTP(Quarterly)','') audit_cod
,cast(pdot_counting_unit as decimal ) pdot_counting_unit
,cast(pdot_counting_unit_ly as decimal ) pdot_counting_unit_ly
from dm.dm_tf_external_retail_special a
where yyyymm >=202401
)
-- counting units of the four brands, weighted by PDOT_MARKET_RATIO
,fact_sales_dtp_4brand as (
select
YYYYMM,AUDIT_COD
,sum(cast( CONUTING_UNIT*PDOT_MARKET_RATIO as decimal)) CONUTING_UNIT
,sum(cast( CONUTING_UNIT_LY*PDOT_MARKET_RATIO as decimal)) CONUTING_UNIT_LY
from dm.dm_tf_external_sales t1
inner join `4brand` t2 on t1.PACK_COD = t2.PACK_COD
where t1.DATA_SOURCE = 'DTP(Quarterly)'
and DTP_FLAG = 1
and YYYYMM >=202401
group by
YYYYMM,AUDIT_COD )
-- sales unit / value (and raw counting unit) of the other products
,fact_sales_dtp_other as (
select
YYYYMM
,'DTPOTHERS' PACK_COD
,'DTPOTHERS' CORP_COD
,AUDIT_COD
,'' as PLATFORM_TYPE
,'' as STORE_NAME
,'' as STORE_TYPE
,'' as REGION_TYPE
,1 as PACK_FLAG
,2 as PROD_FLAG
,1 as DTP_FLAG
,sum(SALES_UNIT_CAL) SALES_UNIT_CAL
,sum(SALES_UNIT_CAL_LY) SALES_UNIT_CAL_LY
,sum(SALES_VALUE_CAL) SALES_VALUE_CAL
,sum(SALES_VALUE_CAL_LY) SALES_VALUE_CAL_LY
,sum(CONUTING_UNIT) CONUTING_UNIT
,sum(CONUTING_UNIT_LY) CONUTING_UNIT_LY
,'DTP(Quarterly)' as DATA_SOURCE
,'' as inst_code
,'' as cmps_flag
,'' as dept_name
,'' as prescription
,'' as prescription_ly
,'' as new_code
,'' as area
,'' as h_level
,'' as reimburse
,'' as reimburse_type
,'' as prescription_source
from dm.dm_tf_external_sales
where DTP_FLAG = 1
and PACK_COD in (select PACK_COD from otherbrand)
and DATA_SOURCE = 'DTP(Quarterly)'
and YYYYMM >=202401
group by
YYYYMM,AUDIT_COD)
select
T0.YYYYMM
,t0.PACK_COD
,t0.CORP_COD
,t0.AUDIT_COD
,t0.PLATFORM_TYPE
,t0.STORE_NAME
,t0.STORE_TYPE
,t0.REGION_TYPE
,t0.PACK_FLAG
,t0.PROD_FLAG
,t0.DTP_FLAG
,t0.SALES_UNIT_CAL
,t0.SALES_UNIT_CAL_LY
,t0.SALES_VALUE_CAL
,t0.SALES_VALUE_CAL_LY
-- counting units come from the direct-minus-4brand figure, not from t0
,t1.CONUTING_UNIT
,t1.CONUTING_UNIT_LY
,t0.DATA_SOURCE
,t0.inst_code
,t0.cmps_flag
,t0.dept_name
,t0.prescription
,t0.prescription_ly
,t0.new_code
,t0.area
,t0.h_level
,t0.reimburse
,t0.reimburse_type
,t0.prescription_source
from fact_sales_dtp_other t0
left join (
select
t1.yyyymm
,t1.audit_cod
-- the four-brand side is left-joined: with no four-brand sales for an
-- audit / month nothing is subtracted, instead of the result becoming NULL
,t1.pdot_counting_unit - coalesce(t2.conuting_unit, 0) as CONUTING_UNIT
,t1.pdot_counting_unit_ly - coalesce(t2.conuting_unit_ly, 0) as CONUTING_UNIT_LY
from dirct_cu t1
left join fact_sales_dtp_4brand t2 on t1.yyyymm = t2.yyyymm and t1.audit_cod = t2.audit_cod
) t1
on t0.audit_cod = t1.audit_cod and t0.yyyymm = t1.yyyymm

View File

@@ -1,209 +0,0 @@
# Databricks notebook source
############################################################START##############################################################
### STEP-1: insert splited pack data into tmp final table: tmp_retail_final_sales
# COMMAND ----------
# MAGIC %sql
# MAGIC -------------------------------------------------------------------------------------
# MAGIC -- STEP-1: insert split pack data into the temporary final table
# MAGIC --         (insert overwrite tmp.tmp_retail_final_sales)
# MAGIC -- Each pack row is paired with the row of the same pack / product / region from one
# MAGIC -- year earlier, so the output carries both current and last-year (_ly) measures.
# MAGIC -------------------------------------------------------------------------------------
# MAGIC
# MAGIC -- tmp_pack: raw pack rows with a derived YYYYMM and a recomputed counting_unit.
# MAGIC with tmp_pack as (
# MAGIC select
# MAGIC ------------------------------------------------------
# MAGIC -- Use the monthly value when present; otherwise derive the month from the quarter
# MAGIC -- (the last month of that quarter).
# MAGIC -- NOTE(review): presumably quarter looks like 'YYYYQn' (character 6 is the quarter digit) - confirm.
# MAGIC nvl(
# MAGIC a.month,
# MAGIC CONCAT(
# MAGIC SUBSTRING(a.quarter, 1, 4), -- first 4 characters: the year
# MAGIC CASE
# MAGIC WHEN SUBSTRING(a.quarter, 6, 1) = '1' THEN '03' -- Q1 -> month 03
# MAGIC WHEN SUBSTRING(a.quarter, 6, 1) = '2' THEN '06' -- Q2 -> month 06
# MAGIC WHEN SUBSTRING(a.quarter, 6, 1) = '3' THEN '09' -- Q3 -> month 09
# MAGIC WHEN SUBSTRING(a.quarter, 6, 1) = '4' THEN '12' -- Q4 -> month 12
# MAGIC END
# MAGIC )
# MAGIC ) as YYYYMM,
# MAGIC ------------------------------------------------------
# MAGIC a.pack_code as iqvia_pack_code,
# MAGIC a.product_id as zk_product_id,
# MAGIC case when a.product_desc <> 'others' then a.product_desc else null end as prod_des_c,
# MAGIC case when a.product_desc <> 'others' then a.product_desc else concat('Others_', a.molecule_desc) end as PROD_MAPPING,
# MAGIC a.zk_regin as province_city,
# MAGIC a.level_market as market,
# MAGIC a.sales_value,
# MAGIC a.sales_unit,
# MAGIC ------------------------------------------------------
# MAGIC -- counting_unit derivation:
# MAGIC -- do not take the value from the raw pack file table; use counting_unit / unit from
# MAGIC -- pack_property instead (unit falls back to 1 when it is NULL).
# MAGIC a.sales_unit * (b.counting_unit/ coalesce(b.unit,1)) as counting_unit,
# MAGIC ------------------------------------------------------
# MAGIC case when data_flag = 0 then 1 else 2 end as pack_flag,
# MAGIC case when brand_flag = 1 then 1 else 2 end as brand_flag
# MAGIC from tmp.tmp_retail_pack_rawdata a
# MAGIC left join dwd.dwd_gnd_ext_retail_pack_property b
# MAGIC on a.product_id = b.product_id
# MAGIC -- tmp_has_roc: product / month / pack keys that have a row for region 'ROC'.
# MAGIC ), tmp_has_roc as (
# MAGIC select
# MAGIC product_id,
# MAGIC quarter,
# MAGIC ------------------------------------------------------
# MAGIC -- Same month derivation as in tmp_pack: monthly value when present, otherwise the
# MAGIC -- last month of the quarter.
# MAGIC nvl(
# MAGIC month,
# MAGIC CONCAT(
# MAGIC SUBSTRING(quarter, 1, 4), -- first 4 characters: the year
# MAGIC CASE
# MAGIC WHEN SUBSTRING(quarter, 6, 1) = '1' THEN '03' -- Q1 -> month 03
# MAGIC WHEN SUBSTRING(quarter, 6, 1) = '2' THEN '06' -- Q2 -> month 06
# MAGIC WHEN SUBSTRING(quarter, 6, 1) = '3' THEN '09' -- Q3 -> month 09
# MAGIC WHEN SUBSTRING(quarter, 6, 1) = '4' THEN '12' -- Q4 -> month 12
# MAGIC END
# MAGIC )
# MAGIC ) as month,
# MAGIC ------------------------------------------------------
# MAGIC pack_code
# MAGIC from tmp.tmp_retail_pack_rawdata
# MAGIC where zk_regin = 'ROC'
# MAGIC -- tmp_pack_this_year_with_roc: current-period rows whose key has a ROC row; the
# MAGIC -- national ('全国') row is excluded for these keys.
# MAGIC ), tmp_pack_this_year_with_roc as (
# MAGIC select
# MAGIC *
# MAGIC from tmp_pack a
# MAGIC where exists(
# MAGIC select * from tmp_has_roc b
# MAGIC where a.YYYYMM = b.month
# MAGIC and a.iqvia_pack_code = b.pack_code
# MAGIC and a.zk_product_id = b.product_id
# MAGIC ) and a.province_city <> '全国'
# MAGIC -- tmp_pack_next_year_with_roc: the same rows shifted forward one year (YYYYMM + 100)
# MAGIC -- and renamed to *_ly, so they join to the following year's row as its last-year
# MAGIC -- values. Rows whose shifted month is later than the latest month in tmp_pack are dropped.
# MAGIC ), tmp_pack_next_year_with_roc as (
# MAGIC select
# MAGIC cast(YYYYMM + 100 as int) as YYYYMM,
# MAGIC iqvia_pack_code,
# MAGIC zk_product_id,
# MAGIC prod_des_c,
# MAGIC PROD_MAPPING,
# MAGIC province_city,
# MAGIC market,
# MAGIC sales_value as sales_value_ly,
# MAGIC sales_unit as sales_unit_ly,
# MAGIC counting_unit as counting_unit_ly,
# MAGIC pack_flag,
# MAGIC brand_flag
# MAGIC from tmp_pack a
# MAGIC where YYYYMM + 100 <= (select max(YYYYMM) from tmp_pack)
# MAGIC and exists(
# MAGIC select * from tmp_has_roc b
# MAGIC where a.YYYYMM = b.month
# MAGIC and a.iqvia_pack_code = b.pack_code
# MAGIC and a.zk_product_id = b.product_id
# MAGIC ) and a.province_city <> '全国'
# MAGIC
# MAGIC -- tmp_pack_this_year_without_roc: current-period rows whose key has no ROC row
# MAGIC -- (no region filter is applied here).
# MAGIC ), tmp_pack_this_year_without_roc as (
# MAGIC select
# MAGIC *
# MAGIC from tmp_pack a
# MAGIC where not exists(
# MAGIC select * from tmp_has_roc b
# MAGIC where a.YYYYMM = b.month
# MAGIC and a.iqvia_pack_code = b.pack_code
# MAGIC and a.zk_product_id = b.product_id
# MAGIC )
# MAGIC -- tmp_pack_next_year_without_roc: last-year counterpart of the previous CTE, built
# MAGIC -- with the same one-year shift as tmp_pack_next_year_with_roc.
# MAGIC ), tmp_pack_next_year_without_roc as (
# MAGIC select
# MAGIC cast(YYYYMM + 100 as int) as YYYYMM,
# MAGIC iqvia_pack_code,
# MAGIC zk_product_id,
# MAGIC prod_des_c,
# MAGIC PROD_MAPPING,
# MAGIC province_city,
# MAGIC market,
# MAGIC sales_value as sales_value_ly,
# MAGIC sales_unit as sales_unit_ly,
# MAGIC counting_unit as counting_unit_ly,
# MAGIC pack_flag,
# MAGIC brand_flag
# MAGIC from tmp_pack a
# MAGIC where YYYYMM + 100 <= (select max(YYYYMM) from tmp_pack)
# MAGIC and not exists(
# MAGIC select * from tmp_has_roc b
# MAGIC where a.YYYYMM = b.month
# MAGIC and a.iqvia_pack_code = b.pack_code
# MAGIC and a.zk_product_id = b.product_id
# MAGIC )
# MAGIC -- tmp_final_sales: full outer join of current and last-year rows, so a key present
# MAGIC -- on only one side still yields a row; the missing side's measures default to 0.
# MAGIC ), tmp_final_sales as (
# MAGIC select
# MAGIC ifnull(a.yyyymm, b.yyyymm) as yyyymm,
# MAGIC ifnull(a.iqvia_pack_code, b.iqvia_pack_code) as iqvia_pack_code,
# MAGIC ifnull(a.zk_product_id, b.zk_product_id) as zk_product_id,
# MAGIC ifnull(a.prod_des_c, b.prod_des_c) as prod_des_c,
# MAGIC ifnull(a.PROD_MAPPING, b.PROD_MAPPING) as PROD_MAPPING,
# MAGIC ifnull(a.province_city, b.province_city) as province_city,
# MAGIC ifnull(a.market, b.market) as market,
# MAGIC ifnull(a.sales_value, 0) as sales_value,
# MAGIC ifnull(a.sales_unit, 0) as sales_unit,
# MAGIC ifnull(a.counting_unit, 0) as counting_unit,
# MAGIC ifnull(a.pack_flag, b.pack_flag) as pack_flag,
# MAGIC ifnull(a.brand_flag,b.brand_flag ) as brand_flag,
# MAGIC ifnull(b.sales_value_ly, 0) as sales_value_ly,
# MAGIC ifnull(b.sales_unit_ly, 0) as sales_unit_ly,
# MAGIC ifnull(b.counting_unit_ly, 0) as counting_unit_ly
# MAGIC from tmp_pack_this_year_with_roc a
# MAGIC full outer join tmp_pack_next_year_with_roc b
# MAGIC on a.YYYYMM = b.YYYYMM
# MAGIC and a.iqvia_pack_code = b.iqvia_pack_code
# MAGIC and a.zk_product_id = b.zk_product_id
# MAGIC and a.province_city = b.province_city
# MAGIC
# MAGIC union all
# MAGIC
# MAGIC -- Keys without a ROC row: same pairing, but the region is always written as 'ROC'.
# MAGIC select
# MAGIC ifnull(c.yyyymm, d.yyyymm) as yyyymm,
# MAGIC ifnull(c.iqvia_pack_code, d.iqvia_pack_code) as iqvia_pack_code,
# MAGIC ifnull(c.zk_product_id, d.zk_product_id) as zk_product_id,
# MAGIC ifnull(c.prod_des_c, d.prod_des_c) as prod_des_c,
# MAGIC ifnull(c.PROD_MAPPING, d.PROD_MAPPING) as PROD_MAPPING,
# MAGIC 'ROC' as province_city,
# MAGIC ifnull(c.market, d.market) as market,
# MAGIC ifnull(c.sales_value, 0) as sales_value,
# MAGIC ifnull(c.sales_unit, 0) as sales_unit,
# MAGIC ifnull(c.counting_unit, 0) as counting_unit,
# MAGIC --ifnull(c.pack_flag, d.pack_flag) as pack_flag,
# MAGIC 2 as pack_flag, -- these packs have no split ratio and only national figures, so pack_flag is fixed at 2
# MAGIC ifnull(c.brand_flag,d.brand_flag ) as brand_flag,
# MAGIC ifnull(d.sales_value_ly, 0) as sales_value_ly,
# MAGIC ifnull(d.sales_unit_ly, 0) as sales_unit_ly,
# MAGIC ifnull(d.counting_unit_ly, 0) as counting_unit_ly
# MAGIC from tmp_pack_this_year_without_roc c
# MAGIC full outer join tmp_pack_next_year_without_roc d
# MAGIC on c.YYYYMM = d.YYYYMM
# MAGIC and c.iqvia_pack_code = d.iqvia_pack_code
# MAGIC and c.zk_product_id = d.zk_product_id
# MAGIC and c.province_city = d.province_city
# MAGIC )
# MAGIC
# MAGIC -- Replace the whole target table with the combined result.
# MAGIC insert overwrite table tmp.tmp_retail_final_sales
# MAGIC
# MAGIC select
# MAGIC yyyymm,
# MAGIC iqvia_pack_code,
# MAGIC zk_product_id,
# MAGIC prod_des_c,
# MAGIC PROD_MAPPING,
# MAGIC province_city,
# MAGIC market,
# MAGIC sales_value,
# MAGIC sales_value_ly,
# MAGIC sales_unit,
# MAGIC sales_unit_ly,
# MAGIC counting_unit,
# MAGIC counting_unit_ly,
# MAGIC pack_flag,
# MAGIC brand_flag
# MAGIC from tmp_final_sales
# MAGIC order by yyyymm
# COMMAND ----------
############################################################END################################################################

View File

@@ -1,367 +0,0 @@
# Databricks notebook source
# MAGIC %md
# MAGIC ### 原本逻辑
# COMMAND ----------
#当更新pack 或品牌 事实数据时需要运行此代码,否则无需运行。
# COMMAND ----------
# MAGIC %sql
# MAGIC -- Point each source workbook (matched by file_name) in the mapping table at the DWD
# MAGIC -- table named in table_name. One UPDATE per workbook.
# MAGIC update dwd.dwd_gnd_ext_retail_corresponding_relationship set table_name ='dwd.dwd_gnd_ext_retail_nataional_oap' where file_name ='pack-CV-抗血栓2通用名-全国.xlsx';
# MAGIC update dwd.dwd_gnd_ext_retail_corresponding_relationship set table_name ='dwd.dwd_gnd_ext_retail_htn' where file_name ='pack-CV-高血压-化学药-全国.xlsx';
# MAGIC update dwd.dwd_gnd_ext_retail_corresponding_relationship set table_name ='dwd.dwd_gnd_ext_retail_atomizer' where file_name ='pack-雾化器-全国&县域数据.xlsx';
# MAGIC update dwd.dwd_gnd_ext_retail_corresponding_relationship set table_name ='dwd.dwd_gnd_ext_retail_anti_asthma_copd' where file_name ='pack-RE-慢阻肺-全国.xlsx';
# MAGIC update dwd.dwd_gnd_ext_retail_corresponding_relationship set table_name ='dwd.dwd_gnd_ext_zk_brand' where file_name ='Brand-品牌数据报表.xlsx';
# MAGIC update dwd.dwd_gnd_ext_retail_corresponding_relationship set table_name ='dwd.dwd_gnd_ext_retail_statin_xzk' where file_name ='pack-CV-他汀类+血脂康-全国.xlsx';
# MAGIC update dwd.dwd_gnd_ext_retail_corresponding_relationship set table_name ='dwd.dwd_gnd_ext_retail_nataional_rd' where file_name ='pack-RD-肾科-全国.xlsx';
# MAGIC update dwd.dwd_gnd_ext_retail_corresponding_relationship set table_name ='dwd.dwd_gnd_ext_retail_aagsa_ppi_oral' where file_name ='pack-GI-慢性胃炎胃溃疡-全国.xlsx';
# MAGIC update dwd.dwd_gnd_ext_retail_corresponding_relationship set table_name ='dwd.dwd_gnd_ext_retail_nataional_niad' where file_name ='pack-DM-口服降糖化学药.xlsx';
# MAGIC update dwd.dwd_gnd_ext_retail_corresponding_relationship set table_name ='dwd.dwd_gnd_ext_retail_metoprolol_tartrat' where file_name ='pack-CV-酒石酸美托洛尔.xlsx';
# MAGIC
# COMMAND ----------
# Automatic ingestion and consolidation of pack data.
# Read the config table: one row per PACK source, exposing the source table name as
# `tab` and the source file name as `brand_flag`.
df = spark.sql("""
SELECT DISTINCT table_name tab ,file_name brand_flag FROM dwd.dwd_gnd_ext_retail_corresponding_relationship
where type_name ='PACK'
""").collect()
def get_union_pack_data(df):
    """Stack the rows of every configured PACK source table into one DataFrame.

    For each config row a query is run against the table named in ``tab``; it
    casts the measure columns to decimal(30,10) after removing thousands
    separators, stamps the row with the source file name as ``brand_flag`` and
    adds two ETL timestamps. The per-table results are combined with
    ``DataFrame.union``, which matches columns by position.

    Args:
        df: iterable of config rows, each exposing ``tab`` (source table name)
            and ``brand_flag`` (source file name).

    Returns:
        The union of all per-table query results, or ``None`` when ``df``
        yields no rows.
    """
    # Accumulates the union; stays None until the first table has been read.
    union_query = None
    # niad_pdot_unit would need special handling for table
    # tmp.tmp_inc_gnd_ext_retail_nataional_niad (left disabled, as in the original):
    # niad_pdot_unit_flag = 'tmp.tmp_inc_gnd_ext_retail_nataional_niad'
    for table in df:
        # Source table to read for this config row.
        T = str(table.tab)
        # The source file name is embedded as a SQL string literal below, so escape
        # backslashes and single quotes (Spark SQL's default backslash-escape syntax);
        # otherwise a quote in the name would terminate the literal early.
        brand_flag = str(table.brand_flag).replace("\\", "\\\\").replace("'", "\\'")
        # NOTE(review): no t2 column is selected, so the left join can only multiply
        # rows if dws_ext_retail_td_prod has several rows per zk_product_id - confirm
        # whether it is still needed.
        sql = f"""
            select
            cast(t1.month as int) AS YYYYMM
            ,cast(left(t1.quarter, 4) as int) AS year
            ,right(t1.quarter, 2) AS quarter
            ,t1.quarter AS yq
            ,t1.zk_product_id
            ,t1.zk_region
            ,t1.zk_rx_otc
            ,t1.zk_medicine_type
            ,t1.zk_medicine_tier1
            ,t1.zk_medicine_tier2
            ,t1.zk_medicine_tier3
            ,t1.zk_medicine_tier4
            ,t1.zk_common_name
            ,t1.zk_dosage_form
            ,t1.zk_user_type
            ,t1.zk_category_name
            ,t1.zk_product_name
            ,t1.zk_brand_name
            ,t1.zk_manu_des
            ,t1.zk_corp_des
            ,t1.zk_pack_des
            ,t1.price
            ,CAST(replace(t1.sales_unit,',','') AS decimal(30,10)) as sales_unit
            ,CAST(replace(t1.sales_value,',','') AS decimal(30,10)) as sales_value
            ,CAST(replace(t1.digital_spread_rate,',','') AS decimal(30,10)) as digital_spread_rate
            ,CAST(replace(t1.weighted_spread_rate,',','') AS decimal(30,10)) as weighted_spread_rate
            ,CAST(replace(t1.counting_unit,',','') AS decimal(30,10)) as counting_unit
            ,'{brand_flag}' as brand_flag
            ,from_utc_timestamp(current_timestamp(),'UTC+8') AS etl_insert_dt
            ,from_utc_timestamp(current_timestamp(),'UTC+8') AS etl_update_dt
            from {T} t1
            left join dws.dws_ext_retail_td_prod t2
            on t1.zk_product_id = t2.zk_product_id
            where month is not null
        """
        # Run the query for this table.
        current_query = spark.sql(sql)
        # Append to the running union (identity check: the first table seeds it).
        if union_query is None:
            union_query = current_query
        else:
            union_query = union_query.union(current_query)
    # Return the combined DataFrame; the caller writes it to a table.
    return union_query
# Build the stacked pack DataFrame and overwrite the pack union-all table with it.
# NOTE(review): get_union_pack_data returns None when the config query yields no rows,
# in which case `.write` below raises AttributeError.
pack_result = get_union_pack_data(df)
pack_result.write.mode("overwrite").saveAsTable("dwd.dwd_inc_gnd_ext_retail_nataional_pack_union_all")
# COMMAND ----------
# Automatic ingestion of brand + province data.
# Read the config table: one row per BRAND source, exposing the source table name as
# `tab` and the source file name as `brand_flag`.
dfband = spark.sql("""
SELECT DISTINCT table_name tab ,file_name brand_flag FROM dwd.dwd_gnd_ext_retail_corresponding_relationship
where type_name ='BRAND'
""").collect()
def get_union_brand_data(df):
    """Stack the rows of every configured BRAND source table into one DataFrame.

    For each config row a query is run against the table named in ``tab``; it
    derives YYYYMM from the quarter (year * 100 + quarter number * 3), scales
    sales value and volume by 1,000,000, replaces '-' placeholders in the
    ranking columns, stamps the row with the source file name as ``pack_flag``
    and adds two ETL timestamps. The per-table results are combined with
    ``DataFrame.union``, which matches columns by position.

    Args:
        df: iterable of config rows, each exposing ``tab`` (source table name)
            and ``brand_flag`` (source file name), or ``None``.

    Returns:
        The union of all per-table query results, or ``None`` when ``df`` is
        ``None`` or yields no rows.
    """
    # Nothing configured.
    if df is None:
        return None
    # Accumulates the union; stays None until the first table has been read.
    union_query = None
    for table in df:
        # Source table to read for this config row.
        T = str(table.tab)
        # The source file name is embedded as a SQL string literal below, so escape
        # backslashes and single quotes (Spark SQL's default backslash-escape syntax);
        # otherwise a quote in the name would terminate the literal early.
        pack_flag = str(table.brand_flag).replace("\\", "\\\\").replace("'", "\\'")
        sql = f"""
            select
            cast(left(quarter, 4)*100 + right(quarter,1)*3 as int ) AS YYYYMM
            ,cast(left(quarter, 4) as int ) AS year
            ,right(quarter, 2) AS quarter
            ,quarter AS yq
            ,type AS brand_cat_type
            ,case when ta = 'NIAD' then 'DM' else ta end AS TA
            ,market AS market
            ,zk_brand_category AS zk_brand_category
            ,zk_common_name AS zk_common_name
            ,zk_manu_des AS zk_manu_des
            ,rc_name_en AS rc_name_en
            ,province_city AS province_city
            ,ytd AS ytd
            ,cast(sales_value * 1000000 as decimal(30,10)) AS sales_val
            ,cast(sales_volume * 1000000 as decimal(30,10)) AS sales_vol
            ,cast(price as decimal(30,10)) as price
            ,cast(num_dist_rate as decimal(30,10)) as num_dist_rate
            ,cast(weig_dist_rate as decimal(30,10)) as weig_dist_rate
            ,cast(value_share as decimal(30,10)) as val_share
            ,cast(volume_share as decimal(30,10)) as vol_share
            ,replace(key_brand_ytd,'-','') as key_brand_ytd
            ,cast(replace(key_brand_rank_ytd,'-','0') as int) as key_brand_rank_ytd
            ,replace(top_brand_ytd,'-','') as top_brand_ytd
            ,cast(replace(top_brand_ms_ytd,'-','0') as decimal(30,10)) as top_brand_ms_ytd
            ,cast(replace(top_brand_inc_ms_ytd,'-','0') as decimal(30,10)) as top_brand_inc_ms_ytd
            ,cast(replace(top_brand_gr_ytd,'-','0') as decimal(30,10)) as top_brand_gr_ytd
            ,replace(key_brand_qtd,'-','') as key_brand_qtd
            ,cast(replace(key_brand_rank_qtd,'-','0') as int) as key_brand_rank_qtd
            ,replace(top_brand_qtd,'-','') as top_brand_qtd
            ,cast(replace(top_brand_ms_qtd,'-','0') as decimal(30,10)) as top_brand_ms_qtd
            ,cast(replace(top_brand_inc_ms_qtd,'-','0') as decimal(30,10)) as top_brand_inc_ms_qtd
            ,cast(replace(top_brand_gr_qtd,'-','0') as decimal(30,10)) as top_brand_gr_qtd
            ,ranked_by as ranked_by
            ,'{pack_flag}' as pack_flag
            ,from_utc_timestamp(current_timestamp(),'UTC+8') as etl_insert_dt
            ,from_utc_timestamp(current_timestamp(),'UTC+8') as etl_update_dt
            from {T}
        """
        # Run the query for this table.
        current_query = spark.sql(sql)
        # Append to the running union (identity check: the first table seeds it).
        if union_query is None:
            union_query = current_query
        else:
            union_query = union_query.union(current_query)
    # Return the combined DataFrame; the caller writes it to a table.
    return union_query
# Build the stacked brand DataFrame and overwrite the brand union-all table with it.
# NOTE(review): get_union_brand_data returns None when the config query yields no rows,
# in which case `.write` below raises AttributeError.
brand_result = get_union_brand_data(dfband)
brand_result.write.mode("overwrite").saveAsTable("dwd.dwd_inc_gnd_ext_retail_nataional_brand_union_all")
# COMMAND ----------
# MAGIC %sql
# MAGIC -- Data for 多达一, 天一宁, 'others' and 氨氯地平阿托伐他汀钙 (amlodipine / atorvastatin calcium)
# MAGIC -- is duplicated across the hypertension and statin+XZK pack files but has no counterpart in
# MAGIC -- the brand report, which makes the later pack+national -> pack+province split inconsistent.
# MAGIC -- So the pack+national figures are spread evenly over the provinces, rolled up to brand level
# MAGIC -- and appended to the brand report as the basis for that later split.
# MAGIC insert overwrite table dwd.dwd_inc_gnd_ext_retail_nataional_brand_union_all
# MAGIC with data_pack as (
# MAGIC -- Base data: roll pack rows up to brand level and compute the per-province average.
# MAGIC -- num is the province count, repeated on every row by the one-row cross join, so the
# MAGIC -- divisor is max(num). sum(num) would be province count x rows in the group and would
# MAGIC -- under-allocate whenever a brand has more than one pack row.
# MAGIC select brand_flag,a.YYYYMM,nvl(b.prod_des_c,'OTHERS' ) prod_des_c ,sum(a.sales_value)/max(num) sales_value,sum(a.sales_unit)/max(num) sales_unit ,sum(a.sales_value) sales ,sum(a.sales_unit) saleu
# MAGIC from dwd.dwd_inc_gnd_ext_retail_nataional_pack_union_all a
# MAGIC left join dwd.dwd_gnd_ext_retail_pack_property b on a.zk_product_id = b.product_id
# MAGIC cross join (select count(distinct zk_region) num from dwd.dwd_inc_gnd_ext_retail_nataional_pack_union_all where zk_region<>'全国')
# MAGIC where a.zk_common_name ='氨氯地平阿托伐他汀钙' and a.brand_flag in ('pack-CV-他汀类+血脂康-全国.xlsx' ,'pack-CV-高血压-化学药-全国.xlsx','pack-CV-抗血栓2通用名-全国.xlsx')
# MAGIC group by 1,2,3
# MAGIC ),city as (
# MAGIC -- Header rows: every distinct period x province present in the brand table
# MAGIC -- (national rows excluded, ranked_by = 'volume' only).
# MAGIC select distinct
# MAGIC YYYYMM
# MAGIC ,year
# MAGIC ,quarter
# MAGIC ,yq
# MAGIC ,province_city
# MAGIC from dwd.dwd_inc_gnd_ext_retail_nataional_brand_union_all
# MAGIC where province_city !='全国' and ranked_by ='volume'
# MAGIC )
# MAGIC , split as (
# MAGIC -- Rows to append: first the per-province rows (averaged figures), then the national
# MAGIC -- rows (totals). Columns are positional and must follow the brand table's column order;
# MAGIC -- the unnamed '' / 0 values are fillers for attributes that do not apply.
# MAGIC select
# MAGIC city.YYYYMM
# MAGIC ,year
# MAGIC ,quarter
# MAGIC ,yq
# MAGIC ,'品牌' brand_cat_type
# MAGIC ,'CV' TA
# MAGIC ,brand_flag market
# MAGIC ,prod_des_c
# MAGIC ,''
# MAGIC ,''
# MAGIC ,''
# MAGIC ,province_city
# MAGIC ,''
# MAGIC ,sales_value
# MAGIC ,sales_unit
# MAGIC ,0,0,0,0,0,'' ,0 ,'' ,0,0,0,'',0,'',0,0,0,'volume' ,''
# MAGIC from data_pack left join city on city.YYYYMM=data_pack.YYYYMM
# MAGIC union all
# MAGIC select
# MAGIC city.YYYYMM
# MAGIC ,year
# MAGIC ,quarter
# MAGIC ,yq
# MAGIC ,'品牌' brand_cat_type
# MAGIC ,'CV' TA
# MAGIC ,brand_flag market
# MAGIC ,prod_des_c
# MAGIC ,''
# MAGIC ,''
# MAGIC ,''
# MAGIC ,'全国' province_city
# MAGIC ,''
# MAGIC ,sales sales_value
# MAGIC ,saleu sales_unit
# MAGIC ,0,0,0,0,0,'' ,0 ,'' ,0,0,0,'',0,'',0,0,0,'volume' ,''
# MAGIC from data_pack left join (select distinct year,quarter,yyyymm,yq from city ) city on city.YYYYMM=data_pack.YYYYMM
# MAGIC
# MAGIC )
# MAGIC -- Keep every existing brand row and append the generated rows with fresh ETL timestamps.
# MAGIC select * from dwd.dwd_inc_gnd_ext_retail_nataional_brand_union_all
# MAGIC union all
# MAGIC select *
# MAGIC ,from_utc_timestamp(current_timestamp(),'UTC+8') AS etl_insert_dt
# MAGIC ,from_utc_timestamp(current_timestamp(),'UTC+8') AS etl_update_dt
# MAGIC from split
# COMMAND ----------
# MAGIC %sql
# MAGIC -- Zero-pad zk_product_id: values shorter than 7 characters are left-padded with '0'
# MAGIC -- to 7 characters; all other columns pass through unchanged. The result overwrites
# MAGIC -- the table it was read from, so the column order below must match the table's.
# MAGIC with tmp as (
# MAGIC select
# MAGIC YYYYMM,
# MAGIC year,
# MAGIC quarter,
# MAGIC yq,
# MAGIC case when length(zk_product_id) < 7 then right(concat('0000000',zk_product_id),7) else zk_product_id end as zk_product_id,
# MAGIC zk_region,
# MAGIC zk_rx_otc,
# MAGIC zk_medicine_type,
# MAGIC zk_medicine_tier1,
# MAGIC zk_medicine_tier2,
# MAGIC zk_medicine_tier3,
# MAGIC zk_medicine_tier4,
# MAGIC zk_common_name,
# MAGIC zk_dosage_form,
# MAGIC zk_user_type,
# MAGIC zk_category_name,
# MAGIC zk_product_name,
# MAGIC zk_brand_name,
# MAGIC zk_manu_des,
# MAGIC zk_corp_des,
# MAGIC zk_pack_des,
# MAGIC price,
# MAGIC sales_unit,
# MAGIC sales_value,
# MAGIC digital_spread_rate,
# MAGIC weighted_spread_rate,
# MAGIC counting_unit,
# MAGIC brand_flag,
# MAGIC etl_insert_dt,
# MAGIC etl_update_dt
# MAGIC from dwd.dwd_inc_gnd_ext_retail_nataional_pack_union_all
# MAGIC -- Data for pack-CV-酒石酸美托洛尔.xlsx (metoprolol tartrate) is no longer supplied, so it is excluded.
# MAGIC where brand_flag<>'pack-CV-酒石酸美托洛尔.xlsx'
# MAGIC )
# MAGIC
# MAGIC insert overwrite dwd.dwd_inc_gnd_ext_retail_nataional_pack_union_all
# MAGIC select *
# MAGIC from tmp;
# COMMAND ----------
# MAGIC %md
# MAGIC ### 新逻辑
# MAGIC - 修改brand数据先拆分成月维度的数据
# COMMAND ----------
# MAGIC %sql
# MAGIC /*
# MAGIC Modified: 2025-03-11
# MAGIC Author: chenwu
# MAGIC Change: brand data arrives quarterly while pack data arrives monthly, so each quarterly
# MAGIC         brand row is turned into three monthly rows carrying one third of its sales.
# MAGIC NOTE(review): the statement overwrites the table it reads, so running it twice would
# MAGIC         explode and divide the quarterly rows a second time.
# MAGIC */
# MAGIC insert overwrite table dwd.dwd_inc_gnd_ext_retail_nataional_brand_union_all
# MAGIC with quarterly_table as (
# MAGIC select
# MAGIC *
# MAGIC from dwd.dwd_inc_gnd_ext_retail_nataional_brand_union_all
# MAGIC where market not in ('NIAD','Inhaled Extended Market','布地奈德雾化溶液')
# MAGIC -- Only markets delivered quarterly belong here; any market delivered monthly must be excluded.
# MAGIC -- NOTE(review): rows with a NULL market satisfy neither this filter nor the IN filter
# MAGIC -- further down, so they would be dropped by the overwrite - confirm none exist.
# MAGIC )
# MAGIC
# MAGIC ,month_table as (-- convert quarterly rows to monthly rows
# MAGIC SELECT
# MAGIC SUBSTR(q.yq, 1, 4)*100 + -- year (first 4 characters of yq) * 100
# MAGIC LPAD(m.month_num, 2, '0') -- plus the zero-padded month number
# MAGIC AS YYYYMM -- i.e. year * 100 + month
# MAGIC ,`year`
# MAGIC ,`quarter`
# MAGIC ,yq
# MAGIC ,brand_cat_type
# MAGIC ,TA
# MAGIC ,market
# MAGIC ,zk_brand_category
# MAGIC ,zk_common_name
# MAGIC ,zk_manu_des
# MAGIC ,rc_name_en
# MAGIC ,province_city
# MAGIC ,ytd
# MAGIC ,sales_val /3 -- one third of the quarterly value per month
# MAGIC ,sales_vol /3 -- one third of the quarterly volume per month
# MAGIC ,price
# MAGIC ,num_dist_rate
# MAGIC ,weig_dist_rate
# MAGIC ,val_share
# MAGIC ,vol_share
# MAGIC ,key_brand_ytd
# MAGIC ,key_brand_rank_ytd
# MAGIC ,top_brand_ytd
# MAGIC ,top_brand_ms_ytd
# MAGIC ,top_brand_inc_ms_ytd
# MAGIC ,top_brand_gr_ytd
# MAGIC ,key_brand_qtd
# MAGIC ,key_brand_rank_qtd
# MAGIC ,top_brand_qtd
# MAGIC ,top_brand_ms_qtd
# MAGIC ,top_brand_inc_ms_qtd
# MAGIC ,top_brand_gr_qtd
# MAGIC ,ranked_by
# MAGIC ,pack_flag
# MAGIC ,etl_insert_dt
# MAGIC ,etl_update_dt
# MAGIC FROM
# MAGIC quarterly_table q
# MAGIC LATERAL VIEW EXPLODE( -- emit the three month numbers of the row's quarter
# MAGIC CASE
# MAGIC WHEN RIGHT(q.yq, 2) = 'Q1' THEN ARRAY(1, 2, 3)
# MAGIC WHEN RIGHT(q.yq, 2) = 'Q2' THEN ARRAY(4, 5, 6)
# MAGIC WHEN RIGHT(q.yq, 2) = 'Q3' THEN ARRAY(7, 8, 9)
# MAGIC WHEN RIGHT(q.yq, 2) = 'Q4' THEN ARRAY(10, 11, 12)
# MAGIC END -- no ELSE: a yq not ending in Q1..Q4 gives NULL here
# MAGIC ) m AS month_num
# MAGIC )
# MAGIC
# MAGIC ,other_not_quarterly_table (
# MAGIC select
# MAGIC *
# MAGIC from dwd.dwd_inc_gnd_ext_retail_nataional_brand_union_all
# MAGIC where market in ('NIAD','Inhaled Extended Market','布地奈德雾化溶液')
# MAGIC -- Only markets delivered monthly belong here; they are passed through unchanged.
# MAGIC )
# MAGIC
# MAGIC select * from month_table
# MAGIC union all
# MAGIC select * from other_not_quarterly_table

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -1,186 +0,0 @@
-- Databricks notebook source
-- Rebuild the external market dimension: one row per (MARKET, DATA_SOURCE) from the
-- pack-mapping staging table. PRD_FLAG is '1' for the Retail(Quarterly) and
-- DTP(Quarterly) markets listed below and '0' for everything else.
INSERT OVERWRITE dm.dm_td_external_market (
    MARKET, DATA_SOURCE, TA, AZ_RELATED, MARKET_DESC, TA_RN, PRD_FLAG
)
SELECT
    MARKET,
    DATA_SOURCE,
    MAX(TA)          AS TA,
    MAX(AZ_RELATED)  AS AZ_RELATED,
    MAX(MARKET_DESC) AS MARKET_DESC,
    MAX(TA_RN)       AS TA_RN,
    CASE
        WHEN (
                DATA_SOURCE = 'Retail(Quarterly)'
            AND MARKET IN (
                    'Antacid anti GI swelling agent+PPI Oral',
                    'HTN Market',
                    'NIAD (Non-Insulin Anti-Diabetic) Market',
                    'Respules Market',
                    'Statin+XZK Market',
                    'BUDESONIDE Market',
                    'Rosuvastatin Market',
                    'Atorvastatin Market',
                    'Metoprolol Succinate Market',
                    'PPI Oral ESOMEPRAZOLE Market',
                    'PPI Oral OMEPRAZOLE Market',
                    -- 'Inhaled Extended Market by Brand' is deliberately not flagged
                    'Inhaled Extended Market',
                    'Non-Omeprazole',
                    'Metoprolol Tartrate Market',
                    'Pediatric Cough Market',
                    'Anti-HER2 Market',
                    'Anti-HER2 mBC Market',
                    'Fasenra Market',
                    'Severe Asthma'
                )
             )
          OR (
                DATA_SOURCE = 'DTP(Quarterly)'
            AND MARKET IN (
                    'EGFR TKI Market',
                    'Anti-HER2 Market',
                    'Anti-HER2 mBC Market',
                    'Fasenra/Tezspire Related Market',
                    'Severe Asthma',
                    'Tezspire CRSwNP Market',
                    'CRSwNP Market',
                    'PAM Market'
                )
             )
        THEN '1'
        ELSE '0'
    END AS PRD_FLAG
FROM DM.dm_td_external_market_pack_mapping_temp
GROUP BY
    MARKET,
    DATA_SOURCE
-- COMMAND ----------
-- Refresh the cached metadata of the market dimension after the overwrite above.
REFRESH TABLE dm.DM_TD_EXTERNAL_MARKET
-- COMMAND ----------
-- Rebuild the market -> TA table from the market dimension:
--   * every market except the "ALL Market" roll-ups, with AZ_RELATED used as its TA;
--   * plus 'CVRM Market' once more under TA 'CVRM' with TA_RN 0.
INSERT OVERWRITE dm.dm_td_external_market_ta (MARKET, DATA_SOURCE, TA, TA_RN, MARKET_DESC)
SELECT
    MARKET,
    DATA_SOURCE,
    AZ_RELATED AS TA,
    TA_RN,
    MARKET_DESC
FROM DM.DM_TD_EXTERNAL_MARKET
WHERE MARKET NOT LIKE '%ALL Market%'
  AND MARKET <> 'Retail All Market'
UNION ALL
SELECT
    MARKET,
    DATA_SOURCE,
    'CVRM' AS TA,
    0      AS TA_RN,
    MARKET_DESC
FROM DM.DM_TD_EXTERNAL_MARKET
WHERE MARKET = 'CVRM Market'
-- COMMAND ----------
-----------------------------------------------------------------------------------
-- Modified: 2024-08-22, FanXujia
-- For the Retail(Quarterly) and EC(Monthly) sources, every market listed under
-- TA 'DM' is also listed under TA 'NIAD' (TA_RN 7).
-- Modified: 2025-07-01, chenwu
-- The Retail(Quarterly) 'Soliris Related Market' listed under TA 'ONCO' is also
-- listed under TA 'RARE' (TA_RN 8).
-----------------------------------------------------------------------------------
INSERT INTO dm.dm_td_external_market_ta (
    MARKET,
    DATA_SOURCE,
    TA,
    TA_RN,
    MARKET_DESC
)
SELECT
    market,
    DATA_SOURCE,
    'NIAD' AS TA,
    7      AS TA_RN,
    MARKET_DESC
FROM dm.dm_td_external_market_ta
WHERE DATA_SOURCE IN ('Retail(Quarterly)', 'EC(Monthly)')
  AND ta = 'DM'
UNION ALL
SELECT
    market,
    DATA_SOURCE,
    'RARE' AS TA,
    8      AS TA_RN,
    MARKET_DESC
FROM dm.dm_td_external_market_ta
WHERE DATA_SOURCE = 'Retail(Quarterly)'
  AND ta = 'ONCO'
  AND MARKET = 'Soliris Related Market'
-- COMMAND ----------
-- log 2025-02-26, author: 庄伟
-- Complete the market_ta table and attach TA_MAP, the TA taken from the manually
-- maintained market -> TA mapping (dm.dm_ims_td_market_property):
--   1. existing market_ta rows, with TA_MAP looked up by MARKET and, failing that,
--      by MARKET_KEY;
--   2. markets of the market dimension that are missing from market_ta, kept only
--      when the mapping supplies a non-empty TA (TA_RN '99', TA and MARKET_DESC NULL).
INSERT OVERWRITE TABLE dm.dm_td_external_market_ta
(
    MARKET,
    DATA_SOURCE,
    TA,
    TA_MAP,
    TA_RN,
    MARKET_DESC
)
-- Markets present in the market dimension but absent from market_ta.
-- NOT EXISTS is used instead of NOT IN: with NOT IN a single NULL MARKET in
-- market_ta would make the predicate unknown for every row and empty this list.
WITH temp_ma_ext_market AS (
    SELECT DISTINCT
        mkt.MARKET,
        mkt.DATA_SOURCE
    FROM dm.dm_td_external_market mkt
    WHERE NOT EXISTS (
        SELECT 1
        FROM dm.dm_td_external_market_ta existing_ta
        WHERE existing_ta.MARKET = mkt.MARKET
    )
)
-- Part 1: existing rows. DISTINCT collapses duplicates produced when the mapping
-- has several rows for the same market.
SELECT DISTINCT
    dim_mkt_ta.MARKET,
    dim_mkt_ta.DATA_SOURCE,
    dim_mkt_ta.TA,
    nvl(mkt_property.ta, mkt_property_key.TA) AS TA_MAP,
    dim_mkt_ta.TA_RN,
    dim_mkt_ta.MARKET_DESC
FROM (
    SELECT MARKET, DATA_SOURCE, TA, TA_RN, MARKET_DESC
    FROM dm.dm_td_external_market_ta
) dim_mkt_ta
LEFT JOIN dm.dm_ims_td_market_property mkt_property
    ON dim_mkt_ta.market = mkt_property.market
LEFT JOIN dm.dm_ims_td_market_property mkt_property_key
    ON dim_mkt_ta.MARKET = mkt_property_key.MARKET_KEY
UNION ALL
-- Part 2: missing markets that the mapping can assign a TA to.
SELECT
    ext_market.MARKET,
    ext_market.DATA_SOURCE,
    null AS TA,
    mkt_property.TA AS TA_MAP,
    '99' AS TA_RN,
    null AS MARKET_DESC
FROM temp_ma_ext_market ext_market
LEFT JOIN dm.dm_ims_td_market_property mkt_property
    ON ext_market.market = mkt_property.MARKET
WHERE nvl(mkt_property.TA, '') <> '';