diff --git a/07 dm_td_external_packinfo.sql b/07 dm_td_external_packinfo.sql index 7b6149d..7afddf5 100644 --- a/07 dm_td_external_packinfo.sql +++ b/07 dm_td_external_packinfo.sql @@ -13,10 +13,10 @@ ---------------------------------------------------------------------------- create or replace temporary view external_sales_union as select distinct -PACK_COD, +PACK_CODE as PACK_COD, DATA_SOURCE, PACK_FLAG, -CORP_COD, +CORP_CODE AS CORP_COD, SALES_VALUE_CAL, dept_name, new_code, @@ -2830,4 +2830,4 @@ left join (select CMPS_DES_C,DATA_SOURCE from base_table having count(distinct cmps_cod)>1 ) t3 on t1.CMPS_DES_C = t3.CMPS_DES_C and t1.data_source = t3.data_source -where t2.prod_des_c is not null or t3.cmps_des_c is not null \ No newline at end of file +where t2.prod_des_c is not null or t3.cmps_des_c is not null diff --git a/09 dm_td_external_geo_type.sql b/09 dm_td_external_geo_type.sql index c8fa7ed..9ea8313 100644 --- a/09 dm_td_external_geo_type.sql +++ b/09 dm_td_external_geo_type.sql @@ -856,7 +856,7 @@ AS SELECT DISTINCT A.DATA_SOURCE, A.AUDIT_COD, - B.PACK_COD, + B.PACK_CODE, CASE WHEN A.PROVINCE_C IN ( @@ -888,14 +888,14 @@ FROM DM.dm_td_external_geo_temp A INNER JOIN DM.DM_TF_EXT_UNIONALL_SALES B ON A.DATA_SOURCE = B.DATA_SOURCE - AND A.AUDIT_COD = B.AUDIT_COD + AND A.AUDIT_COD = B.AUDIT_CODE WHERE A.DATA_SOURCE = 'IQVIA-CHPA(Monthly)' UNION ALL SELECT DISTINCT A.DATA_SOURCE, - A.AUDIT_COD, - A.PACK_COD, + A.AUDIT_CODE, + A.PACK_CODE, '全国' PROVINCE_C, A.REGION_TYPE, '全国' PROVINCE_MAP, @@ -909,8 +909,8 @@ WHERE UNION ALL SELECT DISTINCT A.DATA_SOURCE, - A.AUDIT_COD, - A.PACK_COD, + A.AUDIT_CODE, + A.PACK_CODE, C.ProvinceGroup PROVINCE_C, A.REGION_TYPE, '' PROVINCE_MAP, @@ -934,7 +934,7 @@ FROM B.CITY_TYPE = 'City' and b.DATA_SOURCE = 'IQVIA-CHPA(Monthly)' ) C - ON C.AUDIT_COD = a.AUDIT_COD + ON C.AUDIT_COD = a.AUDIT_CODE WHERE A.DATA_SOURCE = 'IQVIA-CHPA(Monthly)' @@ -1194,8 +1194,8 @@ CREATE OR REPLACE TEMPORARY VIEW THC_geo_type_temp AS SELECT DISTINCT 
A.DATA_SOURCE, - A.AUDIT_COD, - A.PACK_COD, + A.AUDIT_CODE, + A.PACK_CODE, C.ProvinceGroup PROVINCE_C, A.REGION_TYPE, '' PROVINCE_MAP, @@ -1217,14 +1217,14 @@ FROM WHERE B.CITY_TYPE = 'Province' and b.DATA_SOURCE = 'THC(Quarterly)' - ) C ON C.AUDIT_COD = a.AUDIT_COD + ) C ON C.AUDIT_COD = a.AUDIT_CODE WHERE A.DATA_SOURCE = 'THC(Quarterly)' UNION ALL SELECT DISTINCT A.DATA_SOURCE, A.AUDIT_COD, - B.PACK_COD, + B.PACK_CODE, A.PROVINCE_C, B.REGION_TYPE, A.PROVINCE_MAP, @@ -1236,10 +1236,10 @@ FROM INNER JOIN ( --补充所有的geo维度,保证全国的数据和省份的数据都有对应的维度。 select - PACK_COD, + PACK_CODE, REGION_TYPE, DATA_SOURCE, - AUDIT_COD + AUDIT_CODE from dm.DM_TF_EXT_UNIONALL_SALES WHERE @@ -1254,7 +1254,7 @@ FROM dm.dm_tf_external_sales_thc -- ) B ON A.DATA_SOURCE = B.DATA_SOURCE - AND A.AUDIT_COD = B.AUDIT_COD + AND A.AUDIT_COD = B.audit_code WHERE A.DATA_SOURCE = 'THC(Quarterly)' @@ -1266,8 +1266,8 @@ CREATE OR REPLACE TEMPORARY VIEW AIA_geo_type_temp AS select distinct A.DATA_SOURCE, - A.AUDIT_COD, - A.PACK_COD, + A.AUDIT_CODE, + A.PACK_CODE, '全国' as PROVINCE_C, A.REGION_TYPE, '全国' as PROVINCE_MAP, @@ -1281,8 +1281,8 @@ where union all select distinct A.DATA_SOURCE, - A.AUDIT_COD, - A.PACK_COD, + A.AUDIT_CODE, + A.PACK_CODE, B.ProvinceGroup as PROVINCE_C, A.REGION_TYPE, '' as PROVINCE_MAP, @@ -1302,14 +1302,14 @@ from WHERE t2.CITY_TYPE = 'City' and t2.DATA_SOURCE = 'AIA(Monthly)' - ) B ON A.AUDIT_COD = B.AUDIT_COD + ) B ON A.AUDIT_CODE = B.AUDIT_COD WHERE A.DATA_SOURCE = 'AIA(Monthly)' union all select distinct A.DATA_SOURCE, A.AUDIT_COD, - B.PACK_COD, + B.PACK_CODE, case when A.PROVINCE_C IN ( 'BBU_OtherProv', @@ -1342,7 +1342,7 @@ select distinct from dm.dm_td_external_geo_temp A inner join dm.DM_TF_EXT_UNIONALL_SALES B on A.DATA_SOURCE = B.DATA_SOURCE - and A.AUDIT_COD = B.AUDIT_COD + and A.AUDIT_COD = B.AUDIT_CODE where A.DATA_SOURCE = 'AIA(Monthly)' @@ -1355,7 +1355,7 @@ AS SELECT DISTINCT A.DATA_SOURCE, A.AUDIT_COD, - B.PACK_COD, + B.PACK_CODE, CASE WHEN A.PROVINCE_C IN ( 
'BBU_OtherProv', @@ -1390,14 +1390,14 @@ SELECT DISTINCT FROM DM.dm_td_external_geo_temp A INNER JOIN DM.DM_TF_EXT_UNIONALL_SALES B ON A.DATA_SOURCE = B.DATA_SOURCE - AND A.AUDIT_COD = B.AUDIT_COD + AND A.AUDIT_COD = B.AUDIT_CODE WHERE A.DATA_SOURCE = 'CHC(Quarterly)' UNION ALL SELECT DISTINCT A.DATA_SOURCE, - A.AUDIT_COD, - A.PACK_COD, + A.AUDIT_CODE, + A.PACK_CODE, '全国' PROVINCE_C, A.REGION_TYPE, '全国' PROVINCE_MAP, @@ -1411,8 +1411,8 @@ WHERE UNION ALL SELECT DISTINCT A.DATA_SOURCE, - A.AUDIT_COD, - A.PACK_COD, + A.AUDIT_CODE, + A.PACK_CODE, C.ProvinceGroup PROVINCE_C, A.REGION_TYPE, '' PROVINCE_MAP, @@ -1434,7 +1434,7 @@ FROM WHERE B.CITY_TYPE = 'City' and b.DATA_SOURCE = 'CHC(Quarterly)' - ) C ON C.AUDIT_COD = a.AUDIT_COD + ) C ON C.AUDIT_COD = a.AUDIT_CODE WHERE A.DATA_SOURCE = 'CHC(Quarterly)' @@ -1447,7 +1447,7 @@ AS select distinct a.DATA_SOURCE, a.AUDIT_COD, - b.PACK_COD, + b.PACK_CODE, a.PROVINCE_C, b.REGION_TYPE, a.PROVINCE_MAP, @@ -1456,7 +1456,7 @@ select distinct 1 RN from dm.dm_td_external_geo_temp a - left join dm.DM_TF_EXT_UNIONALL_SALES b on a.audit_cod = b.AUDIT_COD + left join dm.DM_TF_EXT_UNIONALL_SALES b on a.audit_cod = b.AUDIT_CODE where a.DATA_SOURCE = 'EC(Monthly)' and b.DATA_SOURCE = 'EC(Monthly)' @@ -1469,8 +1469,8 @@ CREATE OR REPLACE TEMPORARY VIEW XH_geo_type_temp AS select distinct A.DATA_SOURCE, - A.AUDIT_COD, - A.PACK_COD, + A.AUDIT_CODE, + A.PACK_CODE, '全国' as PROVINCE_C, A.REGION_TYPE, '全国' as PROVINCE_MAP, @@ -1485,7 +1485,7 @@ union all select distinct A.DATA_SOURCE, A.AUDIT_COD, - B.PACK_COD, + B.PACK_CODE, case when A.PROVINCE_C IN ( 'BBU_OtherProv', @@ -1518,7 +1518,7 @@ select distinct from dm.dm_td_external_geo_temp A inner join dm.DM_TF_EXT_UNIONALL_SALES B on A.DATA_SOURCE = B.DATA_SOURCE - and A.AUDIT_COD = B.AUDIT_COD + and A.AUDIT_COD = B.AUDIT_CODE where A.DATA_SOURCE = 'XH Data(Quarterly)' @@ -1536,7 +1536,7 @@ where create or replace temporary view dm_td_external_geo_type_temp as select 
-DATA_SOURCE,AUDIT_COD,PACK_COD,PROVINCE_C,REGION_TYPE,PROVINCE_MAP,CITY_C,AZ_CITY_TIER,RN +DATA_SOURCE,AUDIT_COD,PACK_CODE,PROVINCE_C,REGION_TYPE,PROVINCE_MAP,CITY_C,AZ_CITY_TIER,RN from CHPA_geo_type_temp union all @@ -1551,27 +1551,27 @@ from RETAIL_geo_type_temp union all select -DATA_SOURCE,AUDIT_COD,PACK_COD,PROVINCE_C,REGION_TYPE,PROVINCE_MAP,CITY_C,AZ_CITY_TIER,RN +DATA_SOURCE,AUDIT_CODE,PACK_CODE,PROVINCE_C,REGION_TYPE,PROVINCE_MAP,CITY_C,AZ_CITY_TIER,RN from THC_geo_type_temp union all select -DATA_SOURCE,AUDIT_COD,PACK_COD,PROVINCE_C,REGION_TYPE,PROVINCE_MAP,CITY_C,AZ_CITY_TIER,RN +DATA_SOURCE,AUDIT_CODE,PACK_CODE,PROVINCE_C,REGION_TYPE,PROVINCE_MAP,CITY_C,AZ_CITY_TIER,RN from AIA_geo_type_temp union all select -DATA_SOURCE,AUDIT_COD,PACK_COD,PROVINCE_C,REGION_TYPE,PROVINCE_MAP,CITY_C,AZ_CITY_TIER,RN +DATA_SOURCE,AUDIT_COD,PACK_CODE,PROVINCE_C,REGION_TYPE,PROVINCE_MAP,CITY_C,AZ_CITY_TIER,RN from CHC_geo_type_temp union all select -DATA_SOURCE,AUDIT_COD,PACK_COD,PROVINCE_C,REGION_TYPE,PROVINCE_MAP,CITY_C,AZ_CITY_TIER,RN +DATA_SOURCE,AUDIT_COD,PACK_CODE,PROVINCE_C,REGION_TYPE,PROVINCE_MAP,CITY_C,AZ_CITY_TIER,RN from EC_geo_type_temp union all select -DATA_SOURCE,AUDIT_COD,PACK_COD,PROVINCE_C,REGION_TYPE,PROVINCE_MAP,CITY_C,AZ_CITY_TIER,RN +DATA_SOURCE,AUDIT_CODE,PACK_CODE,PROVINCE_C,REGION_TYPE,PROVINCE_MAP,CITY_C,AZ_CITY_TIER,RN from XH_geo_type_temp @@ -1699,7 +1699,7 @@ as SUM(B.SALES_VALUE_CAL) SALES_VALUE_CAL FROM TEMP A - INNER JOIN DM.DM_TF_EXT_UNIONALL_SALES B ON A.AUDIT_COD = B.AUDIT_COD + INNER JOIN DM.DM_TF_EXT_UNIONALL_SALES B ON A.AUDIT_COD = B.AUDIT_CODE AND A.DATA_SOURCE = B.DATA_SOURCE AND A.REGION_TYPE = B.REGION_TYPE and A.DATA_SOURCE = B.DATA_SOURCE @@ -1749,7 +1749,7 @@ as SUM(B.SALES_VALUE_CAL) SALES_VALUE_CAL FROM TEMP A - INNER JOIN DM.DM_TF_EXT_UNIONALL_SALES B ON A.AUDIT_COD = B.AUDIT_COD + INNER JOIN DM.DM_TF_EXT_UNIONALL_SALES B ON A.AUDIT_COD = B.AUDIT_CODE AND A.DATA_SOURCE = B.DATA_SOURCE AND A.REGION_TYPE = B.REGION_TYPE and 
A.DATA_SOURCE = B.DATA_SOURCE @@ -1965,4 +1965,4 @@ WHERE UNION ALL SELECT '全国' - ) \ No newline at end of file + ) diff --git a/Retail/04 map_to_dws_table.ipynb b/Retail/04 map_to_dws_table.ipynb new file mode 100644 index 0000000..d38e458 --- /dev/null +++ b/Retail/04 map_to_dws_table.ipynb @@ -0,0 +1,381 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "4d16488f-0327-4ced-b23f-41f960a90d2f", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + "############################################################START##############################################################\n", + "### STEP-1: insert splited pack data into tmp final table: tmp_retail_final_sales" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "implicitDf": true, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "1168666b-255b-44a3-968b-4156c93dad53", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + "%sql\n", + "-------------------------------------------------------------------------------------\n", + "-- STEP-1: insert splited pack data into tmp final table\n", + "-- insert into tmp_retail_final_sales\n", + "-------------------------------------------------------------------------------------\n", + "\n", + "with tmp_pack as (\n", + " select \n", + " ------------------------------------------------------ \n", + " -- 有月度数据使用月度数据,无月度数据用季度数据去转\n", + " nvl(\n", + " a.month,\n", + " CONCAT(\n", + " SUBSTRING(a.quarter, 1, 4), -- 提取年份(前4位)\n", + " CASE \n", + " WHEN SUBSTRING(a.quarter, 6, 1) = '1' THEN '03' -- Q1 → 03月\n", + " WHEN SUBSTRING(a.quarter, 6, 1) = '2' THEN '06' -- Q2 → 06月\n", + " WHEN SUBSTRING(a.quarter, 6, 1) = '3' THEN 
'09' -- Q3 → 09月\n", + " WHEN SUBSTRING(a.quarter, 6, 1) = '4' THEN '12' -- Q4 → 12月\n", + " END\n", + " )\n", + " ) as YYYYMM,\n", + " ------------------------------------------------------ \n", + " a.pack_code as iqvia_pack_code,\n", + " a.product_id as zk_product_id,\n", + " case when a.product_desc <> 'others' then a.product_desc else null end as prod_des_c,\n", + " case when a.product_desc <> 'others' then a.product_desc else concat('Others_', a.molecule_desc) end as PROD_MAPPING,\n", + " a.zk_regin as province_city,\n", + " a.level_market as market,\n", + " a.sales_value,\n", + " a.sales_unit,\n", + " ------------------------------------------------------ \n", + " -- counting_unit取值逻辑:\n", + " -- 不能直接取原始pack文件表中的值,改为取pack_property表中counting_unit / unit的值\n", + " a.sales_unit * (b.counting_unit/ coalesce(b.unit,1)) as counting_unit,\n", + " ------------------------------------------------------ \n", + " case when data_flag = 0 then 1 else 2 end as pack_flag,\n", + " case when brand_flag = 1 then 1 else 2 end as brand_flag\n", + " from tmp.tmp_retail_pack_rawdata a\n", + " left join dwd.dwd_gnd_ext_retail_pack_property b \n", + " on a.product_id = b.product_id\n", + "), tmp_has_roc as (\n", + " select\n", + " product_id,\n", + " quarter,\n", + " ------------------------------------------------------ \n", + " -- 有月度数据使用月度数据,无月度数据用季度数据去转\n", + " nvl(\n", + " month,\n", + " CONCAT(\n", + " SUBSTRING(quarter, 1, 4), -- 提取年份(前4位)\n", + " CASE \n", + " WHEN SUBSTRING(quarter, 6, 1) = '1' THEN '03' -- Q1 → 03月\n", + " WHEN SUBSTRING(quarter, 6, 1) = '2' THEN '06' -- Q2 → 06月\n", + " WHEN SUBSTRING(quarter, 6, 1) = '3' THEN '09' -- Q3 → 09月\n", + " WHEN SUBSTRING(quarter, 6, 1) = '4' THEN '12' -- Q4 → 12月\n", + " END\n", + " )\n", + " ) as month,\n", + " ------------------------------------------------------ \n", + " pack_code\n", + " from tmp.tmp_retail_pack_rawdata \n", + " where zk_regin = 'ROC'\n", + "), tmp_pack_this_year_with_roc as (\n", + " select \n", + " * 
\n", + " from tmp_pack a \n", + " where exists(\n", + " select * from tmp_has_roc b \n", + " where a.YYYYMM = b.month\n", + " and a.iqvia_pack_code = b.pack_code\n", + " and a.zk_product_id = b.product_id\n", + " ) and a.province_city <> '全国'\n", + "), tmp_pack_next_year_with_roc as (\n", + " select \n", + " cast(YYYYMM + 100 as int) as YYYYMM,\n", + " iqvia_pack_code,\n", + " zk_product_id,\n", + " prod_des_c,\n", + " PROD_MAPPING,\n", + " province_city,\n", + " market,\n", + " sales_value as sales_value_ly,\n", + " sales_unit as sales_unit_ly,\n", + " counting_unit as counting_unit_ly,\n", + " pack_flag,\n", + " brand_flag\n", + " from tmp_pack a\n", + " where YYYYMM + 100 <= (select max(YYYYMM) from tmp_pack)\n", + " and exists(\n", + " select * from tmp_has_roc b \n", + " where a.YYYYMM = b.month\n", + " and a.iqvia_pack_code = b.pack_code\n", + " and a.zk_product_id = b.product_id\n", + " ) and a.province_city <> '全国'\n", + "\n", + "), tmp_pack_this_year_without_roc as (\n", + " select \n", + " *\n", + " from tmp_pack a \n", + " where not exists(\n", + " select * from tmp_has_roc b \n", + " where a.YYYYMM = b.month\n", + " and a.iqvia_pack_code = b.pack_code\n", + " and a.zk_product_id = b.product_id\n", + " )\n", + "), tmp_pack_next_year_without_roc as (\n", + " select \n", + " cast(YYYYMM + 100 as int) as YYYYMM,\n", + " iqvia_pack_code,\n", + " zk_product_id,\n", + " prod_des_c,\n", + " PROD_MAPPING,\n", + " province_city,\n", + " market,\n", + " sales_value as sales_value_ly,\n", + " sales_unit as sales_unit_ly,\n", + " counting_unit as counting_unit_ly,\n", + " pack_flag,\n", + " brand_flag\n", + " from tmp_pack a \n", + " where YYYYMM + 100 <= (select max(YYYYMM) from tmp_pack)\n", + " and not exists(\n", + " select * from tmp_has_roc b \n", + " where a.YYYYMM = b.month\n", + " and a.iqvia_pack_code = b.pack_code\n", + " and a.zk_product_id = b.product_id\n", + " )\n", + "), tmp_final_sales as (\n", + " select \n", + " ifnull(a.yyyymm, b.yyyymm) as 
yyyymm,\n", + " ifnull(a.iqvia_pack_code, b.iqvia_pack_code) as iqvia_pack_code, \n", + " ifnull(a.zk_product_id, b.zk_product_id) as zk_product_id,\n", + " ifnull(a.prod_des_c, b.prod_des_c) as prod_des_c,\n", + " ifnull(a.PROD_MAPPING, b.PROD_MAPPING) as PROD_MAPPING,\n", + " ifnull(a.province_city, b.province_city) as province_city,\n", + " ifnull(a.market, b.market) as market,\n", + " ifnull(a.sales_value, 0) as sales_value,\n", + " ifnull(a.sales_unit, 0) as sales_unit,\n", + " ifnull(a.counting_unit, 0) as counting_unit,\n", + " ifnull(a.pack_flag, b.pack_flag) as pack_flag,\n", + " ifnull(a.brand_flag,b.brand_flag ) as brand_flag,\n", + " ifnull(b.sales_value_ly, 0) as sales_value_ly,\n", + " ifnull(b.sales_unit_ly, 0) as sales_unit_ly,\n", + " ifnull(b.counting_unit_ly, 0) as counting_unit_ly\n", + " from tmp_pack_this_year_with_roc a \n", + " full outer join tmp_pack_next_year_with_roc b \n", + " on a.YYYYMM = b.YYYYMM\n", + " and a.iqvia_pack_code = b.iqvia_pack_code\n", + " and a.zk_product_id = b.zk_product_id\n", + " and a.province_city = b.province_city\n", + "\n", + " union all\n", + "\n", + " select \n", + " ifnull(c.yyyymm, d.yyyymm) as yyyymm,\n", + " ifnull(c.iqvia_pack_code, d.iqvia_pack_code) as iqvia_pack_code, \n", + " ifnull(c.zk_product_id, d.zk_product_id) as zk_product_id,\n", + " ifnull(c.prod_des_c, d.prod_des_c) as prod_des_c,\n", + " ifnull(c.PROD_MAPPING, d.PROD_MAPPING) as PROD_MAPPING,\n", + " 'ROC' as province_city,\n", + " ifnull(c.market, d.market) as market,\n", + " ifnull(c.sales_value, 0) as sales_value,\n", + " ifnull(c.sales_unit, 0) as sales_unit,\n", + " ifnull(c.counting_unit, 0) as counting_unit,\n", + " --ifnull(c.pack_flag, d.pack_flag) as pack_flag,\n", + " 2 as pack_flag, -- 此类没有拆分比例,且pack只有全国的数,pack_flag固定为2\n", + " ifnull(c.brand_flag,d.brand_flag ) as brand_flag,\n", + " ifnull(d.sales_value_ly, 0) as sales_value_ly,\n", + " ifnull(d.sales_unit_ly, 0) as sales_unit_ly,\n", + " ifnull(d.counting_unit_ly, 0) as 
counting_unit_ly\n", + " from tmp_pack_this_year_without_roc c \n", + " full outer join tmp_pack_next_year_without_roc d \n", + " on c.YYYYMM = d.YYYYMM\n", + " and c.iqvia_pack_code = d.iqvia_pack_code\n", + " and c.zk_product_id = d.zk_product_id\n", + " and c.province_city = d.province_city \n", + ")\n", + "\n", + "-- insert overwrite table tmp.tmp_retail_final_sales\n", + "\n", + "-- select\n", + "-- yyyymm,\n", + "-- iqvia_pack_code,\n", + "-- zk_product_id,\n", + "-- prod_des_c,\n", + "-- PROD_MAPPING,\n", + "-- province_city,\n", + "-- market,\n", + "-- sales_value,\n", + "-- sales_value_ly,\n", + "-- sales_unit,\n", + "-- sales_unit_ly,\n", + "-- counting_unit,\n", + "-- counting_unit_ly,\n", + "-- pack_flag,\n", + "-- brand_flag\n", + "-- from tmp_final_sales\n", + "-- order by yyyymm\n", + ", tmp_niad_aggregated as (\n", + " select \n", + " concat(left(yyyymm,4),\n", + " case right(yyyymm,2)\n", + " when '01' then 'Q1' when '02' then 'Q1' when '03' then 'Q1'\n", + " when '04' then 'Q2' when '05' then 'Q2' when '06' then 'Q2'\n", + " when '07' then 'Q3' when '08' then 'Q3' when '09' then 'Q3'\n", + " else 'Q4'\n", + " end) as yyyymm_quarter,\n", + " concat(left(yyyymm,4),\n", + " case right(yyyymm,2)\n", + " when '01' then '03' when '02' then '03' when '03' then '03'\n", + " when '04' then '06' when '05' then '06' when '06' then '06'\n", + " when '07' then '09' when '08' then '09' when '09' then '09'\n", + " else '12'\n", + " end) as yyyymm,\n", + " iqvia_pack_code,\n", + " zk_product_id,\n", + " prod_des_c,\n", + " PROD_MAPPING,\n", + " province_city,\n", + " market,\n", + " sum(sales_value) as sales_value,\n", + " sum(sales_unit) as sales_unit,\n", + " sum(counting_unit) as counting_unit,\n", + " max(pack_flag) as pack_flag,\n", + " max(brand_flag) as brand_flag,\n", + " sum(sales_value_ly) as sales_value_ly,\n", + " sum(sales_unit_ly) as sales_unit_ly,\n", + " sum(counting_unit_ly) as counting_unit_ly\n", + " from tmp_final_sales\n", + " where market = 
'NIAD'\n", + " group by \n", + " concat(left(yyyymm,4),\n", + " case right(yyyymm,2)\n", + " when '01' then 'Q1' when '02' then 'Q1' when '03' then 'Q1'\n", + " when '04' then 'Q2' when '05' then 'Q2' when '06' then 'Q2'\n", + " when '07' then 'Q3' when '08' then 'Q3' when '09' then 'Q3'\n", + " else 'Q4'\n", + " end),\n", + " concat(left(yyyymm,4),\n", + " case right(yyyymm,2)\n", + " when '01' then '03' when '02' then '03' when '03' then '03'\n", + " when '04' then '06' when '05' then '06' when '06' then '06'\n", + " when '07' then '09' when '08' then '09' when '09' then '09'\n", + " else '12'\n", + " end),\n", + " iqvia_pack_code,\n", + " zk_product_id,\n", + " prod_des_c,\n", + " PROD_MAPPING,\n", + " province_city,\n", + " market\n", + ")\n", + "\n", + "-- Final result: NIAD is aggregated to quarter level; all other markets are kept as-is\n", + "insert overwrite table tmp.tmp_retail_final_sales\n", + "select \n", + " yyyymm,\n", + " iqvia_pack_code,\n", + " zk_product_id,\n", + " prod_des_c,\n", + " PROD_MAPPING,\n", + " province_city,\n", + " market,\n", + " sales_value,\n", + " sales_value_ly,\n", + " sales_unit,\n", + " sales_unit_ly,\n", + " counting_unit,\n", + " counting_unit_ly,\n", + " pack_flag,\n", + " brand_flag\n", + "from (\n", + " select yyyymm, iqvia_pack_code, zk_product_id, prod_des_c, PROD_MAPPING, province_city, market, sales_value, sales_value_ly, sales_unit, sales_unit_ly, counting_unit, counting_unit_ly, pack_flag, brand_flag from tmp_final_sales where market <> 'NIAD' -- explicit list in the same order as the NIAD branch below: UNION ALL pairs columns by position, not by name\n", + " union all\n", + " select \n", + " yyyymm,\n", + " iqvia_pack_code,\n", + " zk_product_id,\n", + " prod_des_c,\n", + " PROD_MAPPING,\n", + " province_city,\n", + " market,\n", + " sales_value,\n", + " sales_value_ly,\n", + " sales_unit,\n", + " sales_unit_ly,\n", + " counting_unit,\n", + " counting_unit_ly,\n", + " pack_flag,\n", + " brand_flag\n", + " from tmp_niad_aggregated\n", + ") t\n", + "order by yyyymm\n" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "efe5e7ee-82b5-46d9-85f7-650756dffbf8", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + 
"source": [ + "############################################################END################################################################" + ] + } + ], + "metadata": { + "application/vnd.databricks.v1+notebook": { + "computePreferences": null, + "dashboards": [], + "environmentMetadata": null, + "inputWidgetPreferences": null, + "language": "python", + "notebookMetadata": { + "mostRecentlyExecutedCommandWithImplicitDF": { + "commandId": 1969542701077462, + "dataframes": [ + "_sqldf" + ] + }, + "pythonIndentUnit": 4 + }, + "notebookName": "04 map_to_dws_table", + "widgets": {} + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/Retail/04 map_to_dws_table.py b/Retail/04 map_to_dws_table_backup.py similarity index 99% rename from Retail/04 map_to_dws_table.py rename to Retail/04 map_to_dws_table_backup.py index ee7751a..c6aaea1 100644 --- a/Retail/04 map_to_dws_table.py +++ b/Retail/04 map_to_dws_table_backup.py @@ -206,4 +206,4 @@ # COMMAND ---------- -############################################################END################################################################ \ No newline at end of file +############################################################END################################################################ diff --git a/Retail/09 dwd_inc_gnd_ext_retail_nataional.py b/Retail/09 dwd_inc_gnd_ext_retail_nataional.py index 4fcc913..5812d15 100644 --- a/Retail/09 dwd_inc_gnd_ext_retail_nataional.py +++ b/Retail/09 dwd_inc_gnd_ext_retail_nataional.py @@ -97,81 +97,86 @@ brand_result.write.mode("overwrite").saveAsTable("dwd.dwd_inc_gnd_ext_retail_nat # COMMAND ---------- -# MAGIC %sql -# MAGIC /* -# MAGIC 修改时间:20250311 -# MAGIC 修改人:chenwu -# MAGIC 修改内容:brand来数频率为 季度来数, 但是 pack 为 月度来数据,需要用季度的数据/3得到月度的 -# MAGIC */ -# MAGIC insert overwrite table dwd.dwd_inc_gnd_ext_retail_nataional_brand_union_all -# MAGIC with quarterly_table as ( -# MAGIC select -# MAGIC * -# MAGIC from 
dwd.dwd_inc_gnd_ext_retail_nataional_brand_union_all -# MAGIC where market not in ('NIAD','Inhaled Extended Market','布地奈德雾化溶液') -# MAGIC -- 范围内只能是 季度来数据的,如果有月度来数据的需要排除掉 -# MAGIC ) -# MAGIC -# MAGIC ,month_table as (--转化成月度数据 -# MAGIC SELECT -# MAGIC SUBSTR(q.yq, 1, 4)*100 + -- 提取年份 -# MAGIC LPAD(m.month_num, 2, '0') -- 补零月份 -# MAGIC AS YYYYMM -- 月份首日 -# MAGIC ,`year` -# MAGIC ,`quarter` -# MAGIC ,yq -# MAGIC ,brand_cat_type -# MAGIC ,TA -# MAGIC ,market -# MAGIC ,zk_brand_category -# MAGIC ,zk_common_name -# MAGIC ,zk_manu_des -# MAGIC ,rc_name_en -# MAGIC ,province_city -# MAGIC ,ytd -# MAGIC ,sales_val /3 --除3 -# MAGIC ,sales_vol /3 --除3 -# MAGIC ,price -# MAGIC ,num_dist_rate -# MAGIC ,weig_dist_rate -# MAGIC ,val_share -# MAGIC ,vol_share -# MAGIC ,key_brand_ytd -# MAGIC ,key_brand_rank_ytd -# MAGIC ,top_brand_ytd -# MAGIC ,top_brand_ms_ytd -# MAGIC ,top_brand_inc_ms_ytd -# MAGIC ,top_brand_gr_ytd -# MAGIC ,key_brand_qtd -# MAGIC ,key_brand_rank_qtd -# MAGIC ,top_brand_qtd -# MAGIC ,top_brand_ms_qtd -# MAGIC ,top_brand_inc_ms_qtd -# MAGIC ,top_brand_gr_qtd -# MAGIC ,ranked_by -# MAGIC ,pack_flag -# MAGIC ,etl_insert_dt -# MAGIC ,etl_update_dt -# MAGIC FROM -# MAGIC quarterly_table q -# MAGIC LATERAL VIEW EXPLODE( -- 为每季度生成三个月 -# MAGIC CASE -# MAGIC WHEN RIGHT(q.yq, 2) = 'Q1' THEN ARRAY(1, 2, 3) -# MAGIC WHEN RIGHT(q.yq, 2) = 'Q2' THEN ARRAY(4, 5, 6) -# MAGIC WHEN RIGHT(q.yq, 2) = 'Q3' THEN ARRAY(7, 8, 9) -# MAGIC WHEN RIGHT(q.yq, 2) = 'Q4' THEN ARRAY(10, 11, 12) -# MAGIC END -# MAGIC ) m AS month_num -# MAGIC ) -# MAGIC -# MAGIC ,other_not_quarterly_table ( -# MAGIC select -# MAGIC * -# MAGIC from dwd.dwd_inc_gnd_ext_retail_nataional_brand_union_all -# MAGIC where market in ('NIAD','Inhaled Extended Market','布地奈德雾化溶液') -# MAGIC -- 范围内只能是 月度来数据的 -# MAGIC ) -# MAGIC -# MAGIC select * from month_table -# MAGIC union all -# MAGIC select * from other_not_quarterly_table \ No newline at end of file +# %sql +# /* +# 修改时间:20250311 +# 修改人:chenwu +# 修改内容:brand来数频率为 季度来数, 
但是 pack 为 月度来数据,需要用季度的数据/3得到月度的 + + +# 修改时间:20260428 +# 修改人:zhanghaoyi +# 修改内容:上游汇总为季度数据, 无需拆分 +# */ +# insert overwrite table dwd.dwd_inc_gnd_ext_retail_nataional_brand_union_all +# with quarterly_table as ( +# select +# * +# from dwd.dwd_inc_gnd_ext_retail_nataional_brand_union_all +# where market not in ('NIAD','Inhaled Extended Market','布地奈德雾化溶液') +# -- 范围内只能是 季度来数据的,如果有月度来数据的需要排除掉 +# ) + +# ,month_table as (--转化成月度数据 +# SELECT +# SUBSTR(q.yq, 1, 4)*100 + -- 提取年份 +# LPAD(m.month_num, 2, '0') -- 补零月份 +# AS YYYYMM -- 月份首日 +# ,`year` +# ,`quarter` +# ,yq +# ,brand_cat_type +# ,TA +# ,market +# ,zk_brand_category +# ,zk_common_name +# ,zk_manu_des +# ,rc_name_en +# ,province_city +# ,ytd +# ,sales_val /3 --除3 +# ,sales_vol /3 --除3 +# ,price +# ,num_dist_rate +# ,weig_dist_rate +# ,val_share +# ,vol_share +# ,key_brand_ytd +# ,key_brand_rank_ytd +# ,top_brand_ytd +# ,top_brand_ms_ytd +# ,top_brand_inc_ms_ytd +# ,top_brand_gr_ytd +# ,key_brand_qtd +# ,key_brand_rank_qtd +# ,top_brand_qtd +# ,top_brand_ms_qtd +# ,top_brand_inc_ms_qtd +# ,top_brand_gr_qtd +# ,ranked_by +# ,pack_flag +# ,etl_insert_dt +# ,etl_update_dt +# FROM +# quarterly_table q +# LATERAL VIEW EXPLODE( -- 为每季度生成三个月 +# CASE +# WHEN RIGHT(q.yq, 2) = 'Q1' THEN ARRAY(1, 2, 3) +# WHEN RIGHT(q.yq, 2) = 'Q2' THEN ARRAY(4, 5, 6) +# WHEN RIGHT(q.yq, 2) = 'Q3' THEN ARRAY(7, 8, 9) +# WHEN RIGHT(q.yq, 2) = 'Q4' THEN ARRAY(10, 11, 12) +# END +# ) m AS month_num +# ) + +# ,other_not_quarterly_table ( +# select +# * +# from dwd.dwd_inc_gnd_ext_retail_nataional_brand_union_all +# where market in ('NIAD','Inhaled Extended Market','布地奈德雾化溶液') +# -- 范围内只能是 月度来数据的 +# ) + +# select * from month_table +# union all +# select * from other_not_quarterly_table diff --git a/Retail/09 dwd_inc_gnd_ext_retail_nataional_backup.py b/Retail/09 dwd_inc_gnd_ext_retail_nataional_backup.py new file mode 100644 index 0000000..ff326fa --- /dev/null +++ b/Retail/09 dwd_inc_gnd_ext_retail_nataional_backup.py @@ -0,0 +1,177 @@ +# Databricks 
notebook source +#当更新pack 或品牌 事实数据时需要运行此代码,否则无需运行。 + +# COMMAND ---------- + +# MAGIC %sql +# MAGIC update dwd.dwd_gnd_ext_retail_corresponding_relationship set table_name ='dwd.dwd_gnd_ext_retail_nataional_oap' where file_name ='pack-CV-抗血栓2通用名-全国.xlsx'; +# MAGIC update dwd.dwd_gnd_ext_retail_corresponding_relationship set table_name ='dwd.dwd_gnd_ext_retail_htn' where file_name ='pack-CV-高血压-化学药-全国.xlsx'; +# MAGIC update dwd.dwd_gnd_ext_retail_corresponding_relationship set table_name ='dwd.dwd_gnd_ext_retail_atomizer' where file_name ='pack-雾化器-全国&县域数据.xlsx'; +# MAGIC update dwd.dwd_gnd_ext_retail_corresponding_relationship set table_name ='dwd.dwd_gnd_ext_retail_anti_asthma_copd' where file_name ='pack-RE-慢阻肺-全国.xlsx'; +# MAGIC update dwd.dwd_gnd_ext_retail_corresponding_relationship set table_name ='dwd.dwd_gnd_ext_zk_brand' where file_name ='Brand-品牌数据报表.xlsx'; +# MAGIC update dwd.dwd_gnd_ext_retail_corresponding_relationship set table_name ='dwd.dwd_gnd_ext_retail_statin_xzk' where file_name ='pack-CV-他汀类+血脂康-全国.xlsx'; +# MAGIC update dwd.dwd_gnd_ext_retail_corresponding_relationship set table_name ='dwd.dwd_gnd_ext_retail_nataional_rd' where file_name ='pack-RD-肾科-全国.xlsx'; +# MAGIC update dwd.dwd_gnd_ext_retail_corresponding_relationship set table_name ='dwd.dwd_gnd_ext_retail_aagsa_ppi_oral' where file_name ='pack-GI-慢性胃炎胃溃疡-全国.xlsx'; +# MAGIC update dwd.dwd_gnd_ext_retail_corresponding_relationship set table_name ='dwd.dwd_gnd_ext_retail_nataional_niad' where file_name ='pack-DM-口服降糖化学药.xlsx'; +# MAGIC update dwd.dwd_gnd_ext_retail_corresponding_relationship set table_name ='dwd.dwd_gnd_ext_retail_metoprolol_tartrat' where file_name ='pack-CV-酒石酸美托洛尔.xlsx'; +# MAGIC + +# COMMAND ---------- + + +# brand+ 省份数据自动接入 +#获取配置表信息(表名、brand_flag +dfband = spark.sql(""" +SELECT DISTINCT table_name tab ,file_name brand_flag FROM dwd.dwd_gnd_ext_retail_corresponding_relationship +where type_name ='BRAND' +""").collect() + +def get_union_brand_data(df): + #数据为空 + if 
df == None: + return None + #初始化结果集 + union_query = None + for table in df: + # 选择当前表名 + T = str(table.tab) + # 获取对应brand表维度对应得 market 名称 + pack_flag = str(table.brand_flag) + sql = f""" + select + cast(left(quarter, 4)*100 + right(quarter,1)*3 as int ) AS YYYYMM + ,cast(left(quarter, 4) as int ) AS year + ,right(quarter, 2) AS quarter + ,quarter AS yq + ,type AS brand_cat_type + ,case when ta = 'NIAD' then 'DM' else ta end AS TA + ,market AS market + ,zk_brand_category AS zk_brand_category + ,zk_common_name AS zk_common_name + ,zk_manu_des AS zk_manu_des + ,rc_name_en AS rc_name_en + ,province_city AS province_city + ,ytd AS ytd + ,cast(sales_value * 1000000 as decimal(30,10)) AS sales_val + ,cast(sales_volume * 1000000 as decimal(30,10)) AS sales_vol + ,cast(price as decimal(30,10)) as price + ,cast(num_dist_rate as decimal(30,10)) as num_dist_rate + ,cast(weig_dist_rate as decimal(30,10)) as weig_dist_rate + ,cast(value_share as decimal(30,10)) as val_share + ,cast(volume_share as decimal(30,10)) as vol_share + ,replace(key_brand_ytd,'-','') as key_brand_ytd + ,cast(replace(key_brand_rank_ytd,'-','0') as int) as key_brand_rank_ytd + ,replace(top_brand_ytd,'-','') as top_brand_ytd + ,cast(replace(top_brand_ms_ytd,'-','0') as decimal(30,10)) as top_brand_ms_ytd + ,cast(replace(top_brand_inc_ms_ytd,'-','0') as decimal(30,10)) as top_brand_inc_ms_ytd + ,cast(replace(top_brand_gr_ytd,'-','0') as decimal(30,10)) as top_brand_gr_ytd + ,replace(key_brand_qtd,'-','') as key_brand_qtd + ,cast(replace(key_brand_rank_qtd,'-','0') as int) as key_brand_rank_qtd + ,replace(top_brand_qtd,'-','') as top_brand_qtd + ,cast(replace(top_brand_ms_qtd,'-','0') as decimal(30,10)) as top_brand_ms_qtd + ,cast(replace(top_brand_inc_ms_qtd,'-','0') as decimal(30,10)) as top_brand_inc_ms_qtd + ,cast(replace(top_brand_gr_qtd,'-','0') as decimal(30,10)) as top_brand_gr_qtd + ,ranked_by as ranked_by + ,'{pack_flag}' as pack_flag + ,from_utc_timestamp(current_timestamp(),'UTC+8') as 
etl_insert_dt + ,from_utc_timestamp(current_timestamp(),'UTC+8') as etl_update_dt + from {T} + """ + # 读取数据 + current_query = spark.sql(sql) + #union 数据 + if union_query == None: + union_query = current_query + else: + union_query = union_query.union(current_query) + #返回数据集 / 写入表也行??? + return union_query +brand_result = get_union_brand_data(dfband) +brand_result.write.mode("overwrite").saveAsTable("dwd.dwd_inc_gnd_ext_retail_nataional_brand_union_all") + +# COMMAND ---------- + +# MAGIC %md +# MAGIC ###新逻辑 +# MAGIC - 修改brand数据,先拆分成月维度的数据 + +# COMMAND ---------- + +# MAGIC %sql +# MAGIC /* +# MAGIC 修改时间:20250311 +# MAGIC 修改人:chenwu +# MAGIC 修改内容:brand来数频率为 季度来数, 但是 pack 为 月度来数据,需要用季度的数据/3得到月度的 +# MAGIC */ +# MAGIC insert overwrite table dwd.dwd_inc_gnd_ext_retail_nataional_brand_union_all +# MAGIC with quarterly_table as ( +# MAGIC select +# MAGIC * +# MAGIC from dwd.dwd_inc_gnd_ext_retail_nataional_brand_union_all +# MAGIC where market not in ('NIAD','Inhaled Extended Market','布地奈德雾化溶液') +# MAGIC -- 范围内只能是 季度来数据的,如果有月度来数据的需要排除掉 +# MAGIC ) +# MAGIC +# MAGIC ,month_table as (--转化成月度数据 +# MAGIC SELECT +# MAGIC SUBSTR(q.yq, 1, 4)*100 + -- 提取年份 +# MAGIC LPAD(m.month_num, 2, '0') -- 补零月份 +# MAGIC AS YYYYMM -- 月份首日 +# MAGIC ,`year` +# MAGIC ,`quarter` +# MAGIC ,yq +# MAGIC ,brand_cat_type +# MAGIC ,TA +# MAGIC ,market +# MAGIC ,zk_brand_category +# MAGIC ,zk_common_name +# MAGIC ,zk_manu_des +# MAGIC ,rc_name_en +# MAGIC ,province_city +# MAGIC ,ytd +# MAGIC ,sales_val /3 --除3 +# MAGIC ,sales_vol /3 --除3 +# MAGIC ,price +# MAGIC ,num_dist_rate +# MAGIC ,weig_dist_rate +# MAGIC ,val_share +# MAGIC ,vol_share +# MAGIC ,key_brand_ytd +# MAGIC ,key_brand_rank_ytd +# MAGIC ,top_brand_ytd +# MAGIC ,top_brand_ms_ytd +# MAGIC ,top_brand_inc_ms_ytd +# MAGIC ,top_brand_gr_ytd +# MAGIC ,key_brand_qtd +# MAGIC ,key_brand_rank_qtd +# MAGIC ,top_brand_qtd +# MAGIC ,top_brand_ms_qtd +# MAGIC ,top_brand_inc_ms_qtd +# MAGIC ,top_brand_gr_qtd +# MAGIC ,ranked_by +# MAGIC ,pack_flag +# 
MAGIC ,etl_insert_dt +# MAGIC ,etl_update_dt +# MAGIC FROM +# MAGIC quarterly_table q +# MAGIC LATERAL VIEW EXPLODE( -- 为每季度生成三个月 +# MAGIC CASE +# MAGIC WHEN RIGHT(q.yq, 2) = 'Q1' THEN ARRAY(1, 2, 3) +# MAGIC WHEN RIGHT(q.yq, 2) = 'Q2' THEN ARRAY(4, 5, 6) +# MAGIC WHEN RIGHT(q.yq, 2) = 'Q3' THEN ARRAY(7, 8, 9) +# MAGIC WHEN RIGHT(q.yq, 2) = 'Q4' THEN ARRAY(10, 11, 12) +# MAGIC END +# MAGIC ) m AS month_num +# MAGIC ) +# MAGIC +# MAGIC ,other_not_quarterly_table ( +# MAGIC select +# MAGIC * +# MAGIC from dwd.dwd_inc_gnd_ext_retail_nataional_brand_union_all +# MAGIC where market in ('NIAD','Inhaled Extended Market','布地奈德雾化溶液') +# MAGIC -- 范围内只能是 月度来数据的 +# MAGIC ) +# MAGIC +# MAGIC select * from month_table +# MAGIC union all +# MAGIC select * from other_not_quarterly_table