-- Databricks notebook source --------------------------------------------------------------------------- --修改时间:20241126 --修改人:Fanxujia --修改内容: --THC渠道分为14省的数据源以及全国的数据源两种,分在了2个事实表里,下面在维表在引用事实表时,需要将两个事实表union在一起 ---------------------------------------------------------------------------- create or replace temporary view external_sales_union as select YYYYMM,SALES_VALUE_CAL,DATA_SOURCE from dm.dm_tf_ext_unionall_sales union all select YYYYMM,SALES_VALUE_CAL,DATA_SOURCE from dm.dm_tf_external_sales_thc -- COMMAND ---------- --------------------------------------------------------------------------- --修改时间:20260309 --修改人:chenwu --修改内容: --niad 的数据会比其他数据先入数据库,会产生月份差。 ---------------------------------------------------------------------------- create or replace temporary view retail_niad_sales as select GREATEST(niad_month, OTHERS_MONTH) as YYYYMM, NIAD_MONTH from ( select max(niad_month) as niad_month, max(OTHERS_MONTH) as OTHERS_MONTH from ( select niad_month, OTHERS_MONTH from tmp.tmp_dm_td_ext_retail_niad_month union all select niad_month, OTHERS_MONTH from tmp.tmp_dm_td_ext_ec_niad_month ) ) -- COMMAND ---------- create or replace temporary view merged_date_calender as WITH ALL_DATA_YYYY ( select max(yyyymm) YYYYMM, 'AIA' DATATYPE from dm.dm_tf_ext_unionall_sales where DATA_SOURCE = 'AIA(Monthly)' union all select max(yyyymm) YYYYMM, 'DTP' DATATYPE from dm.dm_tf_ext_unionall_sales where DATA_SOURCE = 'DTP(Quarterly)' union all select max(yyyymm) YYYYMM, 'CHPA' DATATYPE from dm.dm_tf_ext_unionall_sales where DATA_SOURCE = 'IQVIA-CHPA(Monthly)' ) SELECT min(YYYYMM) AS MERGED_DATA_MONTH, GREATEST( '202401', CAST((CAST(LEFT(min(yyyymm), 4) AS INT) - 3) * 100 + 1 AS VARCHAR(6)) --三年区间 ) AS MERGED_DATA_MONTH_START from ALL_DATA_YYYY -- COMMAND ---------- insert overwrite dm.dm_td_external_calendar ( date, yyyymmdd, yyyymm, year, quarter, month, month_cn, day, is_holiday, work_days, mtd_work_days, left_work_days, year_quater, half_year, dd, YYYYMM_EXTERNAL, MONTH_EXTERNAL, YEAR_EXTERNAL, YYYYMM_EXTERNAL_AIA, YYYYMM_EXTERNAL_NIAD, YYYYMM_MERGED_DATA ) --公用的日历表最小日期不到2018年,因此需要自己构造一个日历表,日期从2018年起。 --31天 with day_31 as (select explode( array( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 ) ) as day ), --30天 day_30 as (select day from day_31 where day <= 30 ), --29天(闰年的2月份) day_29 as (select day from day_31 where day <= 29 ), --28天(非闰年的2月份) day_28 as (select day from day_31 where day <= 28 ), --最后一天是31号的月份 month_31 as (select explode(array(1, 3, 5, 7, 8, 10, 12)) as month ), --最后一天是30号的月份 month_30 as (select explode(array(4, 6, 9, 11)) as month ), --2月份单拎出来 month_2 as (select 2 as month ), --非闰年的月、日 mmdd_365 as (select month, day from month_31 cross join day_31 union all select month, day from month_30 cross join day_30 union all select month, day from month_2 cross join day_28 ), --闰年的月、日 mmdd_366 as (select month, day from month_31 cross join day_31 union all select month, day from month_30 cross join day_30 union all select month, day from month_2 cross join day_29 ), --注意事项: --目前年份设计只到2040年,如果2041年依旧使用此套代码,这里需要做修改 --非闰年的年份 year_365 as (select explode( array( 2018, 2019, 2021, 2022, 2023, 2025, 2026, 2027, 2029, 2030, 2031, 2033, 2034, 2035, 2037, 2038, 2039 ) ) as year ), --闰年的年份 year_366 as (select explode(array(2020, 2024, 2028, 2032, 2036, 2040)) as year ), --从2018年1月1日至2040年12月31日的数据 --最终插入dm表时还会在此基础上框定时间范围 yyyymmdd as (select year, month, day from year_365 cross join mmdd_365 union all select year, month, day from year_366 cross join mmdd_366 ), --补充其他需要的字段 tmp_calendar as (select make_date(year, month, day) as `date`, year * 10000 + month * 100 + day as yyyymmdd, year * 100 + month as yyyymm, concat(month, '月') as month_cn, year, month, day, case when month <= 3 then 1 when month <= 6 then 2 when month <= 9 then 3 else 4 end as quarter, case when month <= 3 then concat(year, 'Q1') when month <= 6 then concat(year, 'Q2') when month <= 9 then concat(year, 'Q3') else concat(year, 'Q4') end as year_quater, case when month <= 6 then concat(year, 'H1') else concat(year, 'H2') end as half_year, --下面4个字段实际在PBI上没有用到,因此直接设为0 0 as work_days, 0 as mtd_work_days, 0 as is_holiday, 0 as left_work_days from yyyymmdd ) select `date`, yyyymmdd, A.yyyymm, `year`, quarter, `month`, month_cn, `day`, is_holiday, work_days, mtd_work_days, left_work_days, year_quater, half_year, substring(yyyymmdd, 7, 2) dd, CASE WHEN A.yyyymm = B.YYYYMM THEN 'R' ELSE A.yyyymm END YYYYMM_EXTERNAL, CASE WHEN A.yyyymm = B.YYYYMM THEN 'R' ELSE `month` END MONTH_EXTERNAL, CASE WHEN A.`year` = LEFT(B.YYYYMM, 4) THEN 'R' ELSE A.`year` END YEAR_EXTERNAL, -- from DM.DM_TD_CALENDAR A ---------------------------------------------------- --修改时间:20241211 --修改人:Fanxujia --修改内容: --给AIA报告增加单独的R月标记 ---------------------------------------------------- case when A.yyyymm = C.YYYYMM THEN 'R' WHEN A.yyyymm >= C.min_yyyymm AND A.yyyymm <=C.yyyymm THEN A.YYYYMM ELSE NULL end as YYYYMM_EXTERNAL_AIA, case when A.yyyymm = D.NIAD_MONTH THEN 'R' else A.yyyymm end as YYYYMM_EXTERNAL_NIAD, case when A.yyyymm = E.MERGED_DATA_MONTH THEN 'R' when A.yyyymm >= E.MERGED_DATA_MONTH_START AND A.YYYYMM<= E.MERGED_DATA_MONTH THEN A.yyyymm else null end as YYYYMM_MERGED_DATA from tmp_calendar A join ( select MAX(YYYYMM) YYYYMM from external_sales_union where SALES_VALUE_CAL <> 0 ) B on 1 = 1 --获得AIA数据的最新月 cross join ( select max(YYYYMM) as YYYYMM,min(yyyymm) AS min_yyyymm from dm.dm_tf_ext_unionall_sales where DATA_SOURCE = 'AIA(Monthly)' and SALES_VALUE_CAL <> 0 ) C left join retail_niad_sales AS D left join merged_date_calender as E WHERE A.yyyymm >= ( SELECT MIN(YYYYMM) FROM external_sales_union ) AND A.yyyymm <= (LEFT(D.YYYYMM, 4) * 100 + 12) AND A.day = 1