從0到1搭建數(shù)倉DWD層案例實踐
關(guān)注公眾號:大數(shù)據(jù)技術(shù)派,回復(fù)資料,領(lǐng)取1024G資料。




drop table if exists dwd_fact_payment_info;create external table dwd_fact_payment_info (`id` string COMMENT 'id',`out_trade_no` string COMMENT '對外業(yè)務(wù)編號',`order_id` string COMMENT '訂單編號',`user_id` string COMMENT '用戶編號',`alipay_trade_no` string COMMENT '支付寶交易流水編號',`payment_amount` decimal(16,2) COMMENT '支付金額',`subject` string COMMENT '交易內(nèi)容',`payment_type` string COMMENT '支付類型',`payment_time` string COMMENT '支付時間',`province_id` string COMMENT '省份ID') COMMENT '支付事實表表'PARTITIONED BY (`dt` string)stored as parquetlocation '/warehouse/gmall/dwd/dwd_fact_payment_info/'tblproperties?("parquet.compression"="lzo");
SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;insert?overwrite?table?dwd_fact_payment_info?partition(dt='2021-05-03')selectpi.id,pi.out_trade_no,pi.order_id,pi.user_id,pi.alipay_trade_no,pi.total_amount,pi.subject,pi.payment_type,pi.payment_time,oi.province_idfrom(????select?*?from?ods_payment_info?where?dt='2021-05-03')pijoin(select id, province_id from ods_order_info where dt='2021-05-03')oion?pi.order_id?=?oi.id;
drop table if exists dwd_fact_order_refund_info;create external table dwd_fact_order_refund_info(`id` string COMMENT '編號',`user_id` string COMMENT '用戶ID',`order_id` string COMMENT '訂單ID',`sku_id` string COMMENT '商品ID',`refund_type` string COMMENT '退款類型',`refund_num` bigint COMMENT '退款件數(shù)',`refund_amount` decimal(16,2) COMMENT '退款金額',`refund_reason_type` string COMMENT '退款原因類型',`create_time` string COMMENT '退款時間') COMMENT '退款事實表'PARTITIONED BY (`dt` string)stored as parquetlocation '/warehouse/gmall/dwd/dwd_fact_order_refund_info/'tblproperties?("parquet.compression"="lzo");
insert overwrite table dwd_fact_order_refund_info partition(dt='2021-05-03')selectid,user_id,order_id,sku_id,refund_type,refund_num,refund_amount,refund_reason_type,create_timefrom ods_order_refund_infowhere?dt='2021-05-03';

drop table if exists dwd_fact_cart_info;create external table dwd_fact_cart_info(`id` string COMMENT '編號',`user_id` string COMMENT '用戶id',`sku_id` string COMMENT 'skuid',`cart_price` string COMMENT '放入購物車時價格',`sku_num` string COMMENT '數(shù)量',`sku_name` string COMMENT 'sku名稱 (冗余)',`create_time` string COMMENT '創(chuàng)建時間',`operate_time` string COMMENT '修改時間',`is_ordered` string COMMENT '是否已經(jīng)下單。1為已下單;0為未下單',`order_time` string COMMENT '下單時間',`source_type` string COMMENT '來源類型',`srouce_id` string COMMENT '來源編號') COMMENT '加購事實表'PARTITIONED BY (`dt` string)stored as parquetlocation '/warehouse/gmall/dwd/dwd_fact_cart_info/'tblproperties?("parquet.compression"="lzo");
insert overwrite table dwd_fact_cart_info partition(dt='2021-05-03')selectid,user_id,sku_id,cart_price,sku_num,sku_name,create_time,operate_time,is_ordered,order_time,source_type,source_idfrom ods_cart_infowhere?dt='2020-06-14';

drop table if exists dwd_fact_coupon_use;create external table dwd_fact_coupon_use(`id` string COMMENT '編號',`coupon_id` string COMMENT '優(yōu)惠券ID',`user_id` string COMMENT 'userid',`order_id` string COMMENT '訂單id',`coupon_status` string COMMENT '優(yōu)惠券狀態(tài)',`get_time` string COMMENT '領(lǐng)取時間',`using_time` string COMMENT '使用時間(下單)',`used_time` string COMMENT '使用時間(支付)') COMMENT '優(yōu)惠券領(lǐng)用事實表'PARTITIONED BY (`dt` string)stored as parquetlocation '/warehouse/gmall/dwd/dwd_fact_coupon_use/'tblproperties?("parquet.compression"="lzo");
`get_time`?string??COMMENT?'領(lǐng)取時間',`using_time`?string??COMMENT?'使用時間(下單)',`used_time`?string??COMMENT?'使用時間(支付)'

insert overwrite table dwd_coupon_use partition(dt)selectid,coupon_id,user_id,order_id,coupon_status,get_time,using_time,used_time,expire_time,coalesce(date_format(used_time,'yyyy-MM-dd'),date_format(expire_time,'yyyy-MM-dd'),'9999-99-99')from ods_coupon_usewhere?dt='2021-05-03';

set hive.exec.dynamic.partition.mode=nonstrict;set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;insert overwrite table dwd_fact_coupon_use partition(dt)selectif(new.id is null,old.id,new.id),if(new.coupon_id is null,old.coupon_id,new.coupon_id),if(new.user_id is null,old.user_id,new.user_id),if(new.order_id is null,old.order_id,new.order_id),if(new.coupon_status is null,old.coupon_status,new.coupon_status),if(new.get_time is null,old.get_time,new.get_time),if(new.using_time is null,old.using_time,new.using_time),if(new.used_time is null,old.used_time,new.used_time),date_format(if(new.get_time is null,old.get_time,new.get_time),'yyyy-MM-dd')from(selectid,coupon_id,user_id,order_id,coupon_status,get_time,using_time,used_timefrom dwd_fact_coupon_usewhere dt in(selectdate_format(get_time,'yyyy-MM-dd')from ods_coupon_use????????where?dt='2021-05-04'))oldfull outer join(selectid,coupon_id,user_id,order_id,coupon_status,get_time,using_time,used_timefrom ods_coupon_usewhere dt='2021-05-04')newon?old.id=new.id;
數(shù)據(jù)倉庫第4版 數(shù)據(jù)倉庫工具箱 DAMA數(shù)據(jù)管理知識體系指南 華為數(shù)據(jù)之道
數(shù)倉建模—指標(biāo)體系
數(shù)倉建?!獙挶淼脑O(shè)計
Spark SQL知識點與實戰(zhàn)
Hive計算最大連續(xù)登陸天數(shù)
Flink計算pv和uv的通用方法
評論
圖片
表情
