大数据-249 离线数仓 - DWD层建表与脚本 DWS层建表与脚本

基本介绍

要处理的表有两张:订单表订单产品表

  • 订单表:是周期性事实表,为保留订单状态,可以使用拉链表进行处理
  • 订单产品表:普通的事实表,用常规方法处理

订单状态

  • -3 用户拒收
  • -2 未付款订单
  • -1 用户取消
  • 0 等待发货
  • 1 配送中
  • 2 用户确认收货

DWD 层的定位

DWD 层可以理解为数仓的”细化层”或”明细层”,其核心作用是将原始数据从 ODS 层向更高质量、更具业务价值的方向转化:

  • 处于数仓分层体系的中间部分
  • 对 ODS 层的数据进行业务逻辑处理、数据清洗、去重、规范化等操作,形成细化的业务事实表
  • 为 DWS 层和其他上层应用提供标准化的数据源

DWD层建表

订单事实表(拉链表):

-- 订单事实表(拉链表)
DROP TABLE IF EXISTS dwd.dwd_trade_orders;
create table dwd.dwd_trade_orders(
  `orderId` int,
  `orderNo` string,
  `userId` bigint,
  `status` tinyint,
  `productMoney` decimal,
  `totalMoney` decimal,
  `payMethod` tinyint,
  `isPay` tinyint,
  `areaId` int,
  `tradeSrc` tinyint,
  `tradeType` int,
  `isRefund` tinyint,
  `dataFlag` tinyint,
  `createTime` string,
  `payTime` string,
  `modifiedTime` string,
  `start_date` string,
  `end_date` string
) COMMENT '订单事实拉链表'
partitioned by (dt string)
STORED AS PARQUET;

DWD层数据加载脚本

dwd_load_trade_orders.sh

#!/bin/bash
source /etc/profile
if [ -n "$1" ]
then
    do_date=$1
else
    do_date=`date -d "-1 day" +%F`
fi

sql="
set hive.exec.dynamic.partition.mode=nonstrict;
set hive.exec.dynamic.partition=true;

INSERT OVERWRITE TABLE dwd.dwd_trade_orders
partition(dt)
SELECT orderId,
    orderNo,
    userId,
    status,
    productMoney,
    totalMoney,
    payMethod,
    isPay,
    areaId,
    tradeSrc,
    tradeType,
    isRefund,
    dataFlag,
    createTime,
    payTime,
    modifiedTime,
    case when modifiedTime is not null
    then from_unixtime(unix_timestamp(modifiedTime, 'yyyy-MM-dd HH:mm:ss'),'yyyy-MM-dd')
    else from_unixtime(unix_timestamp(createTime, 'yyyy-MM-dd HH:mm:ss'), 'yyyy-MM-dd')
    end as start_date,
    '9999-12-31' as end_date,
    from_unixtime(unix_timestamp(createTime, 'yyyy-MM-dd HH:mm:ss'), 'yyyy-MM-dd') as dt
FROM ods.ods_trade_orders
WHERE dt='$do_date'

union all

SELECT A.orderId,
    A.orderNo,
    A.userId,
    A.status,
    A.productMoney,
    A.totalMoney,
    A.payMethod,
    A.isPay,
    A.areaId,
    A.tradeSrc,
    A.tradeType,
    A.isRefund,
    A.dataFlag,
    A.createTime,
    A.payTime,
    A.modifiedTime,
    A.start_date,
    CASE WHEN B.orderid IS NOT NULL AND A.end_date > '$do_date'
    THEN date_add('$do_date', -1)
    ELSE A.end_date END AS end_date,
    from_unixtime(unix_timestamp(A.createTime, 'yyyy-MM-dd HH:mm:ss'), 'yyyy-MM-dd') as dt
FROM (SELECT * FROM dwd.dwd_trade_orders WHERE dt>date_add('$do_date', -15)) A
left outer join (SELECT * FROM ods.ods_trade_orders WHERE dt='$do_date') B
ON A.orderId = B.orderId;
"
hive -e "$sql"

DWS层建表

订单明细表(轻度汇总事实表)

-- 订单明细表(轻度汇总事实表)。每笔订单的明细
DROP TABLE IF EXISTS dws.dws_trade_orders;
create table if not exists dws.dws_trade_orders(
  orderid string, -- 订单id
  cat_3rd_id string, -- 商品三级分类id
  shopid string, -- 店铺id
  paymethod tinyint, -- 支付方式
  productsnum bigint, -- 商品数量
  paymoney double, -- 订单商品明细金额
  paytime string -- 订单时间
)
partitioned by (dt string)
STORED AS PARQUET;

订单明细表宽表

-- 订单明细表宽表
DROP TABLE IF EXISTS dws.dws_trade_orders_w;
create table if not exists dws.dws_trade_orders_w(
  orderid string, -- 订单id
  cat_3rd_id string, -- 商品三级分类id
  thirdname string, -- 商品三级分类名称
  secondname string, -- 商品二级分类名称
  firstname string, -- 商品一级分类名称
  shopid string, -- 店铺id
  shopname string, -- 店铺名
  regionname string, -- 店铺所在大区
  cityname string, -- 店铺所在城市
  paymethod tinyint, -- 支付方式
  productsnum bigint, -- 商品数量
  paymoney double, -- 订单明细金额
  paytime string -- 订单时间
)
partitioned by (dt string)
STORED AS PARQUET;

DWS层加载数据脚本

dws_load_trade_orders.sh

#!/bin/bash
source /etc/profile
if [ -n "$1" ]
then
    do_date=$1
else
    do_date=`date -d "-1 day" +%F`
fi

sql="
insert overwrite table dws.dws_trade_orders
partition(dt='$do_date')
select t1.orderid as orderid,
    t3.categoryid as cat_3rd_id,
    t3.shopid as shopid,
    t1.paymethod as paymethod,
    t2.productnum as productsnum,
    t2.productnum*t2.productprice as pay_money,
    t1.paytime as paytime
from (select orderid, paymethod, paytime
      from dwd.dwd_trade_orders
      where dt='$do_date') T1
left join
(select orderid, productid, productnum, productprice
 from ods.ods_trade_order_product
 where dt='$do_date') T2
on t1.orderid = t2.orderid
left join
(select productid, shopid, categoryid
 from dim.dim_trade_product_info
 where start_dt <= '$do_date'
 and end_dt >= '$do_date' ) T3
on t2.productid=t3.productid;

insert overwrite table dws.dws_trade_orders_w
partition(dt='$do_date')
select t1.orderid,
    t1.cat_3rd_id,
    t2.thirdname,
    t2.secondname,
    t2.firstname,
    t1.shopid,
    t3.shopname,
    t3.regionname,
    t3.cityname,
    t1.paymethod,
    t1.productsnum,
    t1.paymoney,
    t1.paytime
from (select orderid,
      cat_3rd_id,
      shopid,
      paymethod,
      productsnum,
      paymoney,
      paytime
      from dws.dws_trade_orders
      where dt='$do_date') T1
join
(select thirdid, thirdname, secondid, secondname,
     firstid, firstname
     from dim.dim_trade_product_cat
     where dt='$do_date') T2
on T1.cat_3rd_id = T2.thirdid
join
(select shopid, shopname, regionname, cityname
 from dim.dim_trade_shops_org
 where dt='$do_date') T3
on T1.shopid = T3.shopid
"
hive -e "$sql"

数据来源表

  • dwd.dwd_trade_orders(拉链表、分区表)
  • ods.ods_trade_product(分区表)
  • dim.dim_trade_product_info(维表、拉链表)
  • dim.dim_trade_product_cat(分区表)
  • dim.dim_trade_shops_org(分区表)