大数据-244 离线数仓 - Hive ODS 层建表与分区加载实战
ODS层概述
ODS(Operational Data Store) 是数据仓库中的重要组成部分,主要用于存储原始的、经过轻度处理的数据。
功能定位:
- 作为数据仓库的前置层
- 承接从业务系统或外部来源抽取的数据
- 数据通常不进行深度加工和聚合,保持业务的原貌
数据特点:
- 原始性:保留业务系统的字段格式
- 实时性:相比数据仓库,ODS的数据更新更为及时
- 短期存储:ODS中的数据存储周期通常较短,一般只保存最近一段时间的数据(如7天、30天)
ODS的作用:
- 缓冲层作用:避免直接从业务系统中抽取数据对业务系统造成压力
- 数据整合和清洗:对原始数据进行轻度清洗
- 支持实时查询:提供较实时的数据查询服务
- 数据追溯:保留业务数据的原始状态
ODS层建表(7张表)
1. ods_trade_orders(订单表)
use ods;
DROP TABLE IF EXISTS `ods.ods_trade_orders`;
CREATE EXTERNAL TABLE `ods.ods_trade_orders`(
`orderid` int,
`orderno` string,
`userid` bigint,
`status` tinyint,
`productmoney` decimal(10, 0),
`totalmoney` decimal(10, 0),
`paymethod` tinyint,
`ispay` tinyint,
`areaid` int,
`tradesrc` tinyint,
`tradetype` int,
`isrefund` tinyint,
`dataflag` tinyint,
`createtime` string,
`paytime` string,
`modifiedtime` string)
COMMENT '订单表'
PARTITIONED BY (`dt` string)
row format delimited fields terminated by ','
location '/user/data/trade.db/orders/';
2. ods_trade_order_product(订单明细表)
use ods;
DROP TABLE IF EXISTS `ods.ods_trade_order_product`;
CREATE EXTERNAL TABLE `ods.ods_trade_order_product`(
`id` string,
`orderid` decimal(10,2),
`productid` string,
`productnum` string,
`productprice` string,
`money` string,
`extra` string,
`createtime` string)
COMMENT '订单明细表'
PARTITIONED BY (`dt` string)
row format delimited fields terminated by ','
location '/user/data/trade.db/order_product/';
3. ods_trade_product_info(产品信息表)
use ods;
DROP TABLE IF EXISTS `ods.ods_trade_product_info`;
CREATE EXTERNAL TABLE `ods.ods_trade_product_info`(
`productid` bigint,
`productname` string,
`shopid` string,
`price` decimal(10,0),
`issale` tinyint,
`status` tinyint,
`categoryid` string,
`createtime` string,
`modifytime` string)
COMMENT '产品信息表'
PARTITIONED BY (`dt` string)
row format delimited fields terminated by ','
location '/user/data/trade.db/product_info/';
4. ods_trade_product_category(产品分类表)
use ods;
DROP TABLE IF EXISTS `ods.ods_trade_product_category`;
CREATE EXTERNAL TABLE `ods.ods_trade_product_category`(
`catid` int,
`parentid` int,
`catname` string,
`isshow` tinyint,
`sortnum` int,
`isdel` tinyint,
`createtime` string,
`level` tinyint)
COMMENT '产品分类表'
PARTITIONED BY (`dt` string)
row format delimited fields terminated by ','
location '/user/data/trade.db/product_category';
5. ods_trade_shops(商家店铺表)
use ods;
DROP TABLE IF EXISTS `ods.ods_trade_shops`;
CREATE EXTERNAL TABLE `ods.ods_trade_shops`(
`shopid` int,
`userid` int,
`areaid` int,
`shopname` string,
`shoplevel` tinyint,
`status` tinyint,
`createtime` string,
`modifytime` string)
COMMENT '商家店铺表'
PARTITIONED BY (`dt` string)
row format delimited fields terminated by ','
location '/user/data/trade.db/shops';
6. ods_trade_shop_admin_org(商家地域组织表)
use ods;
DROP TABLE IF EXISTS `ods.ods_trade_shop_admin_org`;
CREATE EXTERNAL TABLE `ods.ods_trade_shop_admin_org`(
`id` int,
`parentid` int,
`orgname` string,
`orglevel` tinyint,
`isdelete` tinyint,
`createtime` string,
`updatetime` string,
`isshow` tinyint,
`orgType` tinyint)
COMMENT '商家地域组织表'
PARTITIONED BY (`dt` string)
row format delimited fields terminated by ','
location '/user/data/trade.db/shop_org/';
7. ods_trade_payments(支付方式表)
use ods;
DROP TABLE IF EXISTS `ods.ods_trade_payments`;
CREATE EXTERNAL TABLE `ods.ods_trade_payments`(
`id` string,
`paymethod` string,
`payname` string,
`description` string,
`payorder` int,
`online` tinyint)
COMMENT '支付方式表'
PARTITIONED BY (`dt` string)
row format delimited fields terminated by ','
location '/user/data/trade.db/payments/';
ODS层数据加载脚本
#!/bin/bash
source /etc/profile
if [ -n "$1" ] ;then
do_date=$1
else
do_date=`date -d "-1 day" +%F`
fi
# 创建目录
hdfs dfs -mkdir -p /user/data/trade.db/product_category/dt=$do_date
hdfs dfs -mkdir -p /user/data/trade.db/shops/dt=$do_date
hdfs dfs -mkdir -p /user/data/trade.db/shop_org/dt=$do_date
hdfs dfs -mkdir -p /user/data/trade.db/payments/dt=$do_date
hdfs dfs -mkdir -p /user/data/trade.db/orders/dt=$do_date
hdfs dfs -mkdir -p /user/data/trade.db/order_product/dt=$do_date
hdfs dfs -mkdir -p /user/data/trade.db/product_info/dt=$do_date
# 数据迁移
python $DATAX_HOME/bin/datax.py -p "-Ddo_date=$do_date" /opt/wzk/datax/product_category.json
python $DATAX_HOME/bin/datax.py -p "-Ddo_date=$do_date" /opt/wzk/datax/shops.json
python $DATAX_HOME/bin/datax.py -p "-Ddo_date=$do_date" /opt/wzk/datax/shop_org.json
python $DATAX_HOME/bin/datax.py -p "-Ddo_date=$do_date" /opt/wzk/datax/payments.json
python $DATAX_HOME/bin/datax.py -p "-Ddo_date=$do_date" /opt/wzk/datax/orders.json
python $DATAX_HOME/bin/datax.py -p "-Ddo_date=$do_date" /opt/wzk/datax/order_product.json
python $DATAX_HOME/bin/datax.py -p "-Ddo_date=$do_date" /opt/wzk/datax/product_info.json
# 加载 ODS 层数据
sql="
alter table ods.ods_trade_orders add partition(dt='$do_date');
alter table ods.ods_trade_order_product add partition(dt='$do_date');
alter table ods.ods_trade_product_info add partition(dt='$do_date');
alter table ods.ods_trade_product_category add partition(dt='$do_date');
alter table ods.ods_trade_shops add partition(dt='$do_date');
alter table ods.ods_trade_shop_admin_org add partition(dt='$do_date');
alter table ods.ods_trade_payments add partition(dt='$do_date');
"
hive -e "$sql"
执行命令:
sh /opt/wzk/hive/ods_load_trade.sh 2020-07-12
结论
允许在7天内快速查询原始交易记录体现ods层什么核心特点:
这体现了ODS层的短期存储和支持实时查询的核心特点。ODS层设计为只保留最近7天的原始数据,允许业务方快速查询最新的交易记录,同时通过DataX将MySQL数据同步到HDFS,再通过Hive外部表加载到ODS层,实现数据的快速接入和查询。