# 一.常用 SQL
# 1.删除表
DROP TABLE if EXISTS ads_sense_rep.tmp_table_${bizdate};
1
# 2.清空表
TRUNCATE TABLE db.table;
1
# 3.删除分区表指定数据
alter table ads_sense_rep.table_name
drop partition (partition_day = '2022-05-18');
1
2
2
# 4.是否重复数据
SELECT period_sdate, product_key, size_code, managing_city_no
FROM ads_sense_rep.ads_day_city_brand_sku_size_rep
GROUP BY period_sdate, product_key, size_code, managing_city_no
HAVING COUNT(*) > 1;
1
2
3
4
2
3
4
# 5.迁移数据
insert into ads_sense_rep.table_name partition (partition_day)
SELECT `product_key`,
`period_sdate`,
`total30_sal_qty_store_rate`,
`partition_day`
FROM ads_sense_rep.table_name;
1
2
3
4
5
6
2
3
4
5
6
# 6.修改表名
ALTER TABLE ads_sense_rep.table_name RENAME TO ads_sense_rep.table_name;
1
# 7.创建临时表
DROP TABLE if EXISTS ads_sense_rep.tmp_table_${bizdate};
CREATE TABLE if NOT EXISTS ads_sense_rep.tmp_table_${bizdate} AS
SELECT DISTINCT sku.product_code
FROM ads_sense_rep.table sku
WHERE sku.brand_detail_no = '01'
AND sku.gender_name IN ('男', '女')
AND sku.category_name1 = '鞋'
AND sku.begin_date IS NOT NULL
;
1
2
3
4
5
6
7
8
9
2
3
4
5
6
7
8
9
# 8.批量插入数据
INSERT INTO ads_sense_rep.ads_dim_natural_year_week_info
( financial_year, financial_year_week, financial_year_start, financial_year_end, week_start_day, week_end_day)
VALUES
(2016, 1, '2016-01-01', '2016-12-31', '2016-01-01', '2016-01-03'),
(2016, 2, '2016-01-01', '2016-12-31', '2016-01-04', '2016-01-10'),
(2016, 3, '2016-01-01', '2016-12-31', '2016-01-11', '2016-01-17'),
(2016, 4, '2016-01-01', '2016-12-31', '2016-01-18', '2016-01-24'),
(2016, 5, '2016-01-01', '2016-12-31', '2016-01-25', '2016-01-31'),
(2025, 53, '2025-01-01', '2025-12-31', '2025-12-29', '2025-12-31');
1
2
3
4
5
6
7
8
9
2
3
4
5
6
7
8
9
# 9.with as 写法
WITH tmp_dim_pro_allinfo AS (
SELECT product_key
FROM bdc_dim.table
WHERE gender_name = '女'
AND category_name1 = '鞋'
AND brand_detail_no = 'SD01'
)
SELECT *
FROM tmp_dim_pro_allinfo;
1
2
3
4
5
6
7
8
9
2
3
4
5
6
7
8
9
# 10.表插入更新
insert into table #在表后直接插入,不去重
insert overwrite table #将原表清空后,再插入数据。
1
2
3
2
3
# 二.不常用
# 1.创建表
-- 带分区
CREATE TABLE `ads_sense_rep.table_name`
(
`product_key` string COMMENT '商品主键',
`period_sdate` date COMMENT '日期(yyyy-mm-dd)',
`size_code` string COMMENT '尺码',
`total7_sal_qty_store_rate` decimal(18, 4) COMMENT '累计7天转化率',
`total30_sal_qty_store_rate` decimal(18, 4) COMMENT '累计30天转化率'
) PARTITIONED BY ( `partition_day` string);
-- 不带分区
CREATE TABLE `ads_sense_rep.tabel_name`
(
`product_key` string COMMENT '商品主键',
`period_sdate` date COMMENT '日期(yyyy-mm-dd)',
`size_code` string COMMENT '尺码',
`total7_sal_qty_store_rate` decimal(18, 4) COMMENT '累计7天转化率',
`total30_sal_qty_store_rate` decimal(18, 4) COMMENT '累计30天转化率'
) ;
-- 带描述
CREATE TABLE `ads_sense_rep.table_name`
(
`product_key` string COMMENT '商品主键',
`period_sdate` date COMMENT '日期(yyyy-mm-dd)',
`size_code` string COMMENT '尺码'
) COMMENT '对方答复'
PARTITIONED BY (`partition_day` string);
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
创建分区表:
CREATE TABLE `db_name.table_name`
(
`financial_year` int COMMENT '财年',
`financial_year_week` int COMMENT '财年第n周',
`managing_city_no` string COMMENT '城市编码',
`etl_time` date COMMENT '处理时间'
) comment '城市-品类洞察-品牌概览表'
PARTITIONED BY (
`partition_financial_year` INT
,`partition_financial_year_week` INT);
1
2
3
4
5
6
7
8
9
10
2
3
4
5
6
7
8
9
10
--插入数据
INSERT INTO TABLE db_name.table_name PARTITION(partition_financial_year,partition_financial_year_week)
SELECT financial_year AS financial_year --财年
, financial_year_week AS financial_year_week --第几周
, date_format(now(), 'yyyy-MM-dd') AS etl_time --跑数时间
, financial_year AS partition_financial_year --分区财年
, financial_year_week AS partition_financial_year_week --分区财年周
FROM ads_sense_rep.xxxxx distribute BY partition_financial_year,partition_financial_year_week,CAST(rand() * 20 AS INT)
;
1
2
3
4
5
6
7
8
9
2
3
4
5
6
7
8
9
# 2.修改表注释
ALTER TABLE ads_sense_rep.table_name SET TBLPROPERTIES ('comment' = '对方答复');
1
# 3.更新数据
update ads_sense_rep.table_name
set sex='女' where id =1;
1
2
2
# 4.前一天
格式:${bizdate}
结果:20220608
date_sub(date_format(now(), 'yyyy-MM-dd'), 1)
date_format(now(), 'yyyy-MM-dd') AS etl_time --跑数时间
1
2
3
4
2
3
4
# 5.获取周开始结束
WHERE week_start_day >= date_sub(CURRENT_DATE, 6 + CAST(date_format(CURRENT_DATE, 'u') AS int)) --上周的第一天
AND week_end_day <= date_sub(CURRENT_DATE, CAST(date_format(CURRENT_DATE, 'u') AS int)) --上周的最后一天
;
1
2
3
2
3
# 6.获取月开始结束
WHERE month_start_day >= trunc(add_months(CURRENT_DATE, -1), 'MM') --上个月的第一天
AND month_end_day <= last_day(date_sub(CURRENT_DATE, DAY(CURRENT_DATE))) --上个月的最后一天
;
1
2
3
2
3
# 三.系统 SQL
# 1.hive 查询失败
需要先执行以下语句
set hive.compute.query.using.stats=false;
1
# 2.hive 导数据到 ck
INSERT
into
default.table_name
select
*
from
jdbc('ch-server','select * from ads_sense_rep.table_name')
1
2
3
4
5
6
7
2
3
4
5
6
7
# 3.刷新表
REFRESH TABLE bdc_dws.dws_day_org_pro_size_sal_ds;
1
# 4.设置 broadcastTimeout
set spark.sql.broadcastTimeout=30000;
1
# 5.耗时统计
select COUNT(*) as querySlowCnt from system.query_log WHERE query_duration_ms > 2000 and `type` = 2;
1
# 四.高阶使用
# 1.分区
INSERT INTO ads_sense_rep.ads_day_city_brand_sku_rep PARTITION(partition_day)
SELECT product_key
FROM bdc_dim.table;
1
2
3
2
3
# 2.分区区别
order by,sort by,distribute by,cluster by
oreder by
: 主要是做全局排序。sort by
:局部排序distribute by
:分区cluster by
:当分区数量小于字段种类时,就有意义。- 当 distribute by 和 sort by 所指定的字段相同时,即可以使用 cluster by。
- 注意:cluster by 指定的列只能是降序,不能指定 asc 和 desc。