# 一.常用 SQL

# 1.删除表

DROP TABLE if EXISTS ads_sense_rep.tmp_table_${bizdate};
1

# 2.清空表

TRUNCATE TABLE db.table;
1

# 3.删除分区表指定数据

alter table ads_sense_rep.table_name
 drop partition (partition_day = '2022-05-18');
1
2

# 4.是否重复数据

SELECT period_sdate, product_key, size_code, managing_city_no
FROM ads_sense_rep.ads_day_city_brand_sku_size_rep
GROUP BY period_sdate, product_key, size_code, managing_city_no
HAVING COUNT(*) > 1;
1
2
3
4

# 5.迁移数据

insert into ads_sense_rep.table_name partition (partition_day)
SELECT `product_key`,
       `period_sdate`,
       `total30_sal_qty_store_rate`,
       `partition_day`
FROM ads_sense_rep.table_name;
1
2
3
4
5
6

# 6.修改表名

ALTER TABLE ads_sense_rep.table_name RENAME TO ads_sense_rep.table_name;
1

# 7.创建临时表

DROP TABLE if EXISTS ads_sense_rep.tmp_table_${bizdate};
CREATE TABLE if NOT EXISTS ads_sense_rep.tmp_table_${bizdate} AS
SELECT DISTINCT sku.product_code
FROM ads_sense_rep.table sku
WHERE sku.brand_detail_no = '01'
  AND sku.gender_name IN ('男', '女')
  AND sku.category_name1 = '鞋'
  AND sku.begin_date IS NOT NULL
;
1
2
3
4
5
6
7
8
9

# 8.批量插入数据

INSERT INTO ads_sense_rep.ads_dim_natural_year_week_info
( financial_year, financial_year_week, financial_year_start, financial_year_end, week_start_day, week_end_day)
VALUES
(2016, 1, '2016-01-01', '2016-12-31', '2016-01-01', '2016-01-03'),
(2016, 2, '2016-01-01', '2016-12-31', '2016-01-04', '2016-01-10'),
(2016, 3, '2016-01-01', '2016-12-31', '2016-01-11', '2016-01-17'),
(2016, 4, '2016-01-01', '2016-12-31', '2016-01-18', '2016-01-24'),
(2016, 5, '2016-01-01', '2016-12-31', '2016-01-25', '2016-01-31'),
(2025, 53, '2025-01-01', '2025-12-31', '2025-12-29', '2025-12-31');
1
2
3
4
5
6
7
8
9

# 9.with as 写法

WITH tmp_dim_pro_allinfo AS (
    SELECT product_key
    FROM bdc_dim.table
    WHERE gender_name = '女'
      AND category_name1 = '鞋'
      AND brand_detail_no = 'SD01'
)
SELECT *
FROM tmp_dim_pro_allinfo;
1
2
3
4
5
6
7
8
9

# 10.表插入更新

insert into table  #在表后直接插入,不去重

insert overwrite table   #将原表清空后,再插入数据。
1
2
3

# 二.不常用

# 1.创建表

-- 带分区
CREATE TABLE `ads_sense_rep.table_name`
(
    `product_key`                string COMMENT '商品主键',
    `period_sdate`               date COMMENT '日期(yyyy-mm-dd)',
    `size_code`                  string COMMENT '尺码',
    `total7_sal_qty_store_rate`  decimal(18, 4) COMMENT '累计7天转化率',
    `total30_sal_qty_store_rate` decimal(18, 4) COMMENT '累计30天转化率'
) PARTITIONED BY ( `partition_day` string);

-- 不带分区
CREATE TABLE `ads_sense_rep.tabel_name`
(
    `product_key`                string COMMENT '商品主键',
    `period_sdate`               date COMMENT '日期(yyyy-mm-dd)',
    `size_code`                  string COMMENT '尺码',
    `total7_sal_qty_store_rate`  decimal(18, 4) COMMENT '累计7天转化率',
    `total30_sal_qty_store_rate` decimal(18, 4) COMMENT '累计30天转化率'
) ;


-- 带描述
CREATE TABLE `ads_sense_rep.table_name`
(
    `product_key`  string COMMENT '商品主键',
    `period_sdate` date COMMENT '日期(yyyy-mm-dd)',
    `size_code`    string COMMENT '尺码'
) COMMENT '对方答复'
PARTITIONED BY (`partition_day` string);
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29

创建分区表:

CREATE TABLE `db_name.table_name`
(
    `financial_year`                 int COMMENT '财年',
    `financial_year_week`            int COMMENT '财年第n周',
    `managing_city_no`               string COMMENT '城市编码',
    `etl_time`                       date COMMENT '处理时间'
) comment '城市-品类洞察-品牌概览表'
PARTITIONED BY (
  `partition_financial_year` INT
    ,`partition_financial_year_week` INT);
1
2
3
4
5
6
7
8
9
10
--插入数据
INSERT INTO TABLE db_name.table_name    PARTITION(partition_financial_year,partition_financial_year_week)
SELECT financial_year                         AS financial_year                 --财年
     , financial_year_week                    AS financial_year_week            --第几周
     , date_format(now(), 'yyyy-MM-dd')       AS etl_time                       --跑数时间
     , financial_year                         AS partition_financial_year       --分区财年
     , financial_year_week        AS partition_financial_year_week  --分区财年周
FROM ads_sense_rep.xxxxx distribute BY partition_financial_year,partition_financial_year_week,CAST(rand() * 20 AS INT)
;
1
2
3
4
5
6
7
8
9

# 2.修改表注释

ALTER TABLE ads_sense_rep.table_name SET TBLPROPERTIES ('comment' = '对方答复');
1

# 3.更新数据

update ads_sense_rep.table_name
set sex='女'  where id =1;
1
2

# 4.前一天

格式:${bizdate}
结果:20220608
date_sub(date_format(now(), 'yyyy-MM-dd'), 1)
date_format(now(), 'yyyy-MM-dd') AS etl_time --跑数时间
1
2
3
4

# 5.获取周开始结束

WHERE week_start_day >= date_sub(CURRENT_DATE, 6 + CAST(date_format(CURRENT_DATE, 'u') AS int)) --上周的第一天
  AND week_end_day <= date_sub(CURRENT_DATE, CAST(date_format(CURRENT_DATE, 'u') AS int)) --上周的最后一天
;
1
2
3

# 6.获取月开始结束

WHERE month_start_day >= trunc(add_months(CURRENT_DATE, -1), 'MM') --上个月的第一天
  AND month_end_day <= last_day(date_sub(CURRENT_DATE, DAY(CURRENT_DATE))) --上个月的最后一天
;
1
2
3

# 三.系统 SQL

# 1.hive 查询失败

需要先执行以下语句

set hive.compute.query.using.stats=false;
1

# 2.hive 导数据到 ck

INSERT
	into
	default.table_name
select
	*
from
	jdbc('ch-server','select * from ads_sense_rep.table_name')
1
2
3
4
5
6
7

# 3.刷新表

REFRESH TABLE bdc_dws.dws_day_org_pro_size_sal_ds;
1

# 4.设置 broadcastTimeout

set spark.sql.broadcastTimeout=30000;
1

# 5.耗时统计

select COUNT(*) as querySlowCnt from system.query_log WHERE query_duration_ms > 2000 and `type` = 2;
1

# 四.高阶使用

# 1.分区

INSERT INTO ads_sense_rep.ads_day_city_brand_sku_rep PARTITION(partition_day)
SELECT product_key
FROM bdc_dim.table;
1
2
3

# 2.分区区别

order by,sort by,distribute by,cluster by

  • oreder by: 主要是做全局排序。
  • sort by :局部排序
  • distribute by :分区
  • cluster by:当分区数量小于字段种类时,就有意义。
    • 当 distribute by 和 sort by 所指定的字段相同时,即可以使用 cluster by。
    • 注意:cluster by 指定的列只能是降序,不能指定 asc 和 desc。
上次更新: 10/29/2024, 10:27:50 AM