# 一.简单介绍

# 1.clickhouse 特点

  • 查询速度快
  • 支持 SQL
  • 采用 MPP 架构
  • 列式存储
  • 向量化执行引擎
    • 消除循环来提高效率,比如需要循环 3 次才能完成的工作,转化为 3 个工作并行处理。
  • 适用于 OLAP 场景
  • 相同场景的不同数据量使用不同算法, 比如
    • 常量字符串查询,使用 volnitsky 算法。
    • 非常量字符串,使用 CPU 的向量化 SIMD 指令。
    • 字符串正则匹配,使用 re2 和 hyperscan 算法。

# 2.应用场景

  • 大多数是读请求
  • 数据总是以相当大的批(> 1000 rows)进行写入
  • 不修改已添加的数据
  • 每次查询都从数据库中读取大量的行,但是同时又仅需要少量的列
  • 宽表,即每个表包含着大量的列
  • 较少的查询(通常每台服务器每秒数百个查询或更少)
  • 对于简单查询,允许延迟大约 50 毫秒
  • 列中的数据相对较小: 数字和短字符串(例如,每个 UR60 个字节)
  • 处理单个查询时需要高吞吐量(每个服务器每秒高达数十亿行)
  • 事务不是必须的
  • 对数据一致性要求低
  • 每一个查询除了一个大表外都很小
  • 查询结果明显小于源数据,换句话说,数据被过滤或聚合后能够被盛放在单台服务器的内存中

# 3.使用的存储引擎?

  • MergeTree:基于时间排序
  • ReplacingMergeTree
  • SummingMergeTree
  • AggregatingMergeTree
  • CollapsingMergeTree
  • VersionedCollapsingMergeTree
  • GraphiteMergeTree

# 4.使用 clickhouse 注意事项

  1. 严格区分大小写,注意库名和字段的大小写
  2. 子查询的查询结果需要加 as 别名
  3. 不支持 ndv 函数,支持使用 count(distinct column)
    • 使用方法,如果设置如下参数:
    • set APPX_COUNT_DISTINCT=true;
    • 则所有的 count(distinct col)会在底层计算的时候转成 ndv() 函数,也就是说,在 sql 中可以直接使用 count(distinct col),如果不配置上述参数,则在 sql 中直接写 ndv(col) 也可以
  4. ifnull 函数在 null 时需要有默认值 ifnull(ddopsd.all_sal_act_qty,0)
  5. 查询条件字段最好加上''单引号,避免类型不匹配
  6. 涉及到计算函数的字段,字段类型必须是 Number 类型,不能是 String
  7. 超过 50g 的表不能直接删除,需要添加一个空文件 sudo touch /data/clickhouse/flags/force_drop_table && sudo chmod 666 /data/clickhouse/flags/force_drop_table
  8. 创建表的 order by 会影响查询效率(相当于 mysql 的索引)
  9. left join 尽量使用 any left join
  10. 存在 null 值的字段不能指定为 order by 索引
  11. clickhouse 的字段是 Int32 时,插入数据不能为 null
  12. null 值不能转化为 Int32 类型,会报错
  13. clickhouse 在 21.3.1 以后的版本支持开窗函数
  14. clickhouse 的字段是 Int32 时,插入数据不能为 null
  15. 空值问题
    1. 空表,Nullable 与非空类型可以互转;
    2. Nullable 字段,如果记录不带有 Null 值,可以从 Nullable 转成非空类型;
    3. 含有 null 值的字段不允许转成非空类型;
    4. Nullable 字段不允许用于 order by;

# 5.clickhouse 支持的函数

支持的函数 (opens new window)

# 二.系统 SQL

# 1.大于 50g 不能删除

sudo touch /data/clickhouse/flags/force_drop_table && sudo chmod 666 /data/clickhouse/flags/force_drop_table
1

# 2.查询表的数据量

要统计 ClickHouse 中数据量最大的表,并按数据量降序排列,可以执行以下 SQL 语句:

SELECT
    table,
    sum(bytes) AS size
FROM system.parts
GROUP BY table
ORDER BY size DESC;
1
2
3
4
5
6

查询表行数和数据量

SELECT database, name, total_rows, round(total_bytes / 1024 / 1024 / 1024, 4) as total_memory
from system.tables t
where t.database != 'system'
order by t.database, t.name
1
2
3
4

# 3.查看异步删除是否完成

SELECT
    database,
    table,
    command,
    create_time,
    is_done
FROM system.mutations
order by  create_time  DESC
LIMIT 10
1
2
3
4
5
6
7
8
9

# 4.查询执行计划

	select * from  system.query_log
	WHERE query_kind ='Alter'
	order by query_start_time desc
1
2
3

# 5.查看版本号

SELECT  version();
1

# 6.查询 ck 下的所有数据库

SELECT * from system.databases d ;
1

# 7.查询是否开启开窗函数

SELECT * from `system`.settings s where name = 'allow_experimental_window_functions'
1
  • allow_experimental_window_functions=1 代表开启
  • allow_experimental_window_functions=0 未开启

# 8.开窗函数配置

, sum(ifnull(hot_size_sal_qty, 0)) over (partition by product_code order by period_sdate asc rows between unbounded preceding and current row) as total_hot_size_sal_qty
,rank() over (partition by period_sdate  order by total360_sal_qty_store_rate desc) as day_360_sal_qty_store_rate_rank
1
2

# 9.查询数据量

通过以下 sql 可以按表的数据数据大小降序排列

SELECT
    database,
    table,
    round(sum(bytes)/1024/1024, 2) as size_in_mb,
    round(sum(bytes)/1024/1024/1024, 2) as size_in_gb,
    disk_name
FROM system.parts
where (table like '%_v2%')
GROUP BY database,table,disk_name
ORDER BY size_in_mb DESC;
1
2
3
4
5
6
7
8
9
10

# 10.查询磁盘大小

# 查询磁盘大小
SELECT name, path, formatReadableSize(free_space) AS free, formatReadableSize(total_space) AS total,
formatReadableSize(keep_free_space) AS reserved FROM system.disks;
1
2
3

# 三.新增操作

# 1.创建库

#使用默认库引擎创建库
CREATE DATABASE IF NOT EXISTS chtest;
1
2

# 2.创建表

CREATE TABLE default.boss
(
    row_id                 String,
    user_id                Int32
) ENGINE = MergeTree() ORDER BY
    (row_id) SETTINGS index_granularity = 16384;
1
2
3
4
5
6

设置策略

CREATE TABLE your_table_name (
    column1 data_type,
    column2 data_type,
    columnN data_type
) SETTINGS (
    index_granularity = 8192,
    storage_policy = 'beinsight'
);
1
2
3
4
5
6
7
8

# 3.select 建表

create table t_name_8888
ENGINE = MergeTree
ORDER BY
tuple()
SETTINGS index_granularity = 8192
as
select
	*
from
	dw_1_sad limit 0,1;
1
2
3
4
5
6
7
8
9
10

# 4.clickhouse 多个 order

CREATE TABLE bi.boss_info2
(
    row_id                 String,
    user_id                Int32,
    offline_props_time     String,
    offline_vip_distribute String,
    offline_vip_time       String,
    pay_now                String,
    data_dt                Date
) ENGINE = MergeTree() PARTITION BY data_dt ORDER BY
    (industry, l1_name, l2_name, l3_name, job_city, job_area)
    SETTINGS index_granularity = 16384;
1
2
3
4
5
6
7
8
9
10
11
12

# 5.创建物化视图

create materialized view views.o6
    engine = MergeTree
    order by period_sdate
    POPULATE
    as
select xxxx
1
2
3
4
5
6

# 6.新增列

ALTER TABLE `default`.belle_out ADD COLUMN product_year_name Nullable(String);
ALTER TABLE `default`.belle_out ADD COLUMN season_name Nullable(String);
1
2

# 7.插入语句

INSERT INTO
  default.sales_w (`suppkey`, `brand`, `AA`, `AB`, `AC`, `AD`)
VALUES
  (1, 'nike', 99, 98, 97, 96);
INSERT INTO
  default.sales_w (`suppkey`, `brand`, `AA`, `AB`, `AC`, `AD`)
VALUES
  (2, 'nike', 99, 98, 97, 96);
INSERT INTO
  default.sales_w (`suppkey`, `brand`, `AA`, `AB`, `AC`, `AD`)
VALUES
  (3, 'nike', 99, 98, 97, 96);

1
2
3
4
5
6
7
8
9
10
11
12
13

# 8.通过查询新增

insert into default.tmp_dws_day_org_pro_size_inv_ds select t1.period_sdate as period_sdate,t3.PRODUCT_CODE as PRODUCT_CODE, t1.size_code    as size_code, t1.store_key    as store_key from default.dws_day_org_pro_size_inv_ds t1 any left join default.dim_org_allinfo t2 on (t1.store_key = t2.organ_key) any left join default.dim_pro_allinfo t3 on (t1.product_key = t3.PRODUCT_KEY) where t1.period_sdate >= '$year-$month-01' and t1.period_sdate <= '$year-$month-31';
1

# 9.create table

1.普通建表

CREATE TABLE dis_j.D_F1_shard on  cluster cluster_demo (
`product_code` String,
 `package_name` String
) ENGINE = MergeTree ORDER BY package_name SETTINGS index_granularity = 8192
1
2
3
4

2.分布表

CREATE TABLE dis_j.D_F1_all on  cluster cluster_demo as dis_j.D_F1_shard
ENGINE = Distributed('cluster_demo', 'dis_j', D_F1_shard, rand())
1
2

3.复制表

复制已有的一个表创建表。如果不指定 engine,默认会复制源表 engine。

CREATE TABLE dis_j.tmp1 as dis_j.D_F1_shard
1

4.集群上复制表

复制已有的一个表创建表。在集群上执行,要把 on cluster 写在 as 前面。

CREATE TABLE dis_j.tmp1 on cluster cluster_demo as dis_j.D_F1_shard
1

5.select 创建表

使用 select 查询结果来创建一个表,需要指定 engine。字段列表会使用查询结果的字段列表。

CREATE TABLE dis_j.tmp1 ENGINE = MergeTree ORDER BY package_name AS select * from dis_j.D_F1_shard
1

6.分区表上再分区

最后,在分区表之上再创建分区表可以吗?

–在 ck 中创建表:

create table dis_j.t_area_shard on cluster cluster_demo
(
  area_id         String,
  area_name       String
)ENGINE = MergeTree  ORDER BY area_id SETTINGS index_granularity = 8192
1
2
3
4
5

–分布表

CREATE TABLE dis_j.t_area_all on cluster cluster_demo as dis_jiakai.t_area_shard
ENGINE = Distributed('cluster_demo', 'dis_j', t_area_shard,  rand())
1
2
CREATE TABLE dis_jiakai.t_area_all2 on cluster cluster_demo as dis_jiakai.t_area_all
ENGINE = Distributed('cluster_demo', 'dis_jiakai', t_area_all,  rand())
1
2

执行成功!

试着查询一下:表可建,但不可用!

select * from dis_jiakai.t_area_all2
1
SQL 错误 [48]: [ClickHouse](https://so.csdn.net/so/search?q=ClickHouse) exception, code: 48, host: 10.9.20.231, port:
8123; Code: 48, e.displayText() = DB::Exception: Distributed on
Distributed is not supported (version 19.9.2.4 (official build))
1
2
3

# 四.查询 SQL

# 1.查询年月

#查询年月
SELECT
	DISTINCT year(period_sdate) as years,
	month(period_sdate) as months
from
	tmp_dws_day_org_pro_size_inv_ds_r1
order by
	years,
	months;
1
2
3
4
5
6
7
8
9
#查询分
-- 获取日期分
SELECT  formatDateTime(now(),'%Y-%M-%d %H:%M');

-- 获取开始的分钟
SELECT  toStartOfMinute(NOW()) as event_time;
1
2
3
4
5
6

# 2.计数 sql

	SELECT
	COUNT(1)
from
	default.tmp_dws_day_org_pro_size_inv_ds_r1
WHERE
	period_sdate >= '2019-03-01'
	and period_sdate <= '2019-03-31';
1
2
3
4
5
6
7

# 3.排序函数

SELECT
	rowNumberInAllBlocks()+ 1 AS total_SAL_rank
FROM
	(
        SELECT
        o2.PERIOD_SDATE,
        o2.total_SAL_QTY
        FROM
        o2
        ORDER BY o2.total_SAL_QTY DESC
        LIMIT 1 BY o2.PERIOD_SDATE
    );
1
2
3
4
5
6
7
8
9
10
11
12

# 4.日期处理

将 int 类型转为 date 类型

parseDateTimeBestEffort(toString(20191201000407)) as wet
1

# 5.clickhouse 中的 join

ClickHouse JOIN 查询语法如下:

SELECT <expr_list>FROM <left_table>[GLOBAL] [INNER|LEFT|RIGHT|FULL|CROSS] [OUTER|SEMI|ANTI|ANY|ASOF] JOIN <right_table>(ON <expr_list>)|(USING <column_list>) ...
1
  • 从 right_table 读取该表全量数据,在内存中构建 HASH MAP;
  • 从 left_table 分批读取数据,根据 JOIN KEY 到 HASH MAP 中进行查找,如果命中,则该数据作为 JOIN 的输出;

img

从这个实现中可以看出,如果 right_table 的数据量超过单机可用内存空间的限制,则 JOIN 操作无法完成。通常,两表 JOIN 时,将较小表作为 right_table.

1.创建表

CREATE TABLE default.tmp_pro_size_contribution_rate
(

    `product_key` String,

    `size_code` String,

    `sal_rank` UInt64,

    `total_sal_qty` Nullable(Int64),

    `total_sal_size_qty` Nullable(Int64),

    `contribution_rate` Nullable(Float64)
)
ENGINE = MergeTree
ORDER BY (product_key,
 size_code)
SETTINGS index_granularity = 8192;


CREATE TABLE default.tmp_pro_size_t12
(

    `product_key` String,

    `size_code` String,

    `name` String
)
ENGINE = MergeTree
ORDER BY (product_key,
 size_code)
SETTINGS index_granularity = 8192;
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34

2.插入数据

INSERT INTO default.tmp_pro_size_contribution_rate (`product_key`, `size_code`, `sal_rank`, `total_sal_qty`,
                                                    `total_sal_size_qty`, `contribution_rate`)
VALUES ('1', '215', 99, 98, 97, 50);
INSERT INTO default.tmp_pro_size_contribution_rate (`product_key`, `size_code`, `sal_rank`, `total_sal_qty`,
                                                    `total_sal_size_qty`, `contribution_rate`)
VALUES ('1', '220', 99, 98, 97, 50);
INSERT INTO default.tmp_pro_size_contribution_rate (`product_key`, `size_code`, `sal_rank`, `total_sal_qty`,
                                                    `total_sal_size_qty`, `contribution_rate`)
VALUES ('1', '225', 99, 98, 97, 50);
INSERT INTO default.tmp_pro_size_contribution_rate (`product_key`, `size_code`, `sal_rank`, `total_sal_qty`,
                                                    `total_sal_size_qty`, `contribution_rate`)
VALUES ('1', '230', 99, 98, 97, 50);
INSERT INTO default.tmp_pro_size_contribution_rate (`product_key`, `size_code`, `sal_rank`, `total_sal_qty`,
                                                    `total_sal_size_qty`, `contribution_rate`)
VALUES ('1', '235', 99, 98, 97, 50);
INSERT INTO default.tmp_pro_size_contribution_rate (`product_key`, `size_code`, `sal_rank`, `total_sal_qty`,
                                                    `total_sal_size_qty`, `contribution_rate`)
VALUES ('1', '240', 99, 98, 97, 50);
INSERT INTO default.tmp_pro_size_contribution_rate (`product_key`, `size_code`, `sal_rank`, `total_sal_qty`,
                                                    `total_sal_size_qty`, `contribution_rate`)
VALUES ('1', '245', 99, 98, 97, 50);
INSERT INTO default.tmp_pro_size_contribution_rate (`product_key`, `size_code`, `sal_rank`, `total_sal_qty`,
                                                    `total_sal_size_qty`, `contribution_rate`)
VALUES ('1', '250', 99, 98, 97, 50);

INSERT INTO default.tmp_pro_size_t12
(product_key, size_code, name)
VALUES('1', '215', '1');
INSERT INTO default.tmp_pro_size_t12
(product_key, size_code, name)
VALUES('1', '215', '2');
INSERT INTO default.tmp_pro_size_t12
(product_key, size_code, name)
VALUES('1', '220', '');
INSERT INTO default.tmp_pro_size_t12
(product_key, size_code, name)
VALUES('1', '225', '');
INSERT INTO default.tmp_pro_size_t12
(product_key, size_code, name)
VALUES('1', '230', '');
INSERT INTO default.tmp_pro_size_t12
(product_key, size_code, name)
VALUES('1', '235', '');
INSERT INTO default.tmp_pro_size_t12
(product_key, size_code, name)
VALUES('1', '240', '');
INSERT INTO default.tmp_pro_size_t12
(product_key, size_code, name)
VALUES('1', '245', '');
INSERT INTO default.tmp_pro_size_t12
(product_key, size_code, name)
VALUES('1', '250', '');
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52

3.不带 any 查询

select * from default.tmp_pro_size_contribution_rate t1 left join default.tmp_pro_size_t12 t2 on t1.size_code = t2.size_code;
1

image-20220224113508967

4.带 any 查询

select * from default.tmp_pro_size_contribution_rate t1 left any join 	default.tmp_pro_size_t12 t2 on  t1.size_code=t2.size_code  where size_code ='215';
1

image-20220224113455852

5.总结

消除笛卡尔乘积最根本的原因不是在于连接,而是在于唯一 ID,就像学号,一个学生就只有一个学号,学号就是这个学生的唯一标识码。 左连接只是以左边的表为基准,左边的 ID 和右边 ID 都是唯一,就不会产生笛卡尔现象,如果右边有两个 ID 对应左边一个 ID,就算你是左连接,一样会产生 1 对多的现象。

使用 any 可以消除笛卡尔积,对结果集有一定的影响

# 6.case when

case when 在 clickhouse 中的使用

select  case when new_old_product = 1 then '新货'
 when new_old_product=2 then '旧货' else '新旧货'
end
,*
from default.table
where financial_year = '2023'
  and financial_year_week = '1'
  and new_old_product = 0
order by un_sal_week
;
1
2
3
4
5
6
7
8
9
10

# 7.日期加一

SELECT DATE_ADD('2023-07-11', INTERVAL 1 DAY) AS next_day;
1

# 8.除法小数

SELECT ROUND(column1 / column2, 4) AS division_result FROM your_table;
1

# 9.判断 null

SELECT * FROM your_table WHERE column1 IS NULL;
SELECT * FROM your_table WHERE column1 IS NOT NULL;
1
2

# 10.转换为整数

SELECT CAST(12.34 AS Int64) AS integer_value;
1

# 11.重复数据

SELECT period_sdate, product_key, size_code, managing_city_no
FROM ads_sense_rep.ads_day_city_brand_sku_size_rep
GROUP BY period_sdate, product_key, size_code, managing_city_no
HAVING COUNT(*) > 1;
1
2
3
4

# 五.修改操作

# 1.修改字段 sql

ALTER TABLE qac RENAME COLUMN provId TO ``
1

# 2.clickhouse 修改字段名

ALTER TABLE test_8 RENAME COLUMN teacher_name TO class_teacher_name;
1

# 3.clickhosue 复制数据

create table newtest as test;#先创建表
insert into newtest select * from test;#再插入数据
1
2

# 4.修改数据类型

ALTER table default.dws_day_mgmt_pro_sal_ds MODIFY COLUMN period_sdate Date COMMENT '日期(yyyy-mm-dd)'
1

# 5.修改备注

ALTER table default.dim_pro_allinfo modify COMMENT 'dim_商品信息表【商品】'
1

# 6.修改表名

RENAME TABLE back.dsd_back TO back.dsd_back_01;
1

# 六.删除相关

# 1.删除库

drop database IF EXISTS base_db
1

# 2.删除表

DROP table if EXISTS `default`.`test`
1

# 3.删除数据

alter table
    default.table_name
delete
where
	period_sdate >= '2020-03-01'
	and period_sdate <= '2020-12-31';
1
2
3
4
5
6

# 4.清空数据

通过筛选条件:

-- ck清空表

ALTER  table tmp.dim_pro_allinfo_bak delete where 1=1;
1
2
3

通过 TRUNCATE:

(clickhouse-hive-mysql 都是通用的)

TRUNCATE TABLE dbname.table
1

# 5.查询表大小

查询表大小

SELECT
    sum(rows) AS `总行数`,
    formatReadableSize(sum(data_uncompressed_bytes)) AS `原始大小`,
    formatReadableSize(sum(data_compressed_bytes)) AS `压缩大小`,
    round((sum(data_compressed_bytes) / sum(data_uncompressed_bytes)) * 100, 0) AS `压缩率`,
    `table` AS `表名`
FROM system.parts where database = 'system' group by `table`
1
2
3
4
5
6
7

# 6.清理日志

方法一:使用clickhouse的mutation语句

alter table system.query_log delete where event_date < '2022-01-01'
1

方法二:直接删除分区

# 根据分区删除
alter table system.query_log drop partition '202201';

# 查看分区情况
SELECT * FROM system.parts where database = 'system' and `table`= 'query_log'
1
2
3
4
5

方式三:定期删除

ALTERTABLE query_log MODIFY TTL event_date + toIntervalDay(15);
1

方式四:配置文件

<query_log>
    <database>system</database>
    <table>query_log</table>
    <engine>Engine = MergeTree PARTITION BY event_date ORDER BY event_time TTL event_date + INTERVAL 30 day</engine>
    <flush_interval_milliseconds>7500</flush_interval_milliseconds>
</query_log>
1
2
3
4
5
6

# 七.时间日期函数

# 1.时间日期

  SELECT
        toDateTime('2016-06-15 23:00:00') AS time,
        toDate(time) AS date_local,
        toDate(time, 'Asia/Yekaterinburg') AS date_yekat,
        toString(time, 'US/Samoa') AS time_samoa

    ┌────────────────time─┬─date_local─┬─date_yekat─┬─time_samoa──────────┐
    │ 2016-06-15 23:00:002016-06-152016-06-162016-06-15 09:00:00 │
    └─────────────────────┴────────────┴────────────┴─────────────────────┘
1
2
3
4
5
6
7
8
9

# 2.时间函数

   now()                // 2020-04-01 17:25:40     取当前时间
    toYear()             // 2020                    取日期中的年份
    toMonth()            // 4                       取日期中的月份
    today()              // 2020-04-01              今天的日期
    yesterday()          // 2020-03-31              昨天的额日期
    toDayOfYear()        // 92                      取一年中的第几天
    toDayOfWeek()        // 3                       取一周中的第几天
    toHour()             //17                       取小时
    toMinute()           //25                       取分钟
    toSecond()           //40                       取秒
    toStartOfYear()      //2020-01-01               取一年中的第一天
    toStartOfMonth()     //2020-04-01               取当月的第一天

    formatDateTime(now(),'%Y-%m-%d')        // 2020*04-01         指定时间格式
    toYYYYMM()                              //202004
    toYYYYMMDD()                            //20200401
    toYYYYMMDDhhmmss()                      //20200401172540
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17

# 3.当前日期相关

SELECT
    toDateTime('2019-07-30 10:10:10') AS time,

    -- 将DateTime转换成Unix时间戳
    toUnixTimestamp(time) as unixTimestamp,

    -- 保留 时-分-秒
    toDate(time) as date_local,
    toTime(time) as date_time,   -- 将DateTime中的日期转换为一个固定的日期,同时保留时间部分。

    -- 获取年份,月份,季度,小时,分钟,秒钟
    toYear(time) as get_year,
    toMonth(time) as get_month,

    -- 一年分为四个季度。1(一季度:1-3),2(二季度:4-6),3(三季度:7-9),4(四季度:10-12)
    toQuarter(time) as get_quarter,
    toHour(time) as get_hour,
    toMinute(time) as get_minute,
    toSecond(time) as get_second,

    -- 获取 DateTime中的当前日期是当前年份的第几天,当前月份的第几日,当前星期的周几
    toDayOfYear(time) as "当前年份中的第几天",
    toDayOfMonth(time) as "当前月份的第几天",
    toDayOfWeek(time) as "星期",
    toDate(time, 'Asia/Shanghai') AS date_shanghai,
    toDateTime(time, 'Asia/Shanghai') AS time_shanghai,

    -- 得到当前年份的第一天,当前月份的第一天,当前季度的第一天,当前日期的开始时刻
    toStartOfYear(time),
    toStartOfMonth(time),
    toStartOfQuarter(time),
    toStartOfDay(time) AS cur_start_daytime,
    toStartOfHour(time) as cur_start_hour,
    toStartOfMinute(time) AS cur_start_minute,
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34

# 4.未来时间

-- 第一种,日期格式(指定日期,需注意时区的问题)
WITH
    toDate('2019-09-09') AS date,
    toDateTime('2019-09-09 00:00:00') AS date_time
SELECT
    addYears(date, 1) AS add_years_with_date,
    addYears(date_time, 0) AS add_years_with_date_time;

-- 第二种,日期格式(当前,本地时间)
WITH
    toDate(now()) as date,
    toDateTime(now()) as date_time
SELECT
    now() as now_time,-- 当前时间
    -- 之后1年
    addYears(date, 1) AS add_years_with_date,
    addYears(date_time, 1) AS add_years_with_date_time,

    -- 之后1月
    addMonths(date, 1) AS add_months_with_date,
    addMonths(date_time, 1) AS add_months_with_date_time,

    --之后1周
    addWeeks(date, 1) AS add_weeks_with_date,
    addWeeks(date_time, 1) AS add_weeks_with_date_time,

    -- 之后1天
    addDays(date, 1) AS add_days_with_date,
    addDays(date_time, 1) AS add_days_with_date_time,

    --之后1小时
    addHours(date_time, 1) AS add_hours_with_date_time,

    --之后1分中
    addMinutes(date_time, 1) AS add_minutes_with_date_time,

    -- 之后10秒钟
    addSeconds(date_time, 10) AS add_seconds_with_date_time,

     -- 之后1个季度
    addQuarters(date, 1) AS add_quarters_with_date,
    addQuarters(date_time, 1) AS add_quarters_with_date_time;
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42

# 5.获取最后 2 天

#日期年月日
SELECT *
FROM your_table
WHERE your_date_column >= today() - INTERVAL 2 DAY
;
#日期年月日时分秒
SELECT *
FROM your_table
WHERE toDate(your_date_column) >= today() - INTERVAL 2 DAY
;
1
2
3
4
5
6
7
8
9
10

# 6.获取前 n 天

#获取当前时间的N天前
select subtractDays(now(),n)

#获取指定日期的N天前
select subtractDays(toDateTime('2020-11-29 09:15:00'),n)
1
2
3
4
5

# 八.java 代码

# 1.代码分页

@Override
public PreviewData preview(Integer pageSize, Integer pageNo) {
    DdlNode ddlNode = currentNode();
    //输出节点运行直接写数据到目标表||非输出节点运行就是预览数据
    if (NodeTypeEnum.OUT.name().equalsIgnoreCase(ddlNode.getType())) {
        log("空,输出节点没有预览");
        return PreviewData.builder().build();
    } else {
        this.checkNode();
        PlainSelect plainSelect = this.sql();
        //原始sql
        String originalSql = plainSelect.toString();
        String countSqlFormat = "select count(1) as count from (%s) ";
        //计算总行数sql
        String countSql = String.format(countSqlFormat, originalSql);
        Integer readNum = mutableGraph().getReadNum();
        readNum = readNum == null ? 1000 : readNum;
        //计算起始行
        int start = (pageNo - 1) * pageSize;
        String preSql = originalSql;
        //start 是起始行
        //pageSize 是偏移量
        if (!NodeTypeEnum.AGG.name().equalsIgnoreCase(ddlNode.getType())) {
            preSql = preSql + " limit " + start + "," + pageSize;
        } else {
            preSql = "select * from (" + preSql + ") limit " + start + "," + pageSize;
        }
        List<DdlColumn> ddlColumns = this.columns();
        List<Map<String, Object>> maps = getDdlClickHouseJdbcService().queryForList(preSql);
        PreviewData previewData = convert(ddlColumns, maps);
        List<Map<String, Object>> maps1 = getDdlClickHouseJdbcService().queryForList(countSql);
        if (!CollectionUtils.isEmpty(maps1)) {
            BigInteger count = (BigInteger) (maps1.get(0).get("count"));
            if (count == null) {
                previewData.setTotal(0L);
            } else {
                previewData.setTotal(count.longValue() <= readNum ? count.longValue() : readNum);
            }
        }
        previewData.setSql(originalSql);
        return previewData;
    }
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43

# 2.代码建表

public Boolean createTable(CreateTableRequestDTO dto) {
    StringBuilder createTableSql = new StringBuilder("create table ");
    createTableSql.append("`")
        .append(dto.getTableName())
        .append("`")
        .append("(");
    List<CreateTableRequestDTO.Field> fields = dto.getFields();
    String sql = fields.stream().map(t -> warpName(t.getFieldName()) + " " + "Nullable(" + t.getFieldType() + ")")
        .collect(Collectors.joining(","));
    createTableSql.append(sql).append(") engine = MergeTree() ORDER BY tuple()");
    try {
        jdbcTemplate.execute(createTableSql.toString());
    } catch (DataAccessException e) {
        throw new DdlException("创建表失败:" + e);
    }
    return true;
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17

# 3.代码增删改查

//生成备份表
String tempName = "temp_" + tableName + "_temp";
String sql1 = "create table " + warpName(dbName) + "." + warpName(tempName) + " as " + warpName(dbName) + "." + warpName(tableName);
super.getDdlClickHouseJdbcService().execute(sql1);

//处理for循环变量
this.handleCyclicVariate(connection, newSql, outputFields, tempName);

//修改原表的表名
String tempNameRemove = "temp_" + tableName + "_temp_remove";
String sql2 = "RENAME TABLE " + warpName(dbName) + "." + warpName(tableName) + " TO " + warpName(dbName) + "." + warpName(tempNameRemove);
super.getDdlClickHouseJdbcService().execute(sql2);


//将备份表表名改为原表表名
String sql3 = "RENAME TABLE " + warpName(dbName) + "." + warpName(tempName) + " TO " + warpName(dbName) + "." + warpName(tableName);
super.getDdlClickHouseJdbcService().execute(sql3);

//删除修改的原表
String sql4 = "DROP table "+ warpName(dbName) + "." + warpName(tempNameRemove);
super.getDdlClickHouseJdbcService().execute(sql4);
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
上次更新: 10/29/2024, 10:27:50 AM