#原始示例数据
#1,k1|k2|k3|k1
#2,k1|k1|k5|k3|k6
#3,k3|k6|k7|k8

#建表语句
CREATE TABLE IF NOT EXISTS words(id INT,kw STRING)
ROW FORMAT delimited
FIELDS TERMINATED BY ','
collection items TERMINATED BY '|';

#加载数据(local从本地,否则从hdfs;overwrite覆盖原有数据)
load data [local] inpath 'J:/test.txt' [overwrite] into table words;

#字段转行(拼接成1个字段),结果如下
1 k1
1 k2
1 k3
1 k1
2 k1
2 k1
2 k5
2 k3
2 k6
3 k3
3 k6
3 k7
3 k8
SELECT t1.id,CONCAT_WS(':',t1.k_w,CAST(COUNT(1) AS STRING)) AS kw_time FROM (
SELECT id,k_w FROM words lateral VIEW explode(split(kw,'\\|')) f AS k_w
) AS t1
GROUP BY t1.id,t1.k_w;


#字段转行
SELECT t1.id ,t1.k_w ,COUNT(1) FROM (
SELECT id,k_w FROM words lateral VIEW explode(split(kw,'\\|')) f AS k_w
) AS t1
GROUP BY t1.id,t1.k_w;


#相同id数据分组后,将多行转1个字段类型为集合字符串,结果如下
1 k1:2,k2:1,k3:1
2 k1:2,k3:1,k5:1,k6:1
3 k3:1,k6:1,k7:1,k8:1
SELECT t2.id AS id,CONCAT_WS(',',collect_set(t2.kw_time)) AS kw_time_set FROM (
SELECT t1.id AS id,CONCAT_WS(':',t1.k_w,CAST(COUNT(1) AS STRING)) AS kw_time FROM (
SELECT id,k_w FROM words lateral VIEW explode(split(kw,'\\|')) f AS k_w
) AS t1
GROUP BY t1.id,t1.k_w
) AS t2
GROUP BY t2.id;

#相同id数据分组后,将多行转1个字段类型为集合,结果如下
1 ["k1:2","k2:1","k3:1"]
2 ["k1:2","k3:1","k5:1","k6:1"]
3 ["k3:1","k6:1","k7:1","k8:1"]
SELECT t2.id AS id,collect_set(t2.kw_time) AS kw_time_set FROM (
SELECT t1.id AS id,CONCAT_WS(':',t1.k_w,CAST(COUNT(1) AS STRING)) AS kw_time FROM (
SELECT id,k_w FROM words lateral VIEW explode(split(kw,'\\|')) f AS k_w
) AS t1
GROUP BY t1.id,t1.k_w
) AS t2
GROUP BY t2.id;
扫码关注我们
微信号:SRE实战
拒绝背锅 运筹帷幄