微网站开发技术,英文版的wordpress能否改中文版,做vip的网站好做吗,保山市住房和城乡建设局门户网站 友情提示：抄本篇文章答案之前，请务必提前下载好反诈APP。
多年情怀粉，今日粉转黑.... 问题：
Q1: 将附件中 ip_china.csv.zip 文件加载为 Hive 内部表，保持格式与 csv header 一致，表需要开启压缩 Q2: 将…
友情提示：抄本篇文章答案之前，请务必提前下载好反诈APP。
多年情怀粉，今日粉转黑.... 问题：
Q1: 将附件中 ip_china.csv.zip 文件加载为 Hive 内部表，保持格式与 csv header 一致，表需要开启压缩
Q2: 将附件中 login_data.csv.zip 文件加载为 Hive 外部表，保持格式与 csv header 一致，表需要开启压缩，需要按日分区
Q3: 通过 Q1、Q2 加载的数据，将用户登陆表中的 ip 转化为对应的国家地区并落表（避免笛卡尔积）
Q4: 请输出每个分区下，每个 province 的去重登陆人数。输出结构为 pt, province, cnt_login
Q5: 请输出总量数据下，存在登陆数据的各个 province 中，登陆时间最早的前 3 人及对应的登陆时间，若不满 3 人，需要留空。输出结构为 province, account_id_1, login_time_1, account_id_2, login_time_2, account_id_3, login_time_3

q1.sql
-- Staging table: holds the CSV rows as delimited text, header line included.
CREATE TABLE ip_txt (
    ip_start       string
    ,ip_end        string
    ,long_ip_start string
    ,long_ip_end   string
    ,country       string
    ,province      string
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ','
STORED AS TEXTFILE;

-- The path is on the machine running the Hive client (LOCAL); the zip must
-- already be extracted to this location.
LOAD DATA LOCAL INPATH '/root/ip_china.csv' INTO TABLE ip_txt;

-- Managed (internal) table with the same columns, stored as Snappy-compressed ORC.
-- ORC is a binary columnar format, so no text field delimiter is declared here.
CREATE TABLE IF NOT EXISTS ip_orc (
    ip_start       string
    ,ip_end        string
    ,long_ip_start string
    ,long_ip_end   string
    ,country       string
    ,province      string
)
STORED AS ORC
TBLPROPERTIES ("orc.compress" = "SNAPPY");

-- Copy the data rows across. The CSV header was loaded as an ordinary row, so
-- it is filtered out by matching the literal column name.
INSERT INTO TABLE ip_orc
SELECT
    ip_start
    ,ip_end
    ,long_ip_start
    ,long_ip_end
    ,country
    ,province
FROM ip_txt
WHERE ip_start != 'ip_start';

q2.sql
-- Staging table: holds the login CSV rows as delimited text, header line included.
CREATE TABLE login_txt (
    logtime     string
    ,account_id string
    ,ip         string
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ','
STORED AS TEXTFILE;

-- The path is on the machine running the Hive client (LOCAL); the zip must
-- already be extracted to this location.
LOAD DATA LOCAL INPATH '/root/login_data.csv' INTO TABLE login_txt;

-- External table, partitioned by day (ds), stored as Snappy-compressed ORC.
-- ORC is a binary columnar format, so no text field delimiter is declared here.
CREATE EXTERNAL TABLE IF NOT EXISTS login_orc (
    logtime     string
    ,account_id string
    ,ip         string
)
PARTITIONED BY (ds string)
STORED AS ORC
TBLPROPERTIES ("orc.compress" = "SNAPPY");

-- Allow every partition value to come from the query itself.
SET hive.exec.dynamic.partition=true;
SET hive.exec.dynamic.partition.mode=nonstrict;

-- The partition key is the first 10 characters of logtime.
-- NOTE(review): assumes logtime starts with a 'yyyy-MM-dd' date; confirm against the data.
-- The CSV header was loaded as an ordinary row and is filtered out by its literal text.
INSERT INTO TABLE login_orc PARTITION (ds)
SELECT
    logtime
    ,account_id
    ,ip
    ,substr(logtime, 1, 10) AS ds
FROM login_txt
WHERE logtime != 'logtime';

q3.sql
-- Resolve each login's IP to a province and persist the result as user_info.
CREATE TABLE IF NOT EXISTS user_info AS
WITH ip_tmp AS (
    -- Convert the dotted-quad IPv4 string to its integer value:
    -- a*256^3 + b*256^2 + c*256 + d.
    SELECT
        t1.logtime
        ,t1.account_id
        ,t1.ip
        ,t1.ds
        ,cast(split(t1.ip, '\\.')[0] AS bigint) * 256 * 256 * 256
            + cast(split(t1.ip, '\\.')[1] AS bigint) * 256 * 256
            + cast(split(t1.ip, '\\.')[2] AS bigint) * 256
            + cast(split(t1.ip, '\\.')[3] AS bigint) AS ip_long
    FROM login_orc t1
)
SELECT
    ip_tmp.account_id
    ,ip_tmp.ip_long
    ,t2.province
    ,ip_tmp.logtime
    ,ip_tmp.ds
FROM ip_tmp
-- Range join: a login matches the IP segment whose [start, end] contains it.
-- The bounds are stored as strings, so they are cast to compare as integers.
-- Logins whose IP falls in no segment are dropped; switch to LEFT JOIN to
-- keep them with a NULL province.
-- Non-equality conditions in ON require Hive 2.2.0 or later.
INNER JOIN ip_orc t2
    ON ip_tmp.ip_long >= cast(t2.long_ip_start AS bigint)
    AND ip_tmp.ip_long <= cast(t2.long_ip_end AS bigint)
;

-- Quick look at the result.
SELECT * FROM user_info LIMIT 10;
q3.png q4.sql
-- Number of distinct accounts that logged in, per daily partition and province.
select
    ds as pt,
    province,
    count(distinct account_id) as cnt_login
from user_info
group by
    ds,
    province
order by
    ds asc,
    cnt_login asc;
q4.png q5.sql
-- For every province that has login data, list the three accounts with the
-- earliest logins and their login times, one row per province. Slots with no
-- account (provinces with fewer than three accounts) are left as empty strings.
WITH first_login AS (
    -- One row per (province, account): that account's earliest login there,
    -- so a single account cannot occupy more than one of the three slots.
    SELECT
        province
        ,account_id
        ,min(logtime) AS login_time
    FROM user_info
    GROUP BY province, account_id
),
ranked AS (
    -- Rank accounts inside each province by first login time; account_id
    -- breaks ties so the ranking is deterministic.
    -- NOTE(review): logtime is a string, so this assumes a format that sorts
    -- chronologically (e.g. 'yyyy-MM-dd HH:mm:ss'); confirm against the data.
    SELECT
        province
        ,account_id
        ,login_time
        ,row_number() OVER (
            PARTITION BY province
            ORDER BY login_time, account_id
        ) AS rid
    FROM first_login
)
-- Pivot ranks 1..3 into columns. max(CASE ...) picks the single row carrying
-- that rank, and coalesce fills a missing rank with an empty string.
SELECT
    province
    ,coalesce(max(CASE WHEN rid = 1 THEN account_id END), '') AS account_id_1
    ,coalesce(max(CASE WHEN rid = 1 THEN login_time END), '') AS login_time_1
    ,coalesce(max(CASE WHEN rid = 2 THEN account_id END), '') AS account_id_2
    ,coalesce(max(CASE WHEN rid = 2 THEN login_time END), '') AS login_time_2
    ,coalesce(max(CASE WHEN rid = 3 THEN account_id END), '') AS account_id_3
    ,coalesce(max(CASE WHEN rid = 3 THEN login_time END), '') AS login_time_3
FROM ranked
WHERE rid <= 3
GROUP BY province;
q5.png