建设招聘网站,能让手机流畅到爆的软件,如何制作网页图片文字链接,网页制作基础教程本社窗口函数（开窗函数）
1 相关函数说明
普通的聚合函数聚合的行集是组,开窗函数聚合的行集是窗口。因此,普通的聚合函数每组(Group by)只返回一个值，而开窗函数则可为窗口中的每行都返回一个值。简单理解，就是对查询的结果多出一列…窗口函数（开窗函数）
1 相关函数说明
普通的聚合函数聚合的行集是组,开窗函数聚合的行集是窗口。因此,普通的聚合函数每组(Group by)只返回一个值，而开窗函数则可为窗口中的每行都返回一个值。简单理解，就是对查询的结果多出一列，这一列可以是聚合值，也可以是排序值。开窗函数一般分为两类：聚合开窗函数和排序开窗函数。
OVER()：指定分析函数工作的数据窗口大小，这个数据窗口大小可能会随着行的变化而变化。
CURRENT ROW：当前行
n PRECEDING：往前 n 行数据
n FOLLOWING：往后 n 行数据
UNBOUNDED：起点，UNBOUNDED PRECEDING 表示从前面的起点，UNBOUNDED FOLLOWING 表示到后面的终点
LAG(col,n,default_val)：往前第 n 行数据
LEAD(col,n,default_val)：往后第 n 行数据
NTILE(n)：把有序窗口的行分发到指定数据的组中，各个组有编号，编号从 1 开始，对于每一行，NTILE 返回此行所属的组的编号。注意：n 必须为 int 类型。
2 数据准备：name，orderdate，cost
jack,2017-01-01,10
tony,2017-01-02,15
jack,2017-02-03,23
tony,2017-01-04,29
jack,2017-01-05,46
jack,2017-04-06,42
tony,2017-01-07,50
jack,2017-01-08,55
mart,2017-04-08,62
mart,2017-04-09,68
neil,2017-05-10,12
mart,2017-04-11,75
neil,2017-06-12,80
mart,2017-04-13,94
3 需求
（1）查询在 2017 年 4 月份购买过的顾客及总人数
（2）查询顾客的购买明细及月购买总额
（3）上述的场景，将每个顾客的 cost 按照日期进行累加
（4）查询每个顾客上次的购买时间
（5）查询前 20% 时间的订单信息
4 创建本地 business.txt，导入数据
[root@localhost datas]$ vi business.txt
5 创建 hive 表并导入数据
-- Sample order table for the window-function examples: one row per purchase.
-- The backing text file is comma-separated in the order name,orderdate,cost,
-- so the field delimiter must be the quoted literal ','.
create table business(
    name string,
    orderdate string,  -- date text such as 2017-01-01 (see the sample rows)
    cost int
) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';

-- Load the local sample file into the table; the path is a quoted string literal.
load data local inpath '/usr/soft/datas/business.txt' into table business;
5.1 over() 初体验
-- Intentionally failing example: a plain column next to an aggregate with no
-- GROUP BY is rejected by Hive with the error shown on the next line.
select name,count(*) from business;
FAILED: SemanticException [Error 10025]: Line 1:7 Expression not in GROUP BY key 'name'
-- Using over(): with an empty window specification count(*) is evaluated over
-- all rows of the result, so every row is returned together with the total count.
select name,
       count(*) over()
from business;
over() 类似于group by但是在分组时每一个字段都单独作为一组
6 按需求查询数据
1 查询在 2017 年 4 月份购买过的顾客及总人数
-- Step 1: the distinct customers who bought something in April 2017.
-- substring() positions are 1-based; the month prefix is compared as a string.
select distinct name
from business
where substring(orderdate, 1, 7) = '2017-04';

-- Step 2: how many such customers there are (a single row with the count).
-- name is not selected here: without a GROUP BY on the outer query it would
-- raise the same "Expression not in GROUP BY key" error (Error 10025).
select count(*)
from (
    select name
    from business
    where substring(orderdate, 1, 7) = '2017-04'
    group by name
) t1;

-- Step 3: both in one query. group by collapses the rows to one per customer,
-- then count(*) over () counts those grouped rows, i.e. the number of customers.
select name,
       count(*) over ()
from business
where substring(orderdate, 1, 7) = '2017-04'
group by name;
2 查询顾客的购买明细及月购买总额
-- Every purchase row plus the total cost of all purchases in the same month.
-- The window is partitioned by the yyyy-MM prefix rather than month(orderdate),
-- so the same month of two different years is never merged into one total.
select name,
       orderdate,
       cost,
       sum(cost) over(partition by substring(orderdate, 1, 7)) as month_total
from business;
3 将每个顾客的 cost 按照日期进行累加
-- Sort by name and date.
select name, orderdate, cost
from business
order by name, orderdate;

-- Sort by name and date and accumulate cost at the same time.
select name,
       orderdate,
       cost,
       sum(cost) over(partition by name order by orderdate)
from business;

-- The same running total with the window frame written out explicitly.
select name,
       orderdate,
       cost,
       sum(cost) over(partition by name order by orderdate rows between UNBOUNDED PRECEDING and current row)
from business;

-- Side-by-side comparison of the common window specifications.
select name,
       orderdate,
       cost,
       -- sum over all rows
       sum(cost) over() as sample1,
       -- partitioned by name: sum of all rows of that customer
       sum(cost) over(partition by name) as sample2,
       -- partitioned by name: running total within the customer
       sum(cost) over(partition by name order by orderdate) as sample3,
       -- from the partition start to the current row; same result as sample3
       -- here (the default frame is RANGE-based, so the two would differ only
       -- if a customer had several rows with the same orderdate)
       sum(cost) over(partition by name order by orderdate rows between UNBOUNDED PRECEDING and current row) as sample4,
       -- current row plus the previous row
       sum(cost) over(partition by name order by orderdate rows between 1 PRECEDING and current row) as sample5,
       -- previous row, current row and next row
       sum(cost) over(partition by name order by orderdate rows between 1 PRECEDING AND 1 FOLLOWING) as sample6,
       -- current row plus every following row
       sum(cost) over(partition by name order by orderdate rows between current row and UNBOUNDED FOLLOWING) as sample7
from business;
-- rows 必须跟在 order by 子句之后，对排序的结果进行限制，使用固定的行数来限制分区中的数据行数量
4 查看顾客上次的购买时间
-- Original statement: previous purchase date of the same customer.
-- lag() returns NULL for a customer's first order because no earlier row exists.
select name,
       orderdate,
       lag(orderdate, 1) over(partition by name order by orderdate)
from business;

-- With a default value: the first order shows '1900-01-01' instead of NULL.
-- The default must be a quoted string; unquoted, 1900-01-01 is integer
-- arithmetic and evaluates to 1898.
select name,
       orderdate,
       lag(orderdate, 1, '1900-01-01') over(partition by name order by orderdate)
from business;

-- Extension: the previous purchase date (time1, with default) and the one
-- before that (time2, NULL when fewer than two earlier orders exist).
select name,
       orderdate,
       cost,
       lag(orderdate, 1, '1900-01-01') over(partition by name order by orderdate) as time1,
       lag(orderdate, 2) over(partition by name order by orderdate) as time2
from business;
5 查询前 20%时间的订单信息
-- Orders in the earliest 20% by date: ntile(5) spreads the rows, ordered by
-- orderdate, over 5 numbered buckets (numbering starts at 1), and only
-- bucket 1 is kept.
select name,
       orderdate,
       cost,
       sorted
from (
    select name,
           orderdate,
           cost,
           ntile(5) over(order by orderdate) sorted
    from business
) t
where sorted = 1;