常见大数据面试SQL-销售额连续3天增长的商户
一、题目
有一张订单记录表 t5_order 包含 订单ID(order_id),商户ID(shop_id),订单时间(order_time)和订单金额(order_amt),请查询出过去至少存在3天销售额连续增长的商户
+-----------+----------+----------------------+------------+
| order_id | shop_id | order_time | order_amt |
+-----------+----------+----------------------+------------+
| 1 | 1001 | 2023-08-21 09:01:00 | 9.99 |
| 2 | 1001 | 2023-08-22 10:00:00 | 19.99 |
| 3 | 1001 | 2023-08-22 13:00:00 | 8.88 |
| 4 | 1001 | 2023-08-23 08:00:00 | 29.99 |
| 5 | 1001 | 2023-08-23 09:00:00 | 19.99 |
| 6 | 1001 | 2023-08-24 11:00:00 | 99.99 |
| 7 | 1001 | 2023-08-25 15:00:00 | 1.99 |
| 8 | 1001 | 2023-08-26 16:00:00 | 2.99 |
| 9 | 1001 | 2023-08-26 17:00:00 | 95.99 |
| 10 | 1002 | 2023-08-21 09:00:00 | 9.99 |
| 11 | 1002 | 2023-08-22 11:00:00 | 1.99 |
| 12 | 1002 | 2023-08-22 11:01:00 | 19.99 |
| 13 | 1002 | 2023-08-22 12:05:00 | 14.99 |
| 14 | 1002 | 2023-08-22 13:00:00 | 6.99 |
| 15 | 1002 | 2023-08-23 14:00:00 | 99.99 |
| 16 | 1002 | 2023-08-24 13:00:00 | 19.99 |
| 17 | 1002 | 2023-08-25 09:00:00 | 19.99 |
| 18 | 1002 | 2023-08-25 11:00:00 | 5.99 |
| 19 | 1002 | 2023-08-25 13:00:00 | 6.99 |
| 20 | 1002 | 2023-08-25 13:07:00 | 7.0 |
| 21 | 1002 | 2023-08-25 15:00:00 | 10.0 |
| 22 | 1002 | 2023-08-26 07:00:00 | 9.99 |
| 23 | 1003 | 2023-08-21 13:07:00 | 7.0 |
| 24 | 1003 | 2023-08-22 15:00:00 | 8.0 |
| 25 | 1003 | 2023-08-23 07:00:00 | 9.99 |
| 26 | 1003 | 2023-08-25 13:07:00 | 10.0 |
| 27 | 1003 | 2023-08-26 15:00:00 | 11.0 |
+-----------+----------+----------------------+------------+
二、分析
- 计算出每天商户的总销售额;
- 由于要连续3日增长,对每个商户计算是否比上一条记录增长,剔除掉不增长的记录;
- 连续问题,使用双排序法计算出每次连续增长天数;
- 查询最后结果
维度 | 评分 |
---|---|
题目难度 | ⭐️⭐️⭐️⭐️⭐️ |
题目清晰度 | ⭐️⭐️⭐️⭐️⭐️ |
业务常见度 | ⭐️⭐️⭐️⭐️ |
三、SQL
1.计算出每天商户的总销售额;
执行SQL
select shop_id,
to_date(order_time) as order_date,
sum(order_amt) as order_amt
from t5_order
group by shop_id, to_date(order_time)
查询结果
+----------+-------------+---------------------+
| shop_id | order_date | order_amt |
+----------+-------------+---------------------+
| 1001 | 2023-08-21 | 9.99 |
| 1001 | 2023-08-22 | 28.869999999999997 |
| 1001 | 2023-08-23 | 49.98 |
| 1001 | 2023-08-24 | 99.99 |
| 1001 | 2023-08-25 | 1.99 |
| 1001 | 2023-08-26 | 98.97999999999999 |
| 1002 | 2023-08-21 | 9.99 |
| 1002 | 2023-08-22 | 43.96 |
| 1002 | 2023-08-23 | 99.99 |
| 1002 | 2023-08-24 | 19.99 |
| 1002 | 2023-08-25 | 49.97 |
| 1002 | 2023-08-26 | 9.99 |
| 1003 | 2023-08-21 | 7.0 |
| 1003 | 2023-08-22 | 8.0 |
| 1003 | 2023-08-23 | 9.99 |
| 1003 | 2023-08-25 | 10.0 |
| 1003 | 2023-08-26 | 11.0 |
+----------+-------------+---------------------+
2.查询增长额,如果差额是null 代表本商户的第一条记录,如果是>0 则代表增长,<= 0 不增长
执行SQL
with tmp as
(select shop_id,
to_date(order_time) as order_date,
sum(order_amt) as order_amt
from t5_order
group by shop_id, to_date(order_time)),
tmp2 as
(
--计算每个商户是否比上一个有销售额日期增加
select shop_id,
order_date,
order_amt,
order_amt - lag(order_amt, 1) over (partition by shop_id order by order_date) as order_amt_diff
from tmp)
select *
from tmp2;
查询结果
+---------------+------------------+---------------------+-----------------------+
| tmp2.shop_id | tmp2.order_date | tmp2.order_amt | tmp2.order_amt_diff |
+---------------+------------------+---------------------+-----------------------+
| 1001 | 2023-08-21 | 9.99 | NULL |
| 1001 | 2023-08-22 | 28.869999999999997 | 18.879999999999995 |
| 1001 | 2023-08-23 | 49.98 | 21.11 |
| 1001 | 2023-08-24 | 99.99 | 50.01 |
| 1001 | 2023-08-25 | 1.99 | -98.0 |
| 1001 | 2023-08-26 | 98.97999999999999 | 96.99 |
| 1002 | 2023-08-21 | 9.99 | NULL |
| 1002 | 2023-08-22 | 43.96 | 33.97 |
| 1002 | 2023-08-23 | 99.99 | 56.029999999999994 |
| 1002 | 2023-08-24 | 19.99 | -80.0 |
| 1002 | 2023-08-25 | 49.97 | 29.98 |
| 1002 | 2023-08-26 | 9.99 | -39.98 |
| 1003 | 2023-08-21 | 7.0 | NULL |
| 1003 | 2023-08-22 | 8.0 | 1.0 |
| 1003 | 2023-08-23 | 9.99 | 1.9900000000000002 |
| 1003 | 2023-08-25 | 10.0 | 0.009999999999999787 |
| 1003 | 2023-08-26 | 11.0 | 1.0 |
+---------------+------------------+---------------------+-----------------------+
3.剔除不增长的记录,计算每次连续次数。(连续问题,这里使用双排序法,就不赘叙了)
执行SQL
--计算每个商户每天销售金额
with tmp as
(select shop_id,
to_date(order_time) as order_date,
sum(order_amt) as order_amt
from t5_order
group by shop_id, to_date(order_time)),
t2 as
(
--计算每个商户是否比上一个有销售额日期增加
select shop_id,
order_date,
order_amt,
order_amt - lag(order_amt, 1) over (partition by shop_id order by order_date) as order_amt_diff
from tmp)
select shop_id,
date_diff,
count(1) aa
from (
--按照商户排序,计算是否连续
select *,
datediff(order_date, '2023-08-01') as shop_date_diff1,
row_number() over (partition by shop_id order by order_date) as shop_date_diff2,
datediff(order_date, '2023-08-01') -
row_number() over (partition by shop_id order by order_date) as date_diff
from t2
where order_amt_diff > 0) t
group by shop_id, date_diff
查询结果
+----------+------------+-----+
| shop_id | date_diff | aa |
+----------+------------+-----+
| 1001 | 20 | 3 |
| 1001 | 21 | 1 |
| 1002 | 20 | 2 |
| 1002 | 21 | 1 |
| 1003 | 20 | 2 |
| 1003 | 21 | 2 |
+----------+------------+-----+
4.计算最终结果
执行SQL
--计算每个商户每天销售金额
with tmp as
(select shop_id,
to_date(order_time) as order_date,
sum(order_amt) as order_amt
from t5_order
group by shop_id, to_date(order_time)),
t2 as
(
--计算每个商户是否比上一个有销售额日期增加
select shop_id,
order_date,
order_amt,
order_amt - lag(order_amt, 1) over (partition by shop_id order by order_date) as order_amt_diff
from tmp)
select shop_id
from (select shop_id,
date_diff,
count(1) aa
from (
--按照商户排序,计算是否连续
select *,
datediff(order_date, '2023-08-01') as shop_date_diff1,
row_number() over (partition by shop_id order by order_date) as shop_date_diff2,
datediff(order_date, '2023-08-01') -
row_number() over (partition by shop_id order by order_date) as date_diff
from t2
where order_amt_diff > 0) t
group by shop_id, date_diff) tt
where aa >= 3;
查询结果
+----------+
| shop_id |
+----------+
| 1001 |
+----------+
四、建表语句和数据插入
--建表语句
CREATE TABLE t5_order (
order_id bigint COMMENT '订单ID',
shop_id bigint COMMENT '商户ID',
order_time string COMMENT '订单时间',
order_amt double COMMENT '订单金额'
) COMMENT '订单记录表';
-- 插入数据
insert into t5_order(order_id,shop_id,order_time,order_amt)
values
(1,1001,'2023-08-21 09:01:00',9.99),
(2,1001,'2023-08-22 10:00:00',19.99),
(3,1001,'2023-08-22 13:00:00',8.88),
(4,1001,'2023-08-23 08:00:00',29.99),
(5,1001,'2023-08-23 09:00:00',19.99),
(6,1001,'2023-08-24 11:00:00',99.99),
(7,1001,'2023-08-25 15:00:00',1.99),
(8,1001,'2023-08-26 16:00:00',2.99),
(9,1001,'2023-08-26 17:00:00',95.99),
(10,1002,'2023-08-21 09:00:00',9.99),
(11,1002,'2023-08-22 11:00:00',1.99),
(12,1002,'2023-08-22 11:01:00',19.99),
(13,1002,'2023-08-22 12:05:00',14.99),
(14,1002,'2023-08-22 13:00:00',6.99),
(15,1002,'2023-08-23 14:00:00',99.99),
(16,1002,'2023-08-24 13:00:00',19.99),
(17,1002,'2023-08-25 09:00:00',19.99),
(18,1002,'2023-08-25 11:00:00',5.99),
(19,1002,'2023-08-25 13:00:00',6.99),
(20,1002,'2023-08-25 13:07:00',7.00),
(21,1002,'2023-08-25 15:00:00',10.00),
(22,1002,'2023-08-26 07:00:00',9.99),
(23,1003,'2023-08-21 13:07:00',7.00),
(24,1003,'2023-08-22 15:00:00',8.00),
(25,1003,'2023-08-23 07:00:00',9.99),
(26,1003,'2023-08-25 13:07:00',10.00),
(27,1003,'2023-08-26 15:00:00',11.00);
本文同步在微信公众号”数据仓库技术“和个人博客”数据仓库技术 (opens in a new tab)“发表;