【SQL】概率統計功能


複習:

MySQL語句的邏輯執行順序

from
on
join
where
group by (包含sum()等聚合函數)
having
select
distinct
union
order by

1 累加

1.1 基礎累加

創建訂單表

-- Fixture for section 1.1: one order per day with its amount.
-- Dropping first keeps the script re-runnable.
drop table if exists SC;

-- dt is a plain calendar date. The original datetime(10) is rejected by MySQL,
-- whose fractional-seconds precision for DATETIME is limited to 0..6.
create table SC (
    id       varchar(10),
    dt       date,
    orderamt decimal(18,1)
);

-- Explicit column list so the inserts do not depend on column order.
insert into SC (id, dt, orderamt) values
    ('01', '2018-01-01', 2),
    ('02', '2018-01-02', 3),
    ('03', '2018-01-03', 5),
    ('04', '2018-01-04', 10);

select id, dt, orderamt
from SC;

Output:

id dt orderamt
01 2018-01-01 2
02 2018-01-02 3
03 2018-01-03 5
04 2018-01-04 10

累加算法

  • 使用不等值進行連接
  • 執行順序:先join on,再group by、sum

-- Running total of orderamt by date, built with a non-equi self-join:
-- each row of `cur` is paired with every row of `prev` whose dt is not later,
-- and the paired amounts are then summed per `cur` row.
select
    cur.id,
    cur.dt,
    cur.orderamt,
    sum(prev.orderamt) as cum
from SC as cur
inner join SC as prev
    on prev.dt <= cur.dt
group by
    cur.id,
    cur.dt,
    cur.orderamt;

Output:

id dt orderamt cum
01 2018-01-01 2 2
02 2018-01-02 3 5
03 2018-01-03 5 10
04 2018-01-04 10 20

1.2 分組累加

創建訂單表

-- Fixture for section 1.2: orders spread over two months.
-- The table SC from section 1.1 still exists at this point, so it must be
-- dropped first; otherwise create table fails with "table already exists".
drop table if exists SC;

-- dt is a plain calendar date. The original datetime(10) is rejected by MySQL,
-- whose fractional-seconds precision for DATETIME is limited to 0..6.
create table SC (
    id       varchar(10),
    dt       date,
    orderamt decimal(18,1)
);

-- Explicit column list so the inserts do not depend on column order.
insert into SC (id, dt, orderamt) values
    ('01', '2018-01-01', 2),
    ('02', '2018-01-02', 7),
    ('03', '2018-02-03', 5),
    ('04', '2018-02-04', 10);

select id, dt, orderamt
from SC;

Output:

id dt orderamt
01 2018-01-01 2
02 2018-01-02 7
03 2018-02-03 5
04 2018-02-04 10

累加算法:按月份分組累加

  • 使用不等值進行連接
  • 執行順序:先join on,再group by、sum

方法一:用最簡單的MySQL語法來寫

-- Per-month running total via a non-equi self-join.
-- substr(string, start, length): substr(dt, 1, 7) keeps the first seven
-- characters of the date, which is used here as the month key.
select
    cur.id,
    cur.dt,
    cur.orderamt,
    sum(prev.orderamt) as cum
from SC as cur
inner join SC as prev
    on substr(prev.dt, 1, 7) = substr(cur.dt, 1, 7)  -- same month only
    and prev.dt <= cur.dt                            -- up to and including the current row
group by
    cur.id,
    cur.dt,
    cur.orderamt;

方法二:用窗口函數:一句話搞定 !!!

-- Per-month running total in a single statement using a window function.
-- Evaluation order of the window: partition by first, then order by, and
-- sum() is applied last.
select
    id,
    dt,
    orderamt,
    sum(orderamt) over month_running as cum
from SC
window month_running as (
    partition by substr(dt, 1, 7)
    order by dt
);

Output:

id dt orderamt cum
01 2018-01-01 2 2
02 2018-01-02 7 9
03 2018-02-03 5 5
04 2018-02-04 10 15

1.3 累計概率

計算思路:
  • 第一步:再計算一個按月sum_total的表
  • 第二步:用1.2得到的按月累加 / 按月sum_total的表,得到累計概率%

方法一:用最簡單的MySQL語法來寫

-- Cumulative percentage of each month's order amount (self-join version).
--   aa: running total of orderamt within each month
--   bb: total orderamt per month
-- cum_pct = aa.cum / bb.total, rendered as a string such as '22.22%'.
-- concat() is used instead of ||, because in MySQL's default SQL mode || is
-- the logical OR operator, not string concatenation.
select
    aa.id,
    aa.dt,
    bb.mon,
    aa.orderamt,
    aa.cum,
    bb.total,
    concat(round(aa.cum * 100.00 / bb.total, 2), '%') as cum_pct
from (
    -- Step 1: per-month running total (non-equi self-join within the month).
    select
        a.id,
        a.dt,
        a.orderamt,
        substr(a.dt, 1, 7) as mon,
        sum(b.orderamt) as cum
    from SC a
    inner join SC b
        on a.dt >= b.dt
        and substr(a.dt, 1, 7) = substr(b.dt, 1, 7)
    group by
        a.id,
        a.dt,
        a.orderamt,
        substr(a.dt, 1, 7)
) aa
-- Step 3: attach the monthly total to every row so cum can be divided by it.
left join (
    -- Step 2: total per month. Only the month key and the aggregate are
    -- selected, so every output column is either grouped or aggregated.
    select
        substr(dt, 1, 7) as mon,
        sum(orderamt) as total
    from SC
    group by substr(dt, 1, 7)
) bb
    on aa.mon = bb.mon;
        

方法二:用窗口函數partition by來寫

-- Cumulative percentage of each month's order amount (window-function version).
--   aa: running total of orderamt within each month, via sum() over (...)
--   bb: total orderamt per month
-- cum_pct = aa.cum / bb.total, rendered as a string such as '22.22%'.
-- concat() is used instead of ||, because in MySQL's default SQL mode || is
-- the logical OR operator, not string concatenation.
select
    aa.id,
    aa.dt,
    aa.mon,
    aa.orderamt,
    aa.cum,
    bb.total,
    concat(round(aa.cum * 100.00 / bb.total, 2), '%') as cum_pct
from (
    -- Step 1: per-month running total.
    select
        id,
        dt,
        orderamt,
        substr(dt, 1, 7) as mon,
        sum(orderamt) over (partition by substr(dt, 1, 7) order by dt) as cum
    from SC
) aa
-- Step 3: attach the monthly total to every row so cum can be divided by it.
left join (
    -- Step 2: total per month.
    select
        substr(dt, 1, 7) as mon,
        sum(orderamt) as total
    from SC
    group by substr(dt, 1, 7)
) bb
    on aa.mon = bb.mon;

Output:

id dt mon orderamt cum total cum_pct
01 2018-01-01 2018-01 2 2 9 22.22%
02 2018-01-02 2018-01 7 9 9 100.0%
03 2018-02-03 2018-02 5 5 15 33.33%
04 2018-02-04 2018-02 10 15 15 100.0%

2 環比同比


3 計算日活、留存率




參考文獻
[1] 超哥的雜貨鋪
https://cloud.tencent.com/developer/article/1587654

[2] 計算同比、環比
https://cloud.tencent.com/developer/article/1587652

[3] 統計日活、留存
https://cloud.tencent.com/developer/article/1587655

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章