odps结合mysql统计
生活随笔
收集整理的這篇文章主要介紹了
odps结合mysql统计
小編覺(jué)得挺不錯(cuò)的,現(xiàn)在分享給大家,幫大家做個(gè)參考.
需求:結(jié)合odps對(duì)日志就行統(tǒng)計(jì)分析并將結(jié)果導(dǎo)入到mysql中
解決方法:結(jié)合odps命令行odpscmd和mysqldump、mysql以及contab完成該工作。
Shell:
#!/bin/bashPORT="3306" #端口號(hào) USERNAME="biuser" #用戶(hù)名 PASSWORD="!#123date" #密碼 DBNAME="bitest" #數(shù)據(jù)庫(kù)名稱(chēng) RUNDATE=`date +%Y-%m-%d` period=`date +"%Y%m" -d "-1days"`PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin:/root/shenl/odpscmd_public/bin:/root/jdk1.7.0_75/bin####1 同步其它庫(kù)的相關(guān)數(shù)據(jù) mysqldump -umdshop -p'xdata123' -h'192.168.128.33' testdb tb_carry tb_city --set-gtid-purged=OFF>testdb.sql mysql -u$USERNAME -p$PASSWORD -D$DBNAME < '/root/testdb.sql'1>/root/syscODPS.log###2 刪除現(xiàn)有分類(lèi) 時(shí)段 渠道 商品數(shù)據(jù) rm -rf /tmp/timespan.txt rm -rf /tmp/timearea.txt rm -rf /tmp/seekarea.txt rm -rf /tmp/tb_good.txt###3 mysql里加工分類(lèi) 時(shí)段 渠道 商品數(shù)據(jù) mysql -u$USERNAME -p$PASSWORD -D$DBNAME <'/root/shenl/timespan.sql' 1>/root/shenl/synctimespan.log###4 odps里刪除分類(lèi) 時(shí)段 渠道 商品數(shù)據(jù) odpscmd -e "DROP TABLE tb_bi_meta_timespan;" odpscmd -e "DROP TABLE tb_bi_meta_timearea;" odpscmd -e "DROP TABLE tb_bi_meta_seekarea;" odpscmd -e "DROP TABLE tb_good;"###5 odps里創(chuàng)建分類(lèi) 時(shí)段 渠道 商品數(shù)據(jù)以便后期加工 odpscmd -e "CREATE TABLE tb_bi_meta_timespan(catalogid int,catalogname string,span int, channel int,action int);" odpscmd -e "CREATE TABLE tb_bi_meta_timearea(catalogid int,catalogname string,province string,province_id int,channel int,action int);" odpscmd -e "CREATE TABLE tb_bi_meta_seekarea(province string,province_id int,channel int);" odpscmd -e "create table tb_good(gid string,g_title string,g_catalog_id string,g_status string);"###6 上傳分類(lèi) 時(shí)段 渠道 商品數(shù)據(jù)到odps對(duì)應(yīng)的表中 odpscmd -e "tunnel upload /tmp/timespan.txt tb_bi_meta_timespan;" odpscmd -e "tunnel upload /tmp/timearea.txt tb_bi_meta_timearea;" odpscmd -e "tunnel upload /tmp/seekarea.txt tb_bi_meta_seekarea;" odpscmd -e "tunnel upload /tmp/tb_good.txt tb_good;"###7 按照分區(qū)插入日志數(shù)據(jù)odpscmd -e "INSERT into table tb_bi_goodbrowse partition(periodsplit="$period")SELECT channel,province,city,itemid,'' as goodclassify,datetrunc(requesttime,'DD') AS period,geid,userid,requesttime,action,getdate() as inserttime FROM tb_bi_marketlog A WHERE isdate(requesttime,'yyyy-mm-dd hh:mi:ss') AND A.period=to_char(dateadd(getdate(), -1, 'dd'),'yyyymm') AND datetrunc(A.requesttime,'DD') = datetrunc(dateadd(getdate(), -1,'dd'),'dd');"###8 odps計(jì)算生成出報(bào)表數(shù)據(jù)和導(dǎo)出 odpscmd -f "/root/shenl/odpsstat0320.sql"###9 刪除現(xiàn)有的報(bào)表數(shù)據(jù) rm -rf /var/log/mysql/timespanout.txt rm -rf /var/log/mysql/timeareaout.txt rm -rf /var/log/mysql/seekareaout.txt###10 odps的報(bào)表數(shù)據(jù)下載到/var/log/mysql目錄內(nèi)odpscmd -e "tunnel download max_compute.tb_bi_report_timespans /var/log/mysql/timespanout.txt" chown mysql:mysql /var/log/mysql/timespanout.txt odpscmd -e "tunnel downloadmax_compute.tb_bi_report_timeareas /var/log/mysql/timeareaout.txt" chown mysql:mysql /var/log/mysql/timeareaout.txt odpscmd -e "tunnel download -ni ''max_compute.tb_bi_report_seekareas /var/log/mysql/seekareaout.txt" chown mysql:mysql /var/log/mysql/seekareaout.txt###11 mysql導(dǎo)入odps里統(tǒng)計(jì)后的數(shù)據(jù) mysql -u$USERNAME -p$PASSWORD -D$DBNAME <'/root/shenl/load2mysql.sql' 1>/root/shenl/load2mysql.log1)?其中/root/shenl/timespan.sql的腳本內(nèi)容:
use bi; SELECT A.id AS catalogid,A.c_name catalogname,B.i AS span,C.i AS channel,D.i AS action INTO OUTFILE '/tmp/timespan.txt' FIELDS TERMINATED BY ',' LINES TERMINATED BY '\n' FROM (SELECT id,c_name FROM shenl_catalog WHERE c_parent_id = 0) A CROSS JOIN tb_incr B CROSS JOIN (SELECT * FROM tb_incr WHERE i>0 AND i<4)C CROSS JOIN (SELECT * FROM tb_incr WHERE i>0 AND i<5)D;SELECT A.id AS catalogid,A.c_name catalogname,B.province,B.province_id,C.i AS channel,D.i AS action into outfile '/tmp/timearea.txt' FIELDS TERMINATED BY ',' LINES TERMINATED BY '\n' FROM (SELECT id,c_name FROM shenl_catalog WHERE c_parent_id = 0)A CROSS JOIN (SELECT DISTINCT province,province_id FROM tb_bi_area)B CROSS JOIN(SELECT * FROM tb_incr WHERE i>0 AND i<4)C CROSS JOIN(SELECT * FROM tb_incr WHERE i>0 AND i<5)D;SELECT B.province,B.province_id,C.i AS channel into outfile '/tmp/seekarea.txt' FIELDS TERMINATED BY ',' LINES TERMINATED BY '\n'FROM (SELECT DISTINCT province, province_id FROM tb_bi_area) B CROSS JOIN (SELECT * FROM tb_incr WHERE i > 0 AND i < 4) C;SELECT gid,REPLACE(REPLACE(REPLACE(g_title,char(10),''),CHAR(13),''),',','') AS g_title,g_catalog_id,g_status into outfile '/tmp/shenl_goods.txt' FIELDS TERMINATED BY ',' LINES TERMINATED BY '\n' FROM shenl_goods;2)?/root/shenl/odpsstat0320.sql 內(nèi)統(tǒng)計(jì)腳本內(nèi)容:
insert OVERWRITE table tb_bi_report_timespans SELECT datetrunc(dateadd(getdate(), -1, 'dd'),'dd') as period,A.catalogname,A.span,case A.channel WHEN 1 THEN "tel" WHEN 2 THEN "smell" WHEN 3 THEN "bigscreen" END AS channel, A.action,case when D.catalogid IS NULL then 0 else browsetimes END as stattimes FROM tb_bi_meta_timespan ALEFT OUTER JOIN(SELECT C.g_catalog_id as catalogid,channel,datepart(browsetime,'hh') as spanid,COUNT(DISTINCT browsetime) as browsetimes,action from tb_bi_goodbrowse AJOIN shenl_goods CON A.good = C.gid AND A.period=datetrunc(dateadd(getdate(), -1, 'dd'),'dd') AND A.periodsplit=to_char(dateadd(getdate(), -1, 'dd'),'yyyymm')GROUP BY C.g_catalog_id,datepart(browsetime,'hh'),channel,action)DON A.catalogid = D.catalogid AND A.span = D.spanid AND A.channel = D.channel AND A.action = D.action;insert OVERWRITE table tb_bi_report_timeareas SELECT datetrunc(dateadd(getdate(), -1, 'dd'),'dd') as period,A.catalogname,A.province,case A.channel WHEN 1 THEN "tel" WHEN 2 THEN "smell" WHEN 3 THEN "bigscreen" END AS channel, A.action,case when D.catalogid IS NULL then 0 else browsetimes END as stattimes FROM tb_bi_meta_timearea ALEFT OUTER JOIN(SELECT B.province_id,period,channel,COUNT(1) as browsetimes,C.g_catalog_id as catalogid,actionfrom tb_bi_goodbrowse AJOIN (SELECT distinct province,province_id FROM tb_bi_area)BON A.province = B.province_idJOIN shenl_goods CON A.good = C.gid AND A.period=datetrunc(dateadd(getdate(), -1, 'dd'),'dd') AND A.periodsplit=to_char(dateadd(getdate(), -1, 'dd'),'yyyymm')GROUP BY B.province_id,C.g_catalog_id,period,channel,action)DON A.catalogid = D.catalogid AND A.province_id = D.province_id AND A.channel=D.channel AND A.action = D.action;insert overwrite table tb_bi_report_seekareas SELECT datetrunc(dateadd(getdate(), -1, 'dd'),'dd') as period,A.province,D.item as item,case A.channel WHEN 1 THEN "tel" WHEN 2 THEN "smell" WHEN 3 THEN "bigscreen" END AS channel,case when D.province_id IS NULL then 0 else browsetimes END as stattimes FROM tb_bi_meta_seekarea ALEFT OUTER JOIN(SELECT B.province_id,datetrunc(dateadd(getdate(), -1, 'dd'),'dd') as period,channel,item,COUNT(DISTINCT requesttime) as browsetimesfrom tb_bi_marketlog AJOIN (SELECT distinct province,province_id FROM tb_bi_area)BON A.province = B.province_id AND A.action =2 AND isdate(A.requesttime,'yyyy-mm-dd hh:mi:ss') AND A.period=to_char(dateadd(getdate(), -1, 'dd'),'yyyymm') AND datetrunc(A.requesttime,'DD') = datetrunc(dateadd(getdate(), -1, 'dd'),'dd') GROUP BY B.province_id,period,channel,item)DON A.province_id = D.province_id AND A.channel=D.channel3)?/root/shenl/load2mysql.sql腳本中的內(nèi)容是:
load data infile '/var/log/mysql/timespanout.txt' into tabletb_bi_report_timespans fields terminated by ',' lines terminated by '\n'; load data infile '/var/log/mysql/timeareaout.txt' into tabletb_bi_report_timeareas fields terminated by ',' lines terminated by '\n'; load data infile '/var/log/mysql/seekareaout.txt' into tabletb_bi_report_seekareas fields terminated by ',' lines terminated by '\n';代碼解讀:詳見(jiàn)注釋部分
總結(jié)
以上是生活随笔為你收集整理的odps结合mysql统计的全部?jī)?nèi)容,希望文章能夠幫你解決所遇到的問(wèn)題。
- 上一篇: win7怎么确保显卡打开 Win7如何正
- 下一篇: SQL基础操作_8_基础概念