|
|

楼主 |
发表于 2016-7-21 23:19:50
|
显示全部楼层
贴一下白菜的代码。不管能不能解决,希望大家留下点意见,一个人的视野太窄了。
#!/usr/bin/python3
# -*- coding: UTF-8 -*-
import pymysql
import re
import requests
import json
import time
#ptn = r'\d??(\d{4}-\d\d-\d\d).*(\d\d:\d\d:\d\d).*(\d{10}\.\d{3}).*(\s\d+\.\d+\.\d+\.\d+\s).*(www.*?[.com||.com.cn||.com.hk])[/||\s]'
# Pattern for the fixed leading fields of one concatenated log row.
# Capture groups, in order: date, time of day, epoch timestamp with
# milliseconds, client IP (captured *with* its surrounding whitespace),
# and the host part of the last "://host/" occurrence in the row.
ptn = r'\d??(\d{4}-\d\d-\d\d).*(\d\d:\d\d:\d\d).*(\d{10}\.\d{3}).*(\s\d+\.\d+\.\d+\.\d+\s).*://([^/]*)/'
# A number immediately followed by an HTTP method (or NONE); the number is
# what the script stores as the bandwidth value.
rcdn = r'(\d+)\s+(GET|POST|PUT|CONNECT|HEAD|NONE)'
# A double-quoted token starting with "http" and containing no spaces.
rurl = r'"(http[^ ]*)"'

# Compiled once because every row is matched against all three patterns.
_PTN_RE = re.compile(ptn)
_RCDN_RE = re.compile(rcdn)
_RURL_RE = re.compile(rurl)

# Connection settings, passed by keyword: PyMySQL 1.0+ no longer accepts
# positional arguments to connect().
# NOTE(review): credentials are hard-coded as in the original script;
# consider moving them to configuration.
DB_CONFIG = {
    "host": "192.168.4.127",
    "user": "root",
    "password": "syslog_ng321",
    "database": "ceshi",
    "charset": "utf8",
}

# Number of consecutive ids read from `logs` per SELECT.
BATCH_SIZE = 10
# Pause (seconds) before each INSERT, kept from the original script.
INSERT_DELAY = 0.05

# Parameterized INSERT: values are passed separately so the driver escapes
# them, instead of being interpolated into the statement text.
INSERT_SQL = (
    'insert into cdn_logs (data,time_while,time_stamp,ip,domain,OS,pv,'
    'browser,cdn_andwidth,host_andwidth,file_type,url) '
    'values (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)'
)

# Ordered (substring, label) tables: the first substring found in the row
# decides the label, mirroring the precedence of the original if/elif chains.
_FILE_TYPES = (
    ('text/html', 'text/html'),
    ('text/css', 'text/css'),
    ('image/jpeg', 'image/jpeg'),
    ('text/plain', 'text/plain'),
    ('application/javascript', 'application/javascript'),
)
_OPERATING_SYSTEMS = (
    ('Windows', 'windows'),
    ('Mac', 'mac'),
    ('Android', 'android'),
    ('BlackBerry', 'BlackBerry'),
    ('Ubuntu', 'ubuntu'),
)
# Platforms that the browser rules key on.
_PLATFORMS = ('Windows', 'Mac', 'Android')
# Ordered (platform, browser) rules; platform None means "none of
# _PLATFORMS appears in the row". Order is significant.
_BROWSER_RULES = (
    ('Mac', 'Safari'),
    ('Mac', 'MicroMessenger'),
    ('Mac', 'UCBrowser'),
    ('Windows', 'Chrome'),
    ('Windows', 'QQBrowser'),
    ('Windows', 'Firefox'),
    ('Windows', 'MSIE'),
    ('Android', 'UCBrowser'),
    ('Android', 'Chrome'),
    ('Android', 'MQQBrowser'),
    (None, 'Firefox'),
    (None, 'BaiduBrowser'),
)


def _first_label(line, table, default='other'):
    """Return the label of the first table entry whose substring is in line."""
    for needle, label in table:
        if needle in line:
            return label
    return default


def _classify_browser(line):
    """Return the browser name chosen by the first matching platform rule."""
    no_known_platform = not any(name in line for name in _PLATFORMS)
    for platform, browser in _BROWSER_RULES:
        platform_ok = no_known_platform if platform is None else platform in line
        if platform_ok and browser in line:
            return browser
    return 'other'


def parse_log_line(line):
    """Extract the cdn_logs column values from one concatenated log row.

    Args:
        line: all columns of a `logs` row converted to str and joined
            without a separator.

    Returns:
        A 12-tuple in INSERT_SQL column order (date, time, timestamp, ip,
        domain, OS, pv, browser, CDN bandwidth, origin bandwidth, file type,
        url), or None when the row does not contain the mandatory fields.
        The original script raised IndexError on such rows.
    """
    fields = _PTN_RE.search(line)
    traffic = _RCDN_RE.search(line)
    if fields is None or traffic is None:
        return None

    # URL: strip the surrounding quotes and any backslashes. Rows without a
    # quoted URL get an empty string rather than reusing the previous row's
    # value (or failing with NameError on the very first row).
    quoted = _RURL_RE.search(line)
    url = quoted.group(0).replace('"', '').replace("\\", "") if quoted else ''

    # CDN bandwidth vs. origin (back-to-source) bandwidth: rows containing
    # 'NONE' are counted as CDN traffic, all others as origin traffic.
    transferred = traffic.group(1)
    if 'NONE' in line:
        cdn_bandwidth, host_bandwidth = transferred, '0'
    else:
        cdn_bandwidth, host_bandwidth = '0', transferred

    # Page view flag: any row mentioning 'html' counts as one page view.
    page_view = '1' if 'html' in line else '0'

    # NOTE(review): the IP group keeps its surrounding whitespace exactly as
    # the original script stored it — confirm whether it should be stripped.
    return fields.groups() + (
        _first_label(line, _OPERATING_SYSTEMS),
        page_view,
        _classify_browser(line),
        cdn_bandwidth,
        host_bandwidth,
        _first_label(line, _FILE_TYPES),
        url,
    )


def main():
    """Copy parsed rows from `logs` into `cdn_logs`, BATCH_SIZE ids at a time.

    Differences from the original flat script:
      * The loop stops at the highest id present when the run starts, so the
        commit and close are actually reached (the original looped forever).
      * Each batch is committed, and the connection is always closed; the
        original closed an undefined `conn2`.
      * Rows that cannot be parsed are skipped instead of aborting the run.
    """
    conn = pymysql.connect(**DB_CONFIG)
    try:
        with conn.cursor() as cursor:
            # assumes `id` is a numeric column — TODO confirm against schema
            cursor.execute("select max(id) from logs")
            top = cursor.fetchone()
            last_id = top[0] if top and top[0] is not None else 0

            sid = 1
            while sid <= last_id:
                eid = sid + BATCH_SIZE - 1
                print(sid)
                print(eid)
                cursor.execute(
                    "select * from logs where id between %s and %s",
                    (sid, eid),
                )
                for row in cursor.fetchall():
                    record = parse_log_line(''.join(str(col) for col in row))
                    if record is None:
                        continue
                    print(record)
                    time.sleep(INSERT_DELAY)
                    cursor.execute(INSERT_SQL, record)
                # Commit per batch so finished work survives a later failure.
                conn.commit()
                sid = eid + 1
    finally:
        conn.close()


if __name__ == "__main__":
    main()
|