|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
本帖最后由 cc1236 于 2021-5-4 17:42 编辑
- import datetime
- from scipy.io import savemat
def create_assist_date(datestart=None, dateend=None):
    """Build the date helper table: one entry per day from datestart to dateend.

    Args:
        datestart: First day as a 'YYYY-MM-DD' string. Defaults to
            '2016-01-01'.
        dateend: Last day as a 'YYYY-MM-DD' string. Defaults to today's
            local date.

    Returns:
        A list of 'YYYY-MM-DD' strings covering every day in the range, both
        endpoints included. When dateend is earlier than datestart the list
        contains only datestart.

    Raises:
        ValueError: If either argument cannot be parsed with '%Y-%m-%d'.
    """
    date_format = '%Y-%m-%d'
    if datestart is None:
        datestart = '2016-01-01'
    if dateend is None:
        dateend = datetime.datetime.now().strftime(date_format)

    # Parse the strings so the range can be computed with date arithmetic.
    first_day = datetime.datetime.strptime(datestart, date_format)
    last_day = datetime.datetime.strptime(dateend, date_format)

    # Clamp at zero so a reversed range still yields the start day alone.
    span_days = max((last_day - first_day).days, 0)
    return [
        (first_day + datetime.timedelta(days=offset)).strftime(date_format)
        for offset in range(span_days + 1)
    ]
复制代码
date_list 就是我说的日期辅助表,但它只是 create_assist_date 函数内部的局部变量,所以在第二段代码里获取不到。
第二段:
- import jieba
- import re,string
- from zhon.hanzi import punctuation
- import os
- import csv
# Build the date helper table at module level. Inside create_assist_date the
# name date_list is only a local variable, so the loop below cannot see it
# unless the function's return value is assigned here.
# Pass datestart/dateend to restrict the range to the days for which a
# csv3/danmutext_<date>.csv file actually exists.
date_list = create_assist_date()

# Running totals shared across all dates: text -> number of occurrences.
danmuCount = dict()
# Total number of rows read from all input files.
danmuNum = 0
punc = '~`!#$%^&*()_+-=|\';":/.,?><~·!@#¥%……&*()——+-=“:’;、。?》《{} oh1O○〇●哈'
# Compile once instead of rebuilding the pattern for every row.
# NOTE(review): inside the character class '+-=' is parsed as a range from
# '+' to '=', so ASCII digits and , . / : ; < are stripped as well - confirm
# this is intended before escaping the '-'.
punc_pattern = re.compile(r"[%s]+" % punc)

# newline='' lets the csv module write its own line endings; without it a
# blank row appears after every record on Windows.
# NOTE(review): append mode writes the header row again on every run.
with open('danmuku3.csv', 'a', encoding='utf-8', newline='') as savefile:
    writer = csv.writer(savefile)
    writer.writerow(['name', 'type', 'value', 'date'])
    for date in date_list:
        with open('csv3/danmutext_' + date + '.csv', 'r', encoding='utf-8') as csvfile:
            print('---分析日期', date, '弹幕...\n')
            reader = csv.reader(csvfile)
            for line in reader:
                danmuNum = danmuNum + 1
                # Merge all columns of the row into one string, then drop
                # every character matched by the punctuation pattern.
                line = "".join(line)
                line = punc_pattern.sub("", line)
                # Only count texts of 2 to 15 characters.
                if 2 <= len(line) <= 15:
                    danmuCount[line] = danmuCount.get(line, 0) + 1

        # Rank the cumulative counts after each date, most frequent first.
        sortList = sorted(danmuCount.items(), key=lambda item: item[1], reverse=True)
        # Rows are written only once more than ten distinct texts exist.
        if len(sortList) > 10:
            pltLists = sortList[:10]
            for plttuple in pltLists:
                saveLine = [plttuple[0], 'Chinese', plttuple[1], date]
                writer.writerow(saveLine)
复制代码
|
|