马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
import re
import string
# Cleaning Functions
# Matches an '@' followed by one or more non-whitespace characters.
_TAG_PATTERN = re.compile(r'@\S+')


def remove_tag(text: str) -> str:
    """Return *text* with every ``@mention``-style token removed.

    A token is an ``@`` immediately followed by one or more non-whitespace
    characters. A lone ``@`` followed by whitespace (or end of string) is
    left in place. Surrounding whitespace is not collapsed.
    """
    # Substitute each match with the empty string.
    return _TAG_PATTERN.sub('', text)
# Matches web links of the form http://..., https://..., or www....
_URL_PATTERN = re.compile(r'https?://\S+|www\.\S+')


def remove_URL(text: str) -> str:
    """Return *text* with web links removed.

    A link is either ``http://`` / ``https://`` followed by one or more
    non-whitespace characters, or ``www.`` followed by one or more
    non-whitespace characters. Surrounding whitespace is not collapsed.
    """
    # Call sub on the compiled pattern directly, like the sibling cleaners.
    return _URL_PATTERN.sub('', text)
# Matches <...> tag-like spans and (...) parenthesised spans, each with at
# least one character between the delimiters.
_HTML_PATTERN = re.compile(r'<[^>]+>|\([^)]+\)')


def remove_html(text: str) -> str:
    """Return *text* with angle-bracket and parenthesised spans removed.

    Two kinds of span are deleted, delimiters included:

    * ``<...>`` -- a ``<``, one or more characters other than ``>``, then ``>``
    * ``(...)`` -- a ``(``, one or more characters other than ``)``, then ``)``

    Empty pairs (``<>`` and ``()``) do not match and are kept. Note that,
    despite the name, parenthesised text is stripped as well as tags.
    """
    return _HTML_PATTERN.sub('', text)
# Translation table that deletes every character in string.punctuation:
#   ! " # $ % & ' ( ) * + , - . / : ; < = > ? @ [ \ ] ^ _ ` { | } ~
# Built once at import time rather than on every call.
_PUNCT_TABLE = str.maketrans('', '', string.punctuation)


def remove_punct(text: str) -> str:
    """Return *text* with all ASCII punctuation characters deleted.

    The set of characters removed is exactly ``string.punctuation``.
    Characters outside that set (letters, digits, whitespace, non-ASCII
    punctuation such as full-width commas) are left unchanged.
    """
    return text.translate(_PUNCT_TABLE)
其中,是什么意思 |