求问怎么让程序run出结果,Python交流,编程语言专区,鱼C论坛

clclclcl 发表于 2020-8-6 00:57:16

求问怎么让程序run出结果

自己试了好久，真的最后run不出来了……不知道哪里卡住……

求问各路大神怎么能够得出结果……
谢谢！

# @Software : PyCharm

import requests
import bs4
import re

#拆解list
def downgradedict(init_dic):
list = []
for k in init_dic:
   list.append(init_dic)
return list

#获取指定单词的所有具体解释词条
def getexplanations(word):
set = []
url = "https://www.lexico.com/definition/%s" %word
headers = {
   'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.98 Safari/537.36'}
res = requests.get(url, headers=headers)
data = bs4.BeautifulSoup(res.text, "html.parser")
#抓出来的会有许多空列表，这里只把有内容的部分加入返回值的集合里面
for i in range(0, 20):
   for j in range(0, 20):
         exp = data.select(
            "#content > div.lex-container > div.main-content > div > div > div > div > section:nth-child( %d ) > ul > li:nth-child( %d ) > div > p > span.ind" % (
            i, j))
         if len(exp) == 1:
            key = exp
            set.append(key.get_text())
         elif len(exp) == 0:
            pass
return set

上面这个函数经常会run出来许多空列表，不知道有没有什么方便的方法能直接略过它们

#将抓出来的解释列表化为单词列表
def getwords(list):
list2 = []
for i in list:
   k = re.split(r'[;,\s]\s*', i)
   #只留下纯单词，不要标点符号和括号
   for x in k:
         x = x.lower()
         x = x.replace(".", "")
         x = x.replace(")", "")
         x = x.replace("(", "")
         #单词列表里面词不重复
         if x not in list2:
            list2.append(x)
         elif x in list2:
            pass
return list2

似乎是这一步出了问题，但不知道具体如何

#指定一个单词，然后找与之相关的所有解释，统计里面出现出来的词，再加回这个单词列表里面，直到列表不再增长
def getallwords(word):
set1 =
set2 = []
while sorted(set1) != sorted(set2):#设立两个列表，比较两个列表是否一致（研究需要这一点）
   for i in set1:
         x = getexplanations(i)
         y = getwords(x)
         for item in y:             #遍历第一个表单里面的单词，然后找与这些单词相关所有词典解释使用的单词
            if item not in set2: #如果找到的这些单词不在列表2，则将这些单词添加进列表1和列表2
               set1.append(item)
               set2.append(item)
            else:
               pass             #如果这些单词已经存在，则略过
return set2

print(getallwords('phrase'))

yhhpf 发表于 2020-8-6 08:47:56

不是跑不出来...是要跑N久...用多进程搞搞吧~~~

clclclcl 发表于 2020-8-6 11:17:51

yhhpf 发表于 2020-8-6 08:47
不是跑不出来...是要跑N久...用多进程搞搞吧~~~

并不会进程 > <

稍微改了下最后循环的结构，先跑跑看了……
def getallwords(word):
set1 =
set2 =
while len(set1) != 0:
   for i in set1:
         x = getexplanations(i)
         y = getwords(x)
         for item in y:
            if item in set1:
               set1.remove(item)
            else:
               set1.append(item)
            if item not in set2:
               set2.append(item)
            else:
               pass
return set2

陈尚涵 发表于 2020-8-6 16:27:42

clclclcl 发表于 2020-8-6 11:17
并不会进程 > <

稍微改了下最后循环的结构，先跑跑看了……

这个确实最好用进程搞，要不你学学进程？

clclclcl 发表于 2020-8-6 20:22:00

陈尚涵发表于 2020-8-6 16:27
这个确实最好用进程搞，要不你学学进程？

这就去学{:10_285:}

clclclcl 发表于 2020-8-6 20:27:56

改进了一下，能run出来了# @Software : PyCharm

from lxml.etree import HTML
import requests
import re
import bs4

#拆解list
def downgradedict(init_dic):
list = []
for k in init_dic:
   list.append(init_dic)
return list

#获取指定单词的所有具体解释词条
def getexplanations(word):
url = "https://www.lexico.com/definition/%s" %word
headers = {
   'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.89 Safari/537.36'
}
request = requests.get(url, headers=headers)
html = HTML(request.text)
word_content = html.xpath('//span[@class="ind"]/text()')
return word_content[:1]

#将抓出来的解释列表化为单词列表
def getwords(list):
list2 = []
for i in list:
   k = re.split(r'[;,\s]\s*', i)
   #只留下纯单词，不要标点符号和括号
   for x in k:
         x = x.lower()
         x = x.replace(".", "")
         x = x.replace(")", "")
         x = x.replace("(", "")
         #单词列表里面词不重复
         if x not in list2:
            list2.append(x)
         elif x in list2:
            pass
return list2

#指定一个单词，然后找与之相关的所有解释，统计里面出现出来的词，再加回这个单词列表里面，直到列表不再增长
def getallwords(word):
set1 =
set2 =
while len(set1) != 0:
   for i in set1:
         x = getexplanations(i)
         y = getwords(x)
         set1 = list(set(y) - set(set2))
         set2 = list(set(set2).union(y))
return set2

print(getallwords('position'))

陈尚涵 发表于 2020-8-7 13:59:43

clclclcl 发表于 2020-8-6 20:27
改进了一下，能run出来了

总算解决了

页: [1]

鱼C论坛's Archiver

求问怎么让程序run出结果