[已解决]Python os，习题的4个小问题。

Peteryo01223 · 发表于 2021-1-27 09:23:58

本帖最后由 Peteryo01223 于 2021-1-27 09:26 编辑

原题：
编写一个程序，用户输入关键字，查找当前文件夹内（如果当前文件夹内包含文件夹，则进入文件夹继续搜索）所有含有该关键字的文本文件（.txt后缀），要求显示该文件所在的位置以及关键字在文件中的具体位置（第几行第几个字符），程序实现如图（从略）：

看答案后，我照抄了一遍，并加了 encoding = 'UTF-8' 以及我自己的注释。可惜，运行时报错了。

import os
def print_pos(key_dict):
keys = key_dict.keys()
# .keys() returns a view of every key in the dict; search_in_file builds this dict with line numbers as the keys.
keys = sorted(keys)
# sorted() returns those line numbers as a list in ascending order, so matches are printed top to bottom.
for each_key in keys:
print('关键字出现在第 %s 行，第 %s 个位置。' % (each_key, str(key_dict.keys[each_key])))
# NOTE: `key_dict.keys[each_key]` subscripts the bound method rather than the dict and raises the TypeError shown in the traceback below; the reference answer uses `key_dict[each_key]`. Poster's question 3: why is each_key a line number here -- isn't it a key, or the keyword?
def pos_in_line(line, key):
# Return the 1-based positions at which key occurs in the string line.
pos = [] # positions found so far
begin = line.find(key) # 0-based index of the first occurrence, or -1 when key is absent
while begin != -1:
# str.find() returns -1 when there is no (further) occurrence, which is what ends the loop.
pos.append(begin + 1)
# +1 converts Python's 0-based index into the 1-based position shown to the user.
begin = line.find(key, begin + 1)
# Resume the search one character after the previous hit.
return pos
# The caller receives the list of positions (empty when key never occurs).
def search_in_file(file_name, key):
# Scan the text file file_name and map each line number that contains key to the positions of key in that line.
f = open(file_name, encoding = 'UTF-8')
# The poster added encoding = 'UTF-8' here; the reference answer calls open(file_name) without it.
count = 0
# Running line counter; it is incremented before each line is examined, so numbering starts at 1.
key_dict = dict()
# Maps line number -> list of positions of key within that line.
for each_line in f:
# Iterating the file object yields one line at a time.
count += 1
# Advance the counter for every line read, whether or not it matches.
if key in each_line:
# Only lines that contain key are recorded.
pos = pos_in_line(each_line, key)
# pos_in_line returns every 1-based position of key in this line.
key_dict[count] = pos
# Store the positions under the current line number.
f.close()
# Release the file handle (not reached if reading raises, since there is no try/finally or with block).
return key_dict
# An empty dict means key was not found anywhere in the file.
def search_files(key, detail):
# Search the .txt files under the current working directory, subdirectories included, for key and print the results.
all_files = os.walk(os.getcwd())
# os.getcwd() returns the current working directory.
# os.walk() descends from that directory (it never goes upward), yielding one (dirpath, dirnames, filenames) tuple per directory.
txt_files = []
# Collects the names of the .txt files found.
for i in all_files:
# i is the 3-tuple describing one directory of the tree.
for each_file in i[2]:
# i[2] is the list of file names in that directory; i is a tuple, not a str. Poster's question 4: what does this line mean -- each item at index 2 of the str i?
if os.path.splitext(each_file)[1] == '.txt':
# splitext()[1] is the extension; keep only text files.
txt_files.append(each_file)
# NOTE: only the bare file name is stored here; the reference answer first joins it with i[0] via os.path.join, which is needed to open files that live in subdirectories.
for each_txt_file in txt_files:
# Process each collected file in turn.
key_dict = search_in_file(each_txt_file, key)
# key_dict maps line number -> positions of key for this file.
if key_dict:
# A non-empty dict means the keyword occurs in the file.
print('========================================================')
print('在文件【%s】中找到关键字【%s】' % (each_txt_file, key))
if detail in ['YES','Yes','yes']: # Poster's question 5: what is the variable detail for? (The script passes in the user's answer to the YES/NO prompt; positions are printed only for these three spellings.)
print_pos(key_dict)
# Print the per-line positions recorded for this file.
# Script entry: ask for the keyword and for whether to list positions, then run the search.
key = input('请将该脚本放于待查找的文件夹内，请输入关键字：')
detail = input('请问是否需要打印关键字【%s】在文件中的具体位置（YES/NO）:' % key)
search_files(key, detail) # run the search with both answers

复制代码

运行后的报错：

=========================== RESTART: F:/20210127a.py ===========================
请将该脚本放于待查找的文件夹内，请输入关键字：下
请问是否需要打印关键字【下】在文件中的具体位置（YES/NO）:yes
========================================================
在文件【a.txt】中找到关键字【下】
Traceback (most recent call last):
File "F:/20210127a.py", line 83, in <module>
search_files(key, detail) # 运行此函数
File "F:/20210127a.py", line 78, in search_files
print_pos(key_dict)
File "F:/20210127a.py", line 9, in print_pos
print('关键字出现在第 %s 行，第 %s 个位置。' % (each_key, str(key_dict.keys[each_key])))
TypeError: 'builtin_function_or_method' object is not subscriptable
>>>

复制代码

问题：
1. 报错该怎么改？
2. 第9行，print('关键字出现在第 %s 行，第 %s 个位置。' % (each_key, str(key_dict.keys[each_key])))，这里 each_key 怎么会是第几行的行数呢？我理解，each_key，是一个键，一个关键字吧？
3. 第62行，for each_file in i[2]:，这是什么意思？str i当中，索引值为2的位置，的每一个 each?
4. 第77行，detail 是个变量，英语是 ‘细节’ 的意思，小甲鱼拿这个detail变量在定义什么。是随便命名的么？

最佳答案

月排行榜 / 总排行榜

逃兵

2021-1-27 09:23:59

本帖最后由逃兵于 2021-1-27 09:49 编辑

1.报错
你抄错了
第9行应该为

print('关键字出现在第 %s 行，第 %s 个位置。' % (each_key, str(key_dict[each_key])))

复制代码

第65行少抄一行

each_file = os.path.join(i[0], each_file)

复制代码

原答案：

import os
def print_pos(key_dict):
    """Print every recorded occurrence of the keyword, one output line per file line.

    Args:
        key_dict: Mapping of line number -> list of 1-based positions of the
            keyword within that line (the shape search_in_file returns).
    """
    # Iterating a dict yields its keys, so sorted() gives the line numbers in
    # ascending order without a separate .keys() call.
    for line_no in sorted(key_dict):
        print('关键字出现在第 %s 行，第 %s 个位置。' % (line_no, str(key_dict[line_no])))
def pos_in_line(line, key):
    """Return the 1-based positions of every occurrence of key in line.

    Overlapping occurrences are counted, because each search resumes one
    character after the previous hit rather than after the whole match.

    Args:
        line: The text to search.
        key: The substring to look for.

    Returns:
        A list of ints; empty when key does not occur in line.
    """
    pos = []
    begin = line.find(key)
    while begin != -1:  # str.find() returns -1 when there is no further match
        pos.append(begin + 1)  # users count from 1, Python indexes from 0
        begin = line.find(key, begin + 1)  # continue just past the last hit
    return pos
def search_in_file(file_name, key, encoding=None):
    """Find every line of a text file that contains key.

    Args:
        file_name: Path of the file to scan.
        key: The substring to look for.
        encoding: Text encoding passed to open(). The default, None, keeps
            the platform's default encoding, as a plain open(file_name) does.

    Returns:
        A dict mapping 1-based line number -> list of 1-based positions of
        key in that line (as returned by pos_in_line). Empty when key does
        not occur in the file.
    """
    # `with` closes the file even if reading fails part-way through.
    # errors='replace' substitutes U+FFFD for bytes that cannot be decoded
    # instead of raising UnicodeDecodeError, so a single oddly-encoded file
    # no longer aborts the search of all the remaining files.
    with open(file_name, encoding=encoding, errors='replace') as f:
        return {
            line_no: pos_in_line(each_line, key)
            for line_no, each_line in enumerate(f, 1)  # line numbers start at 1
            if key in each_line
        }
def search_files(key, detail):
    """Search every .txt file under the current working directory for key.

    Subdirectories are searched as well. For each file that contains key, a
    banner and the file's path are printed; the per-line positions follow
    when the user asked for them.

    Args:
        key: The keyword to look for.
        detail: The user's answer to the YES/NO prompt. Any capitalisation of
            "yes" (surrounding whitespace ignored) enables the position list.
    """
    # Normalise once rather than enumerating spellings, so 'yEs' or ' yes '
    # is accepted in addition to the original 'YES' / 'Yes' / 'yes'.
    show_detail = detail.strip().lower() == 'yes'

    # os.walk yields (dirpath, dirnames, filenames) for each directory; joining
    # dirpath with the bare name gives a path that opens at any depth.
    # The extension is lower-cased so that 'NOTES.TXT' is found too.
    txt_files = [
        os.path.join(root, name)
        for root, _dirs, names in os.walk(os.getcwd())
        for name in names
        if os.path.splitext(name)[1].lower() == '.txt'
    ]

    for each_txt_file in txt_files:
        key_dict = search_in_file(each_txt_file, key)
        if key_dict:  # an empty dict means no occurrence in this file
            print('================================================================')
            print('在文件【%s】中找到关键字【%s】' % (each_txt_file, key))
            if show_detail:
                print_pos(key_dict)
# Script entry: ask for the keyword and for whether to list positions, then
# search from the current working directory.
key = input('请将该脚本放于待查找的文件夹内，请输入关键字：')
detail = input('请问是否需要打印关键字【%s】在文件中的具体位置（YES/NO）：' % key)
search_files(key, detail)

复制代码

跳转到最佳答案楼层

逃兵 · 发表于 2021-1-27 09:23:59

本帖最后由逃兵于 2021-1-27 09:49 编辑

1.报错
你抄错了
第9行应该为

print('关键字出现在第 %s 行，第 %s 个位置。' % (each_key, str(key_dict[each_key])))

复制代码

第65行少抄一行

each_file = os.path.join(i[0], each_file)

复制代码

原答案：

import os
def print_pos(key_dict):
    """Print every recorded occurrence of the keyword, one output line per file line.

    Args:
        key_dict: Mapping of line number -> list of 1-based positions of the
            keyword within that line (the shape search_in_file returns).
    """
    # Iterating a dict yields its keys, so sorted() gives the line numbers in
    # ascending order without a separate .keys() call.
    for line_no in sorted(key_dict):
        print('关键字出现在第 %s 行，第 %s 个位置。' % (line_no, str(key_dict[line_no])))
def pos_in_line(line, key):
    """Return the 1-based positions of every occurrence of key in line.

    Overlapping occurrences are counted, because each search resumes one
    character after the previous hit rather than after the whole match.

    Args:
        line: The text to search.
        key: The substring to look for.

    Returns:
        A list of ints; empty when key does not occur in line.
    """
    pos = []
    begin = line.find(key)
    while begin != -1:  # str.find() returns -1 when there is no further match
        pos.append(begin + 1)  # users count from 1, Python indexes from 0
        begin = line.find(key, begin + 1)  # continue just past the last hit
    return pos
def search_in_file(file_name, key, encoding=None):
    """Find every line of a text file that contains key.

    Args:
        file_name: Path of the file to scan.
        key: The substring to look for.
        encoding: Text encoding passed to open(). The default, None, keeps
            the platform's default encoding, as a plain open(file_name) does.

    Returns:
        A dict mapping 1-based line number -> list of 1-based positions of
        key in that line (as returned by pos_in_line). Empty when key does
        not occur in the file.
    """
    # `with` closes the file even if reading fails part-way through.
    # errors='replace' substitutes U+FFFD for bytes that cannot be decoded
    # instead of raising UnicodeDecodeError, so a single oddly-encoded file
    # no longer aborts the search of all the remaining files.
    with open(file_name, encoding=encoding, errors='replace') as f:
        return {
            line_no: pos_in_line(each_line, key)
            for line_no, each_line in enumerate(f, 1)  # line numbers start at 1
            if key in each_line
        }
def search_files(key, detail):
    """Search every .txt file under the current working directory for key.

    Subdirectories are searched as well. For each file that contains key, a
    banner and the file's path are printed; the per-line positions follow
    when the user asked for them.

    Args:
        key: The keyword to look for.
        detail: The user's answer to the YES/NO prompt. Any capitalisation of
            "yes" (surrounding whitespace ignored) enables the position list.
    """
    # Normalise once rather than enumerating spellings, so 'yEs' or ' yes '
    # is accepted in addition to the original 'YES' / 'Yes' / 'yes'.
    show_detail = detail.strip().lower() == 'yes'

    # os.walk yields (dirpath, dirnames, filenames) for each directory; joining
    # dirpath with the bare name gives a path that opens at any depth.
    # The extension is lower-cased so that 'NOTES.TXT' is found too.
    txt_files = [
        os.path.join(root, name)
        for root, _dirs, names in os.walk(os.getcwd())
        for name in names
        if os.path.splitext(name)[1].lower() == '.txt'
    ]

    for each_txt_file in txt_files:
        key_dict = search_in_file(each_txt_file, key)
        if key_dict:  # an empty dict means no occurrence in this file
            print('================================================================')
            print('在文件【%s】中找到关键字【%s】' % (each_txt_file, key))
            if show_detail:
                print_pos(key_dict)
# Script entry: ask for the keyword and for whether to list positions, then
# search from the current working directory.
key = input('请将该脚本放于待查找的文件夹内，请输入关键字：')
detail = input('请问是否需要打印关键字【%s】在文件中的具体位置（YES/NO）：' % key)
search_files(key, detail)

复制代码

洛阳城 · 发表于 2021-1-27 09:54:54

1.暂时还没完全看懂，还没解决，因为我把代码复制到本地尝试复现你的环境然后debug，但是遇到了不同的报错哈哈哈。。。
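2.这里注意，each_key 是字典 key_dict 的键，而不是用户输入的关键字。key_dict 是在 search_in_file 里用 key_dict[count] = pos 建立的，键就是行号 count，值是关键字在该行中的位置列表。字典是无序的，所以代码04、06行先取出所有键再用 sorted() 排序，这样 each_key 就是按从小到大排列的行数。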
3.代码55行的os.walk()返回的每个元素都是一个三元组(root,dirs,files)，所以这里的 i 是元组而不是字符串，i[2]就是当前目录 root 下所有文件名组成的列表，for each_file in i[2] 即逐个取出这些文件名
4.联合代码第77、82行可以知道，这里detail是一个标志位，是在判断用户需不需要打印关键字在文件中的具体位置，如果你输入的是YES/Yes/yes就打印

Peteryo01223 · 发表于 2021-1-27 10:02:57

本帖最后由 Peteryo01223 于 2021-1-27 10:05 编辑

逃兵发表于 2021-1-27 09:48
1.报错
你抄错了
第9行应该为

好的，缺行问题，及抄错问题，我已经更正，谢谢。

不过，我用标准答案，run 后结果，前半部分正常，后半部分，还是报错。如下：

=========================== RESTART: F:/20210127b.py ===========================
请将该脚本放于待查找的文件夹内，请输入关键字：下
请问是否需要打印关键字【下】在文件中的具体位置（YES/NO）：yes
================================================================
在文件【F:\a.txt】中找到关键字【下】
关键字出现在第 14 行，第 [4] 个位置。
关键字出现在第 24 行，第 [12] 个位置。
================================================================
在文件【F:\b.txt】中找到关键字【下】
关键字出现在第 14 行，第 [4] 个位置。
关键字出现在第 24 行，第 [12] 个位置。
Traceback (most recent call last):
File "F:/20210127b.py", line 56, in <module>
search_files(key, detail)
File "F:/20210127b.py", line 46, in search_files
key_dict = search_in_file(each_txt_file, key)
File "F:/20210127b.py", line 25, in search_in_file
for each_line in f:
File "C:\Users\user\AppData\Local\Programs\Python\Python38\lib\codecs.py", line 322, in decode
(result, consumed) = self._buffer_decode(data, self.errors, final)
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xdf in position 0: invalid continuation byte
>>>

复制代码

逃兵 · 发表于 2021-1-27 10:07:04

Peteryo01223 发表于 2021-1-27 10:02
好的，缺行问题，及抄错问题，我已经更正，谢谢。

不过，我用标准答案，run 后结果，前半部分正常， ...

UnicodeDecodeError：“ utf-8”编解码器无法解码位置0的字节0xdf：无效的连续字节

你把encoding='utf-8'去掉试试看

Peteryo01223 · 发表于 2021-1-27 10:18:47

洛阳城发表于 2021-1-27 09:54
1.暂时还没完全看懂，还没解决，因为我把代码复制到本地尝试复现你的环境然后debug但是遇到了不同得报错哈 ...

很有帮助的提示，谢谢

Peteryo01223 · 发表于 2021-1-27 10:19:56

逃兵发表于 2021-1-27 10:07
UnicodeDecodeError：“ utf-8”编解码器无法解码位置0的字节0xdf：无效的连续字节

你把encoding='utf ...

貌似不行的，我这个版本是 python38，每次查看中文 txt 文件，都不得不加上这个 encoding='utf-8'，要不然直接 copy 小甲鱼的标准答案，都会报错的。

逃兵 · 发表于 2021-1-27 10:35:00

Peteryo01223 发表于 2021-1-27 10:19
貌似不行的，我这个版本是 python38，每次查看中文 txt 文件，都不得不加上这个 encoding='utf-8‘，要不 ...

encoding='gbk'试一下

Peteryo01223 · 发表于 2021-1-27 10:42:52

逃兵发表于 2021-1-27 10:35
encoding='gbk'试一下

试了试，报错了。

请将该脚本放于待查找的文件夹内，请输入关键字：下
请问是否需要打印关键字【下】在文件中的具体位置（YES/NO）:yes
Traceback (most recent call last):
File "F:/20210127a.py", line 85, in <module>
search_files(key, detail) # 运行此函数
File "F:/20210127a.py", line 73, in search_files
key_dict = search_in_file(each_txt_file, key)
File "F:/20210127a.py", line 35, in search_in_file
for each_line in f:
UnicodeDecodeError: 'gbk' codec can't decode byte 0x80 in position 2: illegal multibyte sequence
>>>

复制代码

账号		自动登录	找回密码
密码			立即注册