python 新手,文件readline()报错
def file_compare(file1, file2, prefix='C:\\Learning_file\\', encoding=None):
    """Compare two text files line by line.

    Args:
        file1: Name of the first file; it is appended to ``prefix``.
        file2: Name of the second file; it is appended to ``prefix``.
        prefix: Directory prefix (including the trailing separator) that is
            prepended to both file names.
        encoding: Text encoding passed to ``open`` for both files. ``None``
            keeps the platform default, which is what the original code used.

    Returns:
        A list of 1-based line numbers at which the two files differ. A line
        that exists in only one of the files counts as a difference.

    Raises:
        OSError: If either file cannot be opened.
        UnicodeDecodeError: If a file's bytes are not valid in ``encoding``.
    """
    # Function-scope import keeps this snippet self-contained.
    from itertools import zip_longest

    differ = []  # line numbers whose content differs
    # ``with`` closes both files even when decoding fails part-way through.
    with open(prefix + file1, encoding=encoding) as f1, \
            open(prefix + file2, encoding=encoding) as f2:
        # zip_longest pads the shorter file with None, so surplus lines in
        # either file are reported instead of being silently ignored.
        for count, (line1, line2) in enumerate(zip_longest(f1, f2), start=1):
            if line1 != line2:
                differ.append(count)
    return differ
# Ask for the two file names, compare the files and report the outcome.
file1 = input('请输入需要比较的头一个文件名:')
file2 = input('请输入需要比较的另一个文件名:')

differ = file_compare(file1, file2)

if differ:
    # At least one line differs: print the total, then each line number.
    print('两个文件共有【%d】处不同:' % len(differ))
    for each in differ:
        print('第 %d 行不一样' % each)
else:
    print('两个文件完全一样!')
def file_compare(file1, file2, prefix='C:\\Learning_file\\', encoding='utf-8'):
    """Compare two text files line by line, decoding them as UTF-8 by default.

    Args:
        file1: Name of the first file; it is appended to ``prefix``.
        file2: Name of the second file; it is appended to ``prefix``.
        prefix: Directory prefix (including the trailing separator) that is
            prepended to both file names.
        encoding: Text encoding passed to ``open`` for both files. Pass a
            different codec when the files were not saved as UTF-8.

    Returns:
        A list of 1-based line numbers at which the two files differ. A line
        that exists in only one of the files counts as a difference.

    Raises:
        OSError: If either file cannot be opened.
        UnicodeDecodeError: If a file's bytes are not valid in ``encoding``.
    """
    # Function-scope import keeps this snippet self-contained.
    from itertools import zip_longest

    first_path = prefix + file1
    second_path = prefix + file2
    differ = []  # line numbers whose content differs
    # The context manager releases both handles even if a decode error is
    # raised while iterating.
    with open(first_path, encoding=encoding) as f1, \
            open(second_path, encoding=encoding) as f2:
        # Pair the lines up; the shorter file is padded with None so that
        # extra lines in the longer file are reported as differences.
        for count, (line1, line2) in enumerate(zip_longest(f1, f2), start=1):
            if line1 != line2:
                differ.append(count)
    return differ
file1 = input('请输入需要比较的头一个文件名:')
file2 = input('请输入需要比较的另一个文件名:')
differ = file_compare(file1, file2)
if len(differ) == 0:
print('两个文件完全一样!')
else:
print('两个文件共有【%d】处不同:' % len(differ))
for each in differ:
print('第 %d 行不一样' % each)
我用的anaconda里的jupyter,会不会是这个编译器的问题啊
请输入需要比较的头一个文件名:something.txt
请输入需要比较的另一个文件名:something2.txt
---------------------------------------------------------------------------
UnicodeDecodeError Traceback (most recent call last)
~\AppData\Local\Temp\ipykernel_7880\2847495758.py in <module>
19 file2 = input('请输入需要比较的另一个文件名:')
20
---> 21 differ = file_compare(file1, file2)
22
23 if len(differ) == 0:
~\AppData\Local\Temp\ipykernel_7880\2847495758.py in file_compare(file1, file2)
6 differ = [] # 统计不一样的数量
7
----> 8 for line1 in f1:
9 line2 = f2.readline()
10 count += 1
C:\Anaconda\envs\fish_c\lib\codecs.py in decode(self, input, final)
320 # decode input (taking the buffer into account)
321 data = self.buffer + input
--> 322 (result, consumed) = self._buffer_decode(data, self.errors, final)
323 # keep undecoded input until the next call
324 self.buffer = data
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xb4 in position 0: invalid start byte
yixing 发表于 2022-4-12 14:26
我用的anaconda里的jupyter,会不会是这个编译器的问题啊
跟那个无关,是编码问题
def file_compare(file1, file2):
prefix = 'C:\\Learning_file\\'
f1 = open(prefix + file1, encoding='utf-8-sig') # 改了这里
f2 = open(prefix + file2, encoding='utf-8-sig') # 改了这里
count = 0 # 统计行数
differ = [] # 统计不一样的数量
for line1 in f1:
line2 = f2.readline()
count += 1
if line1 != line2:
differ.append(count)
f1.close()
f2.close()
return differ
file1 = input('请输入需要比较的头一个文件名:')
file2 = input('请输入需要比较的另一个文件名:')
differ = file_compare(file1, file2)
if len(differ) == 0:
print('两个文件完全一样!')
else:
print('两个文件共有【%d】处不同:' % len(differ))
for each in differ:
print('第 %d 行不一样' % each)
电脑中的文本文件不可能都是同一种编码,编码检测机制才是最好的解决办法。
安装模块 cchardet 检测编码吧
import cchardet as chardet
def file_enc(file):
    """Detect the text encoding of a file.

    Args:
        file: Path of the file to inspect.

    Returns:
        The value stored under the ``'encoding'`` key of the detector's
        result.
    """
    # Read in binary mode: the detector works on the raw bytes.
    with open(file, "rb") as fh:
        raw = fh.read()
    # detect() returns a dict holding the encoding and a confidence, e.g.
    # {'encoding': 'UTF-8', 'confidence': 0.9900000095367432}.
    return chardet.detect(raw)['encoding']
def file_compare(file1, file2, prefix='C:\\Learning_file\\'):
    """Compare two text files line by line, detecting each file's encoding.

    The original snippet passed an undefined name ``enc`` to ``open`` and so
    raised ``NameError``; each file is now opened with the encoding that
    ``file_enc`` reports for it.

    Args:
        file1: Name of the first file; it is appended to ``prefix``.
        file2: Name of the second file; it is appended to ``prefix``.
        prefix: Directory prefix (including the trailing separator) that is
            prepended to both file names.

    Returns:
        A list of 1-based line numbers at which the two files differ. A line
        that exists in only one of the files counts as a difference.

    Raises:
        OSError: If either file cannot be opened.
    """
    # Function-scope import keeps this snippet self-contained.
    from itertools import zip_longest

    first_path = prefix + file1
    second_path = prefix + file2
    differ = []  # line numbers whose content differs
    # Detect per file: the two files are not guaranteed to share an encoding.
    with open(first_path, encoding=file_enc(first_path)) as f1, \
            open(second_path, encoding=file_enc(second_path)) as f2:
        # zip_longest pads the shorter file with None, so surplus lines in
        # either file are reported instead of being silently ignored.
        for count, (line1, line2) in enumerate(zip_longest(f1, f2), start=1):
            if line1 != line2:
                differ.append(count)
    return differ
# Prompt for both file names, run the comparison and print a summary.
file1 = input('请输入需要比较的头一个文件名:')
file2 = input('请输入需要比较的另一个文件名:')

differ = file_compare(file1, file2)

if differ:
    # Differences found: show how many, then list every differing line.
    print('两个文件共有【%d】处不同:' % len(differ))
    for each in differ:
        print('第 %d 行不一样' % each)
else:
    print('两个文件完全一样!')
页:
[1]