PDF转换问题？合成后尺寸变了,Python交流,编程语言专区,鱼C论坛

任申猴 发表于 2024-6-11 14:48:34

PDF转换问题？合成后尺寸变了

本帖最后由任申猴于 2024-6-11 14:51 编辑

源文件：竖向的在浏览器打开100%状态看着是正常的；

由于需要把它转为图片在转成PDF（避免抠图）
import os
import fitz
from PIL import Image
import shutil
import re
import win32com.client as win32

def excel_to_pdf():
"""当前黑白版转PDF"""
current_directory = os.getcwd()
xls_files =
if not xls_files:
   print('当前目录下没有 .xls 文件')
   return

xls_file_names = for f in xls_files]
xls_path = os.path.join(current_directory, xls_files)
pdf_out_path = current_directory

if not os.path.exists(pdf_out_path):
   os.makedirs(pdf_out_path)

excel = win32.gencache.EnsureDispatch('Excel.Application')
excel.Visible = False
workbook = excel.Workbooks.Open(xls_path)
pdf_path = os.path.join(pdf_out_path, f"{xls_file_names}_报告.pdf")
workbook.ExportAsFixedFormat(0, pdf_path)
workbook.Close(False)
excel.Application.Quit()

result = re.search(r'\d+', pdf_path)
if result:
   number_only = result.group()
pdf_name = f'LTJC{number_only}_报告.pdf'
rightmost_index = len(pdf_name) - 4
folder_name = pdf_name[:rightmost_index]

if not os.path.exists(folder_name):
   os.makedirs(folder_name)
   print(f"文件夹 '{folder_name}' 已成功创建。")
else:
   print(f"文件夹 '{folder_name}' 已经存在。")

doc = fitz.open(pdf_name)

dpi = 300
zoom = dpi / 72# PDF 默认是 72 DPI
mat = fitz.Matrix(zoom, zoom)

for page_num in range(len(doc)):
   page = doc
   pix = page.get_pixmap(matrix=mat)
   output = os.path.join(folder_name, f"page_{page_num}.png")
   pix.save(output)

doc.close()

if os.path.exists(pdf_name):
   os.remove(pdf_name)
   print(f'文件 {pdf_name} 已成功删除')
else:
   print(f'文件 {pdf_name} 不存在')

image_folder = folder_name
image_files =
image_files.sort(key=lambda x: int(x.split('_').split('.')))

images = []
for image_file in image_files:
   img_path = os.path.join(image_folder, image_file)
   img = Image.open(img_path)
   if img.mode == 'RGBA':
         img = img.convert('RGB')
   print(dpi)
   a4_width, a4_height = (2480, 3508)
   # if dpi == 600:
   # a4_width, a4_height = (4960, 7016)
   # elif dpi == 1200:
   # a4_width, a4_height = (9920, 14032)

   img = img.resize((a4_width, a4_height), Image.Resampling.LANCZOS)
   jpg_path = os.path.join(image_folder, f"page_{image_file.split('_').split('.')}.png")
   img.save(jpg_path, format='JPEG', quality=85)

   images.append(Image.open(jpg_path))

pdf_path = pdf_name[:15] + '_报告.pdf'
images.save(pdf_path, save_all=True, append_images=images)

folder_path = folder_name
if os.path.exists(folder_path):
   shutil.rmtree(folder_path)
   print(f'文件夹 {folder_path} 已成功删除')
else:
   print(f'文件夹 {folder_path} 不存在')

print(number_only)

return pdf_path, xls_file_names
excel_to_pdf()

转换出来了，图片变大了，在浏览器打开100%状态下，大了几倍，但我设置的是300dpi，请问大佬这是为什么？

任申猴 发表于 2024-6-11 14:48:35

images.save(output_pdf_path, save_all=True, append_images=images,resolution=300,quality=100,optims=True)
解决了，需要把这几个参数指定了

FishC 发表于 2024-6-11 14:49:06

在进行PDF转换时，会涉及到一些尺寸的调整问题。有时候，转换后的PDF文件的尺寸可能与源文件不一致。这可能是由于转换工具在处理过程中对尺寸进行了调整。

为了解决这个问题，你可以尝试使用Python中的一个PDF处理库PyPDF2来进行转换，并在转换过程中指定输出的尺寸。下面是一个示例代码：

import PyPDF2

def convert_pdf(source_path, output_path, width=None, height=None):
# 打开源文件
with open(source_path, 'rb') as source_file:
   # 创建一个PDF读取器对象
   reader = PyPDF2.PdfReader(source_file)

   # 创建一个PDF写入器对象
   writer = PyPDF2.PdfWriter()

   # 遍历每一页
   for page in reader.pages:
         # 获取源页面的尺寸
         page_width = page.mediaBox
         page_height = page.mediaBox

         # 如果指定了输出尺寸，则调整页面尺寸
         if width and height:
            page.scaleTo(width, height)

         # 将调整后的页面添加到写入器对象中
         writer.add_page(page)

   # 将写入器对象保存到输出文件中
   with open(output_path, 'wb') as output_file:
         writer.write(output_file)

# 调用函数进行PDF转换
convert_pdf('source.pdf', 'output.pdf', width=500, height=700)

在上面的代码中，`convert_pdf`函数接受源文件路径、输出文件路径以及可选的输出尺寸作为参数。如果没有指定输出尺寸，将保持源文件的尺寸。可以根据自己的需求进行调整。

希望这个示例能够帮助到你解决问题！如果还有其他问题，请随时提问。

以上回复来自 FishC AI 小助理，如未能正确解答您的问题，请继续追问。

不二如是 发表于 2024-6-11 15:46:14

貌似，dpi没有生效，稍等我来测试下代码

不二如是 发表于 2024-6-11 15:51:58

鱼油代码中涉及将 Excel 文件转换为 PDF，再将 PDF 页面转换为图片，并最终调整图片大小以适应 A4 页面，保存为 JPEG 格式。

转换过程中出现的图像放大问题可能源于几个关键环节：

[*]PDF 分辨率与显示分辨率的关系：您的代码中将 PDF 页面的分辨率设置为 300 DPI（点每英寸），然后将此图像缩放到 A4 纸的分辨率（2480x3508 像素）。这种缩放假设了 DPI 与尺寸的直接关系，即图片的物理尺寸会随着 DPI 的改变而线性缩放。
[*]图像缩放逻辑：代码中使用 Image.Resampling.LANCZOS 方法进行图像缩放，这是一种高质量的重采样算法，适合保持图像质量。然而，如果原始图像的尺寸（以像素计）与目标尺寸相差很大，该算法可能导致视觉上的放大或缩小。
[*]DPI 与图像显示大小的误解：DPI 主要是一个打印设置，表示每英寸应该有多少个点，而在电脑屏幕上查看图像时，实际显示大小还取决于屏幕分辨率和图像的像素尺寸。尽管设置了 300 DPI，但图像的像素尺寸（宽度和高度）是决定其在屏幕上显示大小的关键因素。
[*]具体实现中的问题：代码将 PDF 页面转换为图像后，直接调整为 A4 的像素尺寸。如果原始 PDF 页面的实际像素尺寸小于或等于 A4 尺寸，这种调整可能无问题。但如果原始 PDF 页面的像素尺寸已经非常大（可能由于 PDF 的内部设置或内容本身的分辨率很高），则此调整可能导致图像在屏幕上显示得比预期更大。

解决方案：

[*]检查 PDF 的原始尺寸：在进行图像缩放之前，首先检查 PDF 页面的原始像素尺寸。可以使用工具或库来读取这些信息。
[*]适当调整图像尺寸：根据屏幕显示需要和打印需求调整图像的目标尺寸。如果原始图像尺寸远大于 A4 尺寸，应该相应减小缩放比例。
[*]调整 DPI 设置：如果主要关注屏幕显示效果而非打印，可以适当降低 DPI 设置，使其更适合屏幕显示的分辨率。

任申猴 发表于 2024-6-11 18:32:50

不二如是发表于 2024-6-11 15:46
貌似，dpi没有生效，稍等我来测试下代码

咋看不了回复？？

不二如是 发表于 2024-6-11 20:11:07

任申猴发表于 2024-6-11 18:32
咋看不了回复？？

现在呢？

任申猴 发表于 2024-6-12 09:08:04

不二如是发表于 2024-6-11 20:11
现在呢？

还是白版{:5_104:}，AI生成的那个能看见，啥情况

Twilight6 发表于 2024-6-12 09:33:43

任申猴发表于 2024-6-12 09:08
还是白版，AI生成的那个能看见，啥情况

是不是因为你 resize 重新设置了尺寸了？

import os
import fitz
from PIL import Image
import shutil
import re
import win32com.client as win32

def excel_to_pdf():
"""当前黑白版转PDF"""
current_directory = os.getcwd()
xls_files =
if not xls_files:
   print('当前目录下没有 .xls 文件')
   return

xls_file_names = for f in xls_files]
xls_path = os.path.join(current_directory, xls_files)
pdf_out_path = current_directory

if not os.path.exists(pdf_out_path):
   os.makedirs(pdf_out_path)

excel = win32.gencache.EnsureDispatch('Excel.Application')
excel.Visible = False
workbook = excel.Workbooks.Open(xls_path)
pdf_path = os.path.join(pdf_out_path, f"{xls_file_names}_报告.pdf")
workbook.ExportAsFixedFormat(0, pdf_path)
workbook.Close(False)
excel.Application.Quit()

result = re.search(r'\d+', pdf_path)
if result:
   number_only = result.group()
pdf_name = f'LTJC{number_only}_报告.pdf'
rightmost_index = len(pdf_name) - 4
folder_name = pdf_name[:rightmost_index]

if not os.path.exists(folder_name):
   os.makedirs(folder_name)
   print(f"文件夹 '{folder_name}' 已成功创建。")
else:
   print(f"文件夹 '{folder_name}' 已经存在。")

doc = fitz.open(pdf_name)

dpi = 300
zoom = dpi / 72# PDF 默认是 72 DPI
mat = fitz.Matrix(zoom, zoom)

for page_num in range(len(doc)):
   page = doc
   pix = page.get_pixmap(matrix=mat)
   output = os.path.join(folder_name, f"page_{page_num}.png")
   pix.save(output)

doc.close()

if os.path.exists(pdf_name):
   os.remove(pdf_name)
   print(f'文件 {pdf_name} 已成功删除')
else:
   print(f'文件 {pdf_name} 不存在')

image_folder = folder_name
image_files =
image_files.sort(key=lambda x: int(x.split('_').split('.')))

images = []
for image_file in image_files:
   img_path = os.path.join(image_folder, image_file)
   img = Image.open(img_path)
   if img.mode == 'RGBA':
         img = img.convert('RGB')

   # img = img.resize((a4_width, a4_height), Image.Resampling.LANCZOS)

   jpg_path = os.path.join(image_folder, f"page_{image_file.split('_').split('.')}.jpg")
   img.save(jpg_path, format='JPEG', quality=85)
   images.append(Image.open(jpg_path))

pdf_path = pdf_name[:15] + '_报告.pdf'
images.save(pdf_path, save_all=True, append_images=images)

folder_path = folder_name
if os.path.exists(folder_path):
   shutil.rmtree(folder_path)
   print(f'文件夹 {folder_path} 已成功删除')
else:
   print(f'文件夹 {folder_path} 不存在')
print(number_only)
return pdf_path, xls_file_names

excel_to_pdf()

试试把 resize 注释了

任申猴 发表于 2024-6-12 09:52:26

Twilight6 发表于 2024-6-12 09:33
是不是因为你 resize 重新设置了尺寸了？

不是，注释了还是，变大了

页: [1]

鱼C论坛's Archiver

PDF转换问题？合成后尺寸变了