[已解决]PDF转换问题？合成后尺寸变了

任申猴 · 发表于 2024-6-11 14:48:34

本帖最后由任申猴于 2024-6-11 14:51 编辑

源文件：竖向的在浏览器打开100%状态看着是正常的；

由于需要把它转为图片在转成PDF（避免抠图）

import os
import fitz
from PIL import Image
import shutil
import re
import win32com.client as win32
def excel_to_pdf():
"""当前黑白版转PDF"""
current_directory = os.getcwd()
xls_files = [f for f in os.listdir(current_directory) if f.endswith('.xls')]
if not xls_files:
print('当前目录下没有 .xls 文件')
return
xls_file_names = [os.path.splitext(f)[0] for f in xls_files]
xls_path = os.path.join(current_directory, xls_files[0])
pdf_out_path = current_directory
if not os.path.exists(pdf_out_path):
os.makedirs(pdf_out_path)
excel = win32.gencache.EnsureDispatch('Excel.Application')
excel.Visible = False
workbook = excel.Workbooks.Open(xls_path)
pdf_path = os.path.join(pdf_out_path, f"{xls_file_names[0]}_报告.pdf")
workbook.ExportAsFixedFormat(0, pdf_path)
workbook.Close(False)
excel.Application.Quit()
result = re.search(r'\d+', pdf_path)
if result:
number_only = result.group()
pdf_name = f'LTJC{number_only}_报告.pdf'
rightmost_index = len(pdf_name) - 4
folder_name = pdf_name[:rightmost_index]
if not os.path.exists(folder_name):
os.makedirs(folder_name)
print(f"文件夹 '{folder_name}' 已成功创建。")
else:
print(f"文件夹 '{folder_name}' 已经存在。")
doc = fitz.open(pdf_name)
dpi = 300
zoom = dpi / 72 # PDF 默认是 72 DPI
mat = fitz.Matrix(zoom, zoom)
for page_num in range(len(doc)):
page = doc[page_num]
pix = page.get_pixmap(matrix=mat)
output = os.path.join(folder_name, f"page_{page_num}.png")
pix.save(output)
doc.close()
if os.path.exists(pdf_name):
os.remove(pdf_name)
print(f'文件 {pdf_name} 已成功删除')
else:
print(f'文件 {pdf_name} 不存在')
image_folder = folder_name
image_files = [f for f in os.listdir(image_folder) if f.endswith('.png')]
image_files.sort(key=lambda x: int(x.split('_')[1].split('.')[0]))
images = []
for image_file in image_files:
img_path = os.path.join(image_folder, image_file)
img = Image.open(img_path)
if img.mode == 'RGBA':
img = img.convert('RGB')
print(dpi)
a4_width, a4_height = (2480, 3508)
# if dpi == 600:
# a4_width, a4_height = (4960, 7016)
# elif dpi == 1200:
# a4_width, a4_height = (9920, 14032)
img = img.resize((a4_width, a4_height), Image.Resampling.LANCZOS)
jpg_path = os.path.join(image_folder, f"page_{image_file.split('_')[1].split('.')[0]}.png")
img.save(jpg_path, format='JPEG', quality=85)
images.append(Image.open(jpg_path))
pdf_path = pdf_name[:15] + '_报告.pdf'
images[0].save(pdf_path, save_all=True, append_images=images[1:])
folder_path = folder_name
if os.path.exists(folder_path):
shutil.rmtree(folder_path)
print(f'文件夹 {folder_path} 已成功删除')
else:
print(f'文件夹 {folder_path} 不存在')
print(number_only)
return pdf_path, xls_file_names
excel_to_pdf()

复制代码

转换出来了，图片变大了，在浏览器打开100%状态下，大了几倍，但我设置的是300dpi ，请问大佬这是为什么？

最佳答案

月排行榜 / 总排行榜

任申猴

2024-6-11 14:48:35

images[0].save(output_pdf_path, save_all=True, append_images=images[1:],resolution=300,quality=100,optims=True)

复制代码

解决了，需要把这几个参数指定了

跳转到最佳答案楼层

任申猴 · 发表于 2024-6-11 14:48:35

images[0].save(output_pdf_path, save_all=True, append_images=images[1:],resolution=300,quality=100,optims=True)

复制代码

解决了，需要把这几个参数指定了

FishC · 发表于 2024-6-11 14:49:06

在进行PDF转换时，会涉及到一些尺寸的调整问题。有时候，转换后的PDF文件的尺寸可能与源文件不一致。这可能是由于转换工具在处理过程中对尺寸进行了调整。

为了解决这个问题，你可以尝试使用Python中的一个PDF处理库PyPDF2来进行转换，并在转换过程中指定输出的尺寸。下面是一个示例代码：

import PyPDF2
def convert_pdf(source_path, output_path, width=None, height=None):
# 打开源文件
with open(source_path, 'rb') as source_file:
# 创建一个PDF读取器对象
reader = PyPDF2.PdfReader(source_file)
# 创建一个PDF写入器对象
writer = PyPDF2.PdfWriter()
# 遍历每一页
for page in reader.pages:
# 获取源页面的尺寸
page_width = page.mediaBox[2]
page_height = page.mediaBox[3]
# 如果指定了输出尺寸，则调整页面尺寸
if width and height:
page.scaleTo(width, height)
# 将调整后的页面添加到写入器对象中
writer.add_page(page)
# 将写入器对象保存到输出文件中
with open(output_path, 'wb') as output_file:
writer.write(output_file)
# 调用函数进行PDF转换
convert_pdf('source.pdf', 'output.pdf', width=500, height=700)

复制代码

在上面的代码中，`convert_pdf`函数接受源文件路径、输出文件路径以及可选的输出尺寸作为参数。如果没有指定输出尺寸，将保持源文件的尺寸。可以根据自己的需求进行调整。

希望这个示例能够帮助到你解决问题！如果还有其他问题，请随时提问。

以上回复来自 FishC AI 小助理，如未能正确解答您的问题，请继续追问。

不二如是 · 发表于 2024-6-11 15:46:14

貌似，dpi没有生效，稍等我来测试下代码

不二如是 · 发表于 2024-6-11 15:51:58

鱼油代码中涉及将 Excel 文件转换为 PDF，再将 PDF 页面转换为图片，并最终调整图片大小以适应 A4 页面，保存为 JPEG 格式。

转换过程中出现的图像放大问题可能源于几个关键环节：

PDF 分辨率与显示分辨率的关系：您的代码中将 PDF 页面的分辨率设置为 300 DPI（点每英寸），然后将此图像缩放到 A4 纸的分辨率（2480x3508 像素）。这种缩放假设了 DPI 与尺寸的直接关系，即图片的物理尺寸会随着 DPI 的改变而线性缩放。
图像缩放逻辑：代码中使用 Image.Resampling.LANCZOS 方法进行图像缩放，这是一种高质量的重采样算法，适合保持图像质量。然而，如果原始图像的尺寸（以像素计）与目标尺寸相差很大，该算法可能导致视觉上的放大或缩小。
DPI 与图像显示大小的误解：DPI 主要是一个打印设置，表示每英寸应该有多少个点，而在电脑屏幕上查看图像时，实际显示大小还取决于屏幕分辨率和图像的像素尺寸。尽管设置了 300 DPI，但图像的像素尺寸（宽度和高度）是决定其在屏幕上显示大小的关键因素。
具体实现中的问题：代码将 PDF 页面转换为图像后，直接调整为 A4 的像素尺寸。如果原始 PDF 页面的实际像素尺寸小于或等于 A4 尺寸，这种调整可能无问题。但如果原始 PDF 页面的像素尺寸已经非常大（可能由于 PDF 的内部设置或内容本身的分辨率很高），则此调整可能导致图像在屏幕上显示得比预期更大。

解决方案：

检查 PDF 的原始尺寸：在进行图像缩放之前，首先检查 PDF 页面的原始像素尺寸。可以使用工具或库来读取这些信息。
适当调整图像尺寸：根据屏幕显示需要和打印需求调整图像的目标尺寸。如果原始图像尺寸远大于 A4 尺寸，应该相应减小缩放比例。
调整 DPI 设置：如果主要关注屏幕显示效果而非打印，可以适当降低 DPI 设置，使其更适合屏幕显示的分辨率。

任申猴 · 发表于 2024-6-11 18:32:50

不二如是发表于 2024-6-11 15:46
貌似，dpi没有生效，稍等我来测试下代码

咋看不了回复？？

不二如是 · 发表于 2024-6-11 20:11:07

任申猴发表于 2024-6-11 18:32
咋看不了回复？？

现在呢？

任申猴 · 发表于 2024-6-12 09:08:04

不二如是发表于 2024-6-11 20:11
现在呢？

还是白版

，AI生成的那个能看见，啥情况

Twilight6 · 发表于 2024-6-12 09:33:43

任申猴发表于 2024-6-12 09:08
还是白版，AI生成的那个能看见，啥情况

是不是因为你 resize 重新设置了尺寸了？

import os

import fitz

from PIL import Image

import shutil

import re

import win32com.client as win32

def excel_to_pdf():

"""当前黑白版转PDF"""

current_directory = os.getcwd()

xls_files = [f for f in os.listdir(current_directory) if f.endswith('.xls')]

if not xls_files:

      print('当前目录下没有 .xls 文件')

      return

xls_file_names = [os.path.splitext(f)[0] for f in xls_files]

xls_path = os.path.join(current_directory, xls_files[0])

pdf_out_path = current_directory

if not os.path.exists(pdf_out_path):

      os.makedirs(pdf_out_path)

excel = win32.gencache.EnsureDispatch('Excel.Application')

excel.Visible = False

workbook = excel.Workbooks.Open(xls_path)

pdf_path = os.path.join(pdf_out_path, f"{xls_file_names[0]}_报告.pdf")

workbook.ExportAsFixedFormat(0, pdf_path)

workbook.Close(False)

excel.Application.Quit()

result = re.search(r'\d+', pdf_path)

if result:

      number_only = result.group()

pdf_name = f'LTJC{number_only}_报告.pdf'

rightmost_index = len(pdf_name) - 4

folder_name = pdf_name[:rightmost_index]

if not os.path.exists(folder_name):

      os.makedirs(folder_name)

      print(f"文件夹 '{folder_name}' 已成功创建。")

else:

      print(f"文件夹 '{folder_name}' 已经存在。")

doc = fitz.open(pdf_name)

dpi = 300

zoom = dpi / 72  # PDF 默认是 72 DPI

mat = fitz.Matrix(zoom, zoom)

for page_num in range(len(doc)):

      page = doc[page_num]

      pix = page.get_pixmap(matrix=mat)

      output = os.path.join(folder_name, f"page_{page_num}.png")

      pix.save(output)

doc.close()

if os.path.exists(pdf_name):

      os.remove(pdf_name)

      print(f'文件 {pdf_name} 已成功删除')

else:

      print(f'文件 {pdf_name} 不存在')

image_folder = folder_name

image_files = [f for f in os.listdir(image_folder) if f.endswith('.png')]

image_files.sort(key=lambda x: int(x.split('_')[1].split('.')[0]))

images = []

for image_file in image_files:

      img_path = os.path.join(image_folder, image_file)

      img = Image.open(img_path)

      if img.mode == 'RGBA':

         img = img.convert('RGB')



      # img = img.resize((a4_width, a4_height), Image.Resampling.LANCZOS)



      jpg_path = os.path.join(image_folder, f"page_{image_file.split('_')[1].split('.')[0]}.jpg")

      img.save(jpg_path, format='JPEG', quality=85)

      images.append(Image.open(jpg_path))

pdf_path = pdf_name[:15] + '_报告.pdf'

images[0].save(pdf_path, save_all=True, append_images=images[1:])

folder_path = folder_name

if os.path.exists(folder_path):

      shutil.rmtree(folder_path)

      print(f'文件夹 {folder_path} 已成功删除')

else:

      print(f'文件夹 {folder_path} 不存在')

print(number_only)

return pdf_path, xls_file_names

excel_to_pdf()

复制代码

试试把 resize 注释了

任申猴 · 发表于 2024-6-12 09:52:26

Twilight6 发表于 2024-6-12 09:33
是不是因为你 resize 重新设置了尺寸了？

不是，注释了还是，变大了

账号		自动登录	找回密码
密码			立即注册