Python3 PDF Page Numberer
I had several PDFs, each several hundred pages long, and was asked to “add page numbers on them”.
PDFs ALREADY have page numbers through most apps that read PDFs, but … sure, ok.
To be fair they wanted them all in 1 file too (merged).
This script adds a page number to every page of one or more PDFs and, with the -s option, first merges them into a single file.
Place the PDFs in the same folder as the script. It completes the task quickly (within seconds for my 20 PDFs of several hundred pages each).
It draws a small rectangle at the bottom right of every page containing that page's number.
requirements.txt
aiofiles==23.2.1
anyio==4.3.0
certifi==2024.2.2
charset-normalizer==3.3.2
ci-info==0.3.0
click==8.1.7
colorama==0.4.6
configobj==5.0.8
configparser==7.0.0
etelemetry==0.3.1
filelock==3.14.0
fitz==0.0.1.dev2
frontend==0.0.3
h11==0.14.0
httplib2==0.22.0
idna==3.7
isodate==0.6.1
itsdangerous==2.2.0
looseversion==1.3.0
lxml==5.2.2
networkx==3.3
nibabel==5.2.1
nipype==1.8.6
numpy==1.26.4
packaging==24.0
pandas==2.2.2
pathlib==1.0.1
prov==2.0.0
pydot==2.0.0
PyMuPDF==1.24.4
PyMuPDFb==1.24.3
pyparsing==3.1.2
python-dateutil==2.9.0.post0
pytz==2024.1
pyxnat==1.6.2
rdflib==7.0.0
requests==2.31.0
scipy==1.13.0
simplejson==3.19.2
six==1.16.0
sniffio==1.3.1
starlette==0.37.2
traits==6.3.2
tzdata==2024.1
urllib3==2.2.1
uvicorn==0.29.0
PDF Page Numberer Script
import fitz # PyMuPDF
import sys
import os
import glob
from datetime import datetime
def add_page_numbers(input_pdf_path, output_pdf_path):
    """Write a copy of a PDF with a page-number box on every page.

    Each page gets a 40x20 point rectangle near its bottom-right corner,
    with the 1-based page number centred inside it in 12 pt Helvetica.

    Args:
        input_pdf_path: Path of the PDF to read.
        output_pdf_path: Path the numbered copy is saved to.
    """
    font_size = 12

    # Open the document as a context manager so it is closed even when
    # drawing or saving raises; the original never closed it.
    with fitz.open(input_pdf_path) as doc:
        for page_number, page in enumerate(doc, start=1):
            # Box anchored 30 pt in from the right and bottom page edges.
            rect = fitz.Rect(
                page.rect.width - 70,
                page.rect.height - 50,
                page.rect.width - 30,
                page.rect.height - 30,
            )
            # Hot pink outline behind the number so it stays readable.
            # NOTE(review): fill=1 is kept as in the original; presumably
            # PyMuPDF treats a single number as a gray level (white) --
            # TODO confirm.
            page.draw_rect(rect, color=(1, 0.41, 0.71), fill=1)

            text = str(page_number)
            text_width = fitz.get_text_length(text, fontsize=font_size)
            text_height = font_size  # Approximate glyph height.

            # insert_text is given the start of the baseline, so the
            # vertical position is shifted down by the text height.
            center_x = rect.x0 + (rect.width - text_width) / 2
            center_y = rect.y0 + (rect.height - text_height) / 2 + text_height

            page.insert_text(
                (center_x, center_y),
                text,
                fontsize=font_size,
                fontname="helv",
                color=(0, 0, 0),
            )

        # Save the modified PDF to a new file.
        doc.save(output_pdf_path)
def print_help():
    """Print the command-line usage text to standard output."""
    help_lines = (
        "",
        "Usage: python add_page_numbers.py [-s] [-h] [<input_pdf_path>]",
        "This script adds page numbers to PDF files.",
        "Arguments:",
        "-s Stitch all matching PDF files together, then add page numbers and export as a single file.",
        "-h Display this help message.",
        "<input_pdf_path> (Optional) Path to the input PDF file. If not provided, all PDF files in the current directory",
        "that do not have '-n.pdf' in their filename will be processed individually.",
        "The output file for individual processing will be named the same as the input file with '-n' appended before the file extension.",
        "For example, if the input file is 'somefile.pdf', the output file will be 'somefile-n.pdf'.",
        "The output file for stitching will be named with the current timestamp.",
        "",
    )
    # The empty first and last entries reproduce the leading and trailing
    # newlines of the original triple-quoted message.
    print("\n".join(help_lines))
def process_all_pdfs():
    """Number every PDF in the current directory, one output per input.

    Files whose name ends in '-n.pdf' are skipped, since that is the
    suffix this script gives its own output. Each remaining
    'name.pdf' is written out as 'name-n.pdf'.
    """
    for input_pdf_path in glob.glob("*.pdf"):
        if input_pdf_path.endswith("-n.pdf"):
            # Already a numbered output; do not number it again.
            continue

        name_parts = os.path.splitext(input_pdf_path)
        file_name = name_parts[0]
        file_extension = name_parts[1]
        output_pdf_path = f"{file_name}-n{file_extension}"

        add_page_numbers(input_pdf_path, output_pdf_path)
        print(f"Page numbers added. Output file saved as '{output_pdf_path}'")
def stitch_and_process_pdfs():
    """Merge the PDFs in the current directory and number the result.

    Every '*.pdf' file whose name does not end in '-n.pdf' is appended,
    in sorted filename order, to one new document. That document is
    saved to an intermediate 'stitched_<timestamp>.pdf', numbered into
    'stitched_<timestamp>-n.pdf', and the intermediate file is removed.
    Prints a message and returns without writing anything when no
    matching PDF is found.
    """
    # glob returns names in arbitrary order; sort them so the page
    # order of the merged file is predictable from the filenames.
    pdf_files = sorted(
        f for f in glob.glob("*.pdf") if not f.endswith("-n.pdf")
    )
    if not pdf_files:
        print("No PDF files to stitch and process.")
        return

    timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
    stitched_output_path = f"stitched_{timestamp}.pdf"
    output_pdf_path = f"stitched_{timestamp}-n.pdf"

    # Context managers close the merged document and every source
    # document; the original left all source documents open.
    with fitz.open() as stitched_doc:
        for pdf_file in pdf_files:
            with fitz.open(pdf_file) as doc:
                # One call appends the whole source document, rather
                # than one insert_pdf call per page.
                stitched_doc.insert_pdf(doc)
        stitched_doc.save(stitched_output_path)

    try:
        # Add page numbers to the stitched PDF.
        add_page_numbers(stitched_output_path, output_pdf_path)
    finally:
        # Remove the intermediate file even when numbering fails.
        os.remove(stitched_output_path)

    print(f"Stitched and page-numbered PDF saved as '{output_pdf_path}'")
if __name__ == "__main__":
    # Command-line entry point. At most one argument is accepted:
    # '-h', '-s', or the path of a single PDF to number.
    argument_count = len(sys.argv)
    if argument_count > 2:
        print_help()
        sys.exit(1)

    if argument_count != 2:
        # No argument given: number every PDF in the current directory.
        process_all_pdfs()
    elif sys.argv[1] == "-h":
        print_help()
    elif sys.argv[1] == "-s":
        stitch_and_process_pdfs()
    else:
        input_pdf_path = sys.argv[1]
        if not os.path.isfile(input_pdf_path):
            print(f"Error: File '{input_pdf_path}' not found.")
            sys.exit(1)
        file_name, file_extension = os.path.splitext(input_pdf_path)
        output_pdf_path = f"{file_name}-n{file_extension}"
        add_page_numbers(input_pdf_path, output_pdf_path)
        print(f"Page numbers added. Output file saved as '{output_pdf_path}'")