Information Technology Grimoire

Version .0.0.1

IT Notes from various projects because I forget, and hopefully they help you too.

Python3 PDF Page Numberer

I had several PDFs of several hundred pages each and was tasked to “add page numbers to them”.

PDFs ALREADY have page numbers through most apps that read PDFs, but … sure, ok.

To be fair they wanted them all in 1 file too (merged).

This script can merge one or more PDFs together (with the `-s` flag) and adds a page number to each page.

Place the PDFs in the same folder as the script. It completes the task quickly (within seconds for my 20 PDFs of several hundred pages each).

It adds a small rectangle at the bottom right of every page with the incremented page number.

requirements.txt

aiofiles==23.2.1
anyio==4.3.0
certifi==2024.2.2
charset-normalizer==3.3.2
ci-info==0.3.0
click==8.1.7
colorama==0.4.6
configobj==5.0.8
configparser==7.0.0
etelemetry==0.3.1
filelock==3.14.0
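fitz==0.0.1.dev2  # NOTE: unrelated PyPI package that clashes with PyMuPDF's own "fitz" module; the script only needs PyMuPDF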
frontend==0.0.3
h11==0.14.0
httplib2==0.22.0
idna==3.7
isodate==0.6.1
itsdangerous==2.2.0
looseversion==1.3.0
lxml==5.2.2
networkx==3.3
nibabel==5.2.1
nipype==1.8.6
numpy==1.26.4
packaging==24.0
pandas==2.2.2
pathlib==1.0.1
prov==2.0.0
pydot==2.0.0
PyMuPDF==1.24.4
PyMuPDFb==1.24.3
pyparsing==3.1.2
python-dateutil==2.9.0.post0
pytz==2024.1
pyxnat==1.6.2
rdflib==7.0.0
requests==2.31.0
scipy==1.13.0
simplejson==3.19.2
six==1.16.0
sniffio==1.3.1
starlette==0.37.2
traits==6.3.2
tzdata==2024.1
urllib3==2.2.1
uvicorn==0.29.0

PDF Page Numberer Script

import fitz  # PyMuPDF
import sys
import os
import glob
from datetime import datetime

def add_page_numbers(input_pdf_path, output_pdf_path):
    """Stamp a 1-based page number in the bottom-right corner of every page.

    For each page of the PDF at ``input_pdf_path`` a 40x20 pt box is drawn
    30 pt in from the right and bottom edges, and the page number is written
    centred inside it. The result is saved to ``output_pdf_path``; the input
    file itself is not modified.

    Args:
        input_pdf_path: Path of the PDF to read.
        output_pdf_path: Path the numbered copy is written to.
    """
    font_size = 12

    # The context manager closes the document even if drawing or saving
    # fails, so batch runs do not accumulate open file handles.
    with fitz.open(input_pdf_path) as doc:
        for page_index in range(len(doc)):
            page = doc.load_page(page_index)
            page_width = page.rect.width
            page_height = page.rect.height

            # Box that will hold the number, anchored to the bottom-right corner.
            rect = fitz.Rect(page_width - 70, page_height - 50,
                             page_width - 30, page_height - 30)
            # Hot-pink outline; the fill keeps the number readable on top of
            # existing page content (fill=1 is presumably gray level 1, i.e.
            # white -- TODO confirm against the PyMuPDF colour handling).
            page.draw_rect(rect, color=(1, 0.41, 0.71), fill=1)

            text = str(page_index + 1)
            text_width = fitz.get_text_length(text, fontsize=font_size)
            text_height = font_size  # Approximate glyph height for this size
            center_x = rect.x0 + (rect.width - text_width) / 2
            # insert_text is given the baseline position, so shift down by the
            # text height after centring vertically.
            center_y = rect.y0 + (rect.height - text_height) / 2 + text_height

            page.insert_text((center_x, center_y), text, fontsize=font_size,
                             fontname="helv", color=(0, 0, 0))

        # Save the modified PDF to a new file
        doc.save(output_pdf_path)

def print_help():
    """Print the command-line usage text for this script to stdout."""
    print("""
    Usage: python add_page_numbers.py [-s] [-h] [<input_pdf_path>]
    This script adds page numbers to PDF files.

    Arguments:
    -s               Stitch all matching PDF files together, then add page numbers and export as a single file.
    -h               Display this help message.
    <input_pdf_path> (Optional) Path to the input PDF file. If not provided, all PDF files in the current directory 
                     that do not have '-n.pdf' in their filename will be processed individually.

    The output file for individual processing will be named the same as the input file with '-n' appended before the file extension.
    For example, if the input file is 'somefile.pdf', the output file will be 'somefile-n.pdf'.

    The output file for stitching will be named with the current timestamp.
    """)

def process_all_pdfs():
    """Number every PDF in the current directory, one output file per input.

    Files whose name ends in '-n.pdf' are skipped. Each remaining
    'name.pdf' is written out as 'name-n.pdf' via add_page_numbers.
    """
    for candidate in glob.glob("*.pdf"):
        # Skip files carrying the '-n' output suffix.
        if candidate.endswith("-n.pdf"):
            continue

        stem, extension = os.path.splitext(candidate)
        numbered_path = f"{stem}-n{extension}"
        add_page_numbers(candidate, numbered_path)
        print(f"Page numbers added. Output file saved as '{numbered_path}'")

def stitch_and_process_pdfs():
    """Merge all PDFs in the current directory into one page-numbered file.

    Every '*.pdf' file whose name does not end in '-n.pdf' is appended, in
    sorted filename order, to a new document. That document is written to a
    temporary 'stitched_<timestamp>.pdf', numbered via add_page_numbers into
    'stitched_<timestamp>-n.pdf', and the temporary file is then removed.
    Prints a message and returns early when there is nothing to merge.
    """
    # glob returns names in filesystem order; sort so the merged page order
    # is the same on every run and every platform.
    pdf_files = sorted(f for f in glob.glob("*.pdf") if not f.endswith("-n.pdf"))
    if not pdf_files:
        print("No PDF files to stitch and process.")
        return

    timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
    stitched_output_path = f"stitched_{timestamp}.pdf"
    output_pdf_path = f"stitched_{timestamp}-n.pdf"

    # Create a new document to stitch all PDFs together. Both the target and
    # each source are closed by their context managers.
    with fitz.open() as stitched_doc:
        for pdf_file in pdf_files:
            with fitz.open(pdf_file) as doc:
                # One insert_pdf call copies the whole source document;
                # the guard keeps the original's no-op for empty sources.
                if len(doc):
                    stitched_doc.insert_pdf(doc)
        stitched_doc.save(stitched_output_path)

    # Add page numbers to the stitched PDF; always clean up the intermediate
    # file, even if numbering fails.
    try:
        add_page_numbers(stitched_output_path, output_pdf_path)
    finally:
        os.remove(stitched_output_path)
    print(f"Stitched and page-numbered PDF saved as '{output_pdf_path}'")

if __name__ == "__main__":
    # Command-line dispatch: at most one argument is accepted.
    cli_args = sys.argv[1:]

    # Anything beyond a single argument is a usage error.
    if len(cli_args) > 1:
        print_help()
        sys.exit(1)

    if not cli_args:
        # No argument: number every PDF in the current directory.
        process_all_pdfs()
    elif cli_args[0] == "-h":
        print_help()
    elif cli_args[0] == "-s":
        stitch_and_process_pdfs()
    else:
        # Any other argument is treated as the path of a single PDF.
        source_path = cli_args[0]
        if not os.path.isfile(source_path):
            print(f"Error: File '{source_path}' not found.")
            sys.exit(1)

        stem, extension = os.path.splitext(source_path)
        numbered_path = f"{stem}-n{extension}"

        add_page_numbers(source_path, numbered_path)
        print(f"Page numbers added. Output file saved as '{numbered_path}'")