Information Technology Grimoire

Version 0.0.1

IT Notes from various projects because I forget, and hopefully they help you too.

Python Change Markdown Globally

Python Script to Globally Modify Markdown

I needed to massage all of my several hundred articles, so I am saving this script for the record and might document it properly later. New sites won’t need this; I had a custom problem, but I am documenting my solution anyway.

import os
import re
import shutil
import sys

def rename_file(old_name, new_name, file_path):
    """Rename the file at file_path to new_name when its base name is old_name.

    The file stays in the same directory; only its final path component
    changes. Paths whose base name differs from old_name are left alone.
    """
    parent_dir, current_name = os.path.split(file_path)
    if current_name != old_name:
        return

    target_path = os.path.join(parent_dir, new_name)
    os.rename(file_path, target_path)
    print(f"Renamed '{file_path}' to '{target_path}'")

def replace_text_in_file2(file_path, search_text, replace_text):
    """Apply a regular-expression substitution to a UTF-8 text file.

    search_text is used as a regex pattern and replace_text as its
    replacement template. The file is rewritten only when the substitution
    actually changes its content.
    """
    with open(file_path, 'r', encoding='utf-8') as source:
        original = source.read()

    updated = re.compile(search_text).sub(replace_text, original)
    if updated == original:
        # Nothing matched (or the match was replaced by itself): leave the file alone
        return

    with open(file_path, 'w', encoding='utf-8') as target:
        target.write(updated)
    print(f"Updated text in '{file_path}'")

def replace_text_in_file(file_path, search_text, replace_text):
    """Replace every literal occurrence of search_text in a UTF-8 text file.

    Both search_text and replace_text are treated as plain strings: no
    character in either one has any regex or template meaning. The file is
    rewritten only when the replacement actually changes its content.

    Args:
        file_path: Path of the file to edit in place.
        search_text: Exact text to look for.
        replace_text: Exact text to put in its place.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        content = file.read()

    # str.replace keeps the replacement literal too; re.sub would interpret
    # backslashes in replace_text (e.g. "\1", "\n") as template escapes.
    new_content = content.replace(search_text, replace_text)

    if new_content != content:
        with open(file_path, 'w', encoding='utf-8') as file:
            file.write(new_content)
        print(f"Updated text in '{file_path}'")

def remove_multiline_string(file_path, multiline_string):
    """Delete every exact occurrence of multiline_string from a UTF-8 text file.

    The string is matched literally, line breaks included. The file is
    rewritten only when at least one occurrence was removed.
    """
    with open(file_path, 'r', encoding='utf-8') as source:
        original = source.read()

    # Literal removal: replacing the exact text with nothing
    stripped = original.replace(multiline_string, '')
    if stripped == original:
        return

    with open(file_path, 'w', encoding='utf-8') as target:
        target.write(stripped)
    print(f"Removed specified multiline string from '{file_path}'")


def transform_jekyll_to_hugo(file_path):
    """Rewrite a file's leading Jekyll front matter as Hugo front matter.

    Only the front matter block that opens the file is converted: the first
    non-blank line must be '---' and a later '---' line must close it. Any
    further '---' lines in the body (Markdown horizontal rules) are left
    untouched, so body text is never swallowed. Files without a complete
    leading front matter block are not modified.

    The 'title:' and 'date:' values are carried over into the Hugo header;
    all other Jekyll keys are dropped. The file is overwritten in place and
    no backup copy is made.

    Args:
        file_path: Path of the UTF-8 Markdown file to convert.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        lines = file.readlines()

    # The opening delimiter must be the first non-blank line of the file.
    start = next((i for i, line in enumerate(lines) if line.strip()), None)
    if start is None or lines[start].strip() != '---':
        return

    # Without a closing delimiter there is no front matter block to convert;
    # rewriting anyway would discard the rest of the file.
    end = next(
        (i for i in range(start + 1, len(lines)) if lines[i].strip() == '---'),
        None,
    )
    if end is None:
        return

    title = ''
    date = ''
    for fm_line in lines[start + 1:end]:
        if fm_line.startswith('title:'):
            title = fm_line.split(':', 1)[1].strip()
        elif fm_line.startswith('date:'):
            date = fm_line.split(':', 1)[1].strip()

    hugo_front_matter = [
        "---\n",
        "# Title, summary, and page position.\n",
        f"linktitle: {title}\n",
        f"summary: {title} Documentation\n",
        "weight: 1\n",
        "icon: book\n",
        "icon_pack: fas\n",
        "\n# Page metadata.\n",
        f"title: {title}\n",
        f"date: {date}\n",
        "type: book # Do not modify.\n",
        "---\n",
    ]

    # Keep anything before the block (blank lines) and everything after it verbatim.
    new_content = ''.join(lines[:start] + hugo_front_matter + lines[end + 1:])

    with open(file_path, 'w', encoding='utf-8') as file:
        file.write(new_content)
    print(f"Transformed Jekyll front matter to Hugo front matter in '{file_path}'")


def process_files(directory):
    """Walk a directory tree and apply the enabled edits to every file in it.

    Currently only the image-link rewrite ('](/assets/media/' -> '](/i/') is
    enabled; the other steps are kept as commented-out toggles. Files that
    cannot be decoded as UTF-8 (images, PDFs, ...) are reported and skipped
    so that a single binary file does not abort the whole run.

    Args:
        directory: Root of the tree to process; every file below it is visited.
    """
    print(f"attempting {directory}")
    for root, _dirs, files in os.walk(directory):
        #print(root)
        for file in files:
            file_path = os.path.join(root, file)

            try:
                #remove_multiline_string(file_path, multiline_string_to_remove)

                # Transform Jekyll front matter to Hugo for Markdown files
                #if file_path.endswith('.md') or file_path.endswith('.markdown'):
                #    transform_jekyll_to_hugo(file_path)

                # Rename 'index.markdown' to '_index.md'
                #rename_file('index.markdown', '_index.md', file_path)

                # Rewrite image links from the old media folder to the new one
                replace_text_in_file(file_path, '](/assets/media/', '](/i/')
            except UnicodeDecodeError:
                # Not a UTF-8 text file, so there is nothing to rewrite in it
                print(f"Skipped non-UTF-8 file '{file_path}'")

# Jekyll table-of-contents markup; process_files() has a commented-out
# remove_multiline_string() step that takes this as its argument
multiline_string_to_remove = '''<details open markdown="block">
  <summary>
    Table of contents
  </summary>
  {: .text-delta }
1. TOC
{:toc}
</details>'''

# Root of the content tree to process -- change this path to point at your own site
directory = '/home/james/v/netlifysites/site.grimoire/content'

# Run the edits over every file below the content root
process_files(directory)