Python Change Markdown Globally
Python Script to Globally Modify Markdown
I needed to massage all several hundred of my articles, so I’m saving this script for the record and might document it properly later. New sites won’t need this; it solved a problem specific to my setup, but I’m documenting the solution anyway.
import os
import re
import shutil
import sys
def rename_file(old_name, new_name, file_path):
    """Rename ``file_path`` to ``new_name`` if its base name equals ``old_name``.

    The file stays in its current directory; only the base name changes.
    Paths whose base name is not ``old_name`` are ignored.

    An existing, different file at the destination is never overwritten:
    ``os.rename`` would silently replace it on POSIX (and raise on Windows),
    so the rename is skipped and reported instead.

    Args:
        old_name: Base name a file must have to be renamed.
        new_name: Base name to give the file.
        file_path: Path of the candidate file.
    """
    if os.path.basename(file_path) != old_name:
        return

    new_file_path = os.path.join(os.path.dirname(file_path), new_name)

    # samefile() lets a case-only rename through on case-insensitive
    # filesystems, where the "existing" destination is the source itself.
    if os.path.exists(new_file_path) and not os.path.samefile(file_path, new_file_path):
        print(f"Skipped renaming '{file_path}': '{new_file_path}' already exists")
        return

    os.rename(file_path, new_file_path)
    print(f"Renamed '{file_path}' to '{new_file_path}'")
def replace_text_in_file2(file_path, search_text, replace_text):
    """Apply a regular-expression substitution to a UTF-8 text file.

    ``search_text`` is used as a regex pattern and ``replace_text`` as the
    replacement template, so group references such as ``\\1`` are expanded.
    The file is rewritten, and a message printed, only when the substitution
    actually changes the content.
    """
    with open(file_path, 'r', encoding='utf-8') as src:
        original = src.read()

    updated = re.compile(search_text).sub(replace_text, original)
    if updated == original:
        # Nothing matched (or the replacement was a no-op): leave the file alone.
        return

    with open(file_path, 'w', encoding='utf-8') as dst:
        dst.write(updated)
    print(f"Updated text in '{file_path}'")
def replace_text_in_file(file_path, search_text, replace_text):
    """Replace every literal occurrence of ``search_text`` in a UTF-8 text file.

    Both arguments are treated as plain text. Using ``str.replace`` rather
    than ``re.sub`` matters for the replacement side: ``re.sub`` interprets
    backslashes and group references in the replacement, so a literal such
    as ``C:\\docs`` would raise ``re.error`` or be silently rewritten.

    The file is rewritten, and a message printed, only when the content
    actually changes.

    Args:
        file_path: Path of the file to edit in place.
        search_text: Literal text to look for. An empty string is ignored,
            because it would match between every pair of characters.
        replace_text: Literal text to insert in place of each occurrence.
    """
    if not search_text:
        return

    with open(file_path, 'r', encoding='utf-8') as file:
        content = file.read()

    new_content = content.replace(search_text, replace_text)
    if new_content != content:
        with open(file_path, 'w', encoding='utf-8') as file:
            file.write(new_content)
        print(f"Updated text in '{file_path}'")
def remove_multiline_string(file_path, multiline_string):
    """Delete every exact occurrence of ``multiline_string`` from a UTF-8 text file.

    The string is matched literally, line breaks included. The file is
    rewritten, and a message printed, only when something was removed.
    """
    with open(file_path, 'r', encoding='utf-8') as src:
        original = src.read()

    # A literal, non-overlapping removal of the whole snippet.
    stripped = original.replace(multiline_string, '')
    if stripped == original:
        return

    with open(file_path, 'w', encoding='utf-8') as dst:
        dst.write(stripped)
    print(f"Removed specified multiline string from '{file_path}'")
def transform_jekyll_to_hugo(file_path, *, backup=False):
    """Replace a file's leading Jekyll front matter with a Hugo front matter block.

    Only a front matter block that opens on the first non-blank line of the
    file is transformed. Later ``---`` lines are Markdown horizontal rules
    and are left untouched, so body content between two rules is never
    swallowed or replaced by an extra header.

    The ``title:`` and ``date:`` values are carried over into the Hugo
    block; all other Jekyll keys are dropped. If a key appears more than
    once, the last value wins.

    The file is left unchanged when it has no leading front matter, or when
    the opening ``---`` is never closed (rewriting would otherwise discard
    the rest of the file).

    Args:
        file_path: Path of the Markdown file to rewrite in place.
        backup: When true, copy the file to ``file_path + '.backup'`` before
            rewriting it.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        lines = file.readlines()

    # Locate the opening delimiter: it must be the first non-blank line.
    start = next((i for i, line in enumerate(lines) if line.strip()), None)
    if start is None or lines[start].strip() != '---':
        return

    # Locate the matching closing delimiter.
    end = next(
        (i for i in range(start + 1, len(lines)) if lines[i].strip() == '---'),
        None,
    )
    if end is None:
        print(f"Unterminated front matter in '{file_path}'; left unchanged")
        return

    title = ''
    date = ''
    for fm_line in lines[start + 1:end]:
        if fm_line.startswith('title:'):
            title = fm_line.split(':', 1)[1].strip()
        elif fm_line.startswith('date:'):
            date = fm_line.split(':', 1)[1].strip()

    # A quoted title must keep the suffix inside the quotes; appending it
    # after the closing quote would produce invalid YAML.
    if len(title) >= 2 and title[0] == title[-1] and title[0] in ('"', "'"):
        summary = f"{title[:-1]} Documentation{title[-1]}"
    else:
        summary = f"{title} Documentation"

    hugo_front_matter = [
        "---\n",
        "# Title, summary, and page position.\n",
        f"linktitle: {title}\n",
        f"summary: {summary}\n",
        "weight: 1\n",
        "icon: book\n",
        "icon_pack: fas\n",
        "\n# Page metadata.\n",
        f"title: {title}\n",
        f"date: {date}\n",
        "type: book # Do not modify.\n",
        "---\n",
    ]

    if backup:
        backup_file_path = file_path + '.backup'
        shutil.copy(file_path, backup_file_path)
        print(f"Backup created at '{backup_file_path}'")

    new_content = ''.join(lines[:start] + hugo_front_matter + lines[end + 1:])
    with open(file_path, 'w', encoding='utf-8') as file:
        file.write(new_content)
    print(f"Transformed Jekyll front matter to Hugo front matter in '{file_path}'")
def process_files(directory):
    """Walk ``directory`` recursively and apply the enabled edits to every file.

    The only step currently enabled rewrites image links from
    ``](/assets/media/`` to ``](/i/``. The other steps are kept as comments
    so they can be switched back on for a one-off run.

    Files that cannot be decoded as UTF-8 (images and other binary assets
    living in the same tree) are skipped and reported rather than aborting
    the whole run.

    Args:
        directory: Root of the directory tree to process.
    """
    print(f"attempting {directory}")
    for root, _dirs, files in os.walk(directory):
        #print(root)
        for file in files:
            file_path = os.path.join(root, file)
            #remove_multiline_string(file_path, multiline_string_to_remove)
            # Transform Jekyll front matter to Hugo for Markdown files
            #if file_path.endswith('.md') or file_path.endswith('.markdown'):
            #    transform_jekyll_to_hugo(file_path)
            # Rename 'index.markdown' to '_index.md'
            #rename_file('index.markdown', '_index.md', file_path)
            # Point image links at the new media location
            try:
                replace_text_in_file(file_path, '](/assets/media/', '](/i/')
            except UnicodeDecodeError:
                # Not a UTF-8 text file; there is nothing to replace in it.
                print(f"Skipped non-UTF-8 file '{file_path}'")
# Root of the content tree to process; change this to point at your own site.
directory = '/home/james/v/netlifysites/site.grimoire/content'

# Table-of-contents snippet, one entry per line of the block, for the
# remove_multiline_string step in process_files when that step is enabled.
multiline_string_to_remove = "\n".join((
    '<details open markdown="block">',
    '<summary>',
    'Table of contents',
    '</summary>',
    '{: .text-delta }',
    '1. TOC',
    '{:toc}',
    '</details>',
))

# Run the enabled edits over the whole tree.
process_files(directory)