#!/usr/bin/python3 """ Script to convert content from using Zola/Markdown do using Hugo/Org. """ from pathlib import Path import os import re import shutil import subprocess import tomllib def get_files(dir): markdown_files = set() data_files = set() for root, dirs, files in os.walk(dir): path = Path(root).relative_to(dir) for filename in files: filename_path = Path(path / filename) filename_extension = filename_path.suffix if filename_path.suffix == ".md": markdown_files.add(filename_path) else: data_files.add(filename_path) return markdown_files, data_files def copy_data_files(old_content_dir, new_content_dir, data_files): for path in data_files: old_path = old_content_dir / path new_path = new_content_dir / path new_path.parent.mkdir(mode=0o755, parents=True, exist_ok=True) shutil.copyfile(old_path, new_path) def convert_markdown_file(source_path, destination_path): with open(source_path, "r") as source_file: content = source_file.read() preamble, body = re.match( r'^\s*?\+\+\+\s*$\s*(.*)$\s*^\+\+\+\s*$\s*(.*)$', content, re.MULTILINE | re.DOTALL).groups() parsed_preamble = tomllib.loads(preamble) pandoc_output = subprocess.run( ["pandoc", "-f", "markdown", "-t", "org"], input=body.encode("utf-8"), stdout=subprocess.PIPE) new_body = pandoc_output.stdout.decode("utf-8").replace( "\n#+begin_html\n \n#+end_html\n", "\n# more\n", ).strip() new_body = re.sub( r'^#\+caption: (.*)', r'#+ATTR_HTML: :title \1 :alt \1', new_body, re.MULTILINE) new_body = re.sub( r'{{\s*download\(filename="(.*)"\)\s*}}', r'[[file:\1][Download]]', new_body) new_body = re.sub( r'\n:PROPERTIES:\n:CUSTOM_ID: [\w\.-]+\n:END:\n', r'\n', new_body) title = parsed_preamble.get("title", None) date = parsed_preamble.get("date", None) path = parsed_preamble.get("path", None) taxonomies = parsed_preamble.get("taxonomies", {}) categories = taxonomies.get("categories", []) tags = taxonomies.get("tags", []) extra = parsed_preamble.get("extra", {}) links = extra.get("links", {}) mastodon_link = links.get("mastodon", None) image = extra.get("image", None) preamble_lines = [] if title: preamble_lines.append(f"#+TITLE: {title}\n") if date: preamble_lines.append(f"#+DATE: {date}\n") if path: preamble_lines.append(f"#+URL: /{path}\n") if categories: preamble_lines.append(f"#+CATEGORIES[]: {' '.join(categories)}\n") if tags: preamble_lines.append(f"#+TAGS[]: {' '.join(tags)}\n") if mastodon_link: preamble_lines.append(f"#+MASTODON_LINK: {mastodon_link}\n") with open(destination_path, "w") as destination_file: for line in preamble_lines: destination_file.write(line) if preamble_lines and new_body: destination_file.write("\n") if new_body: destination_file.write(f"{new_body}\n") write_config(destination_path.parent / "config.toml", extra) def write_config(config_path, extra): image = extra.get("image", None) gallery = extra.get("gallery", None) if not (image or gallery): return output = [] if image: output.append(f"""\ [image] title = "{image['title']}" filename = "{image['filename']}" """) if gallery: for filename, entry in gallery.items(): output.append("[[gallery]]") if "title" in entry: output.append(f"title = \"{entry['title']}\"") if "description" in entry: output.append(f"description = \"{entry['description']}\"") output.append(f"filename = \"{filename}\"") if entry.get("featured", False): output.append("featured = true") output.append("") with open(config_path, "w") as config_file: config_file.write("\n".join(output)) def convert_markdown_files(old_content_dir, new_content_dir, markdown_files): for path in markdown_files: source_path = old_content_dir / path destination_path = new_content_dir / path.with_suffix(".org") convert_markdown_file(source_path, destination_path) def main(): project_dir = Path.cwd() old_content_dir = project_dir / "content.old" new_content_dir = project_dir / "content" markdown_files, data_files = get_files(old_content_dir) try: shutil.rmtree(new_content_dir) except FileNotFoundError: pass copy_data_files(old_content_dir=old_content_dir, new_content_dir=new_content_dir, data_files=data_files) convert_markdown_files(old_content_dir=old_content_dir, new_content_dir=new_content_dir, markdown_files=markdown_files) if __name__ == '__main__': main()