# hugo-from-hedgedoc/generate_v3.py

import os
import requests
from bs4 import BeautifulSoup
import markdown


def _download_link(link, target_dir):
    """Download the document behind one ``<a>`` tag into ``target_dir``.

    The file name is the basename of the link text. Links without an
    ``href`` or without a usable file name are skipped, as are responses
    with an HTTP error status (a message is printed for the latter).
    Network-level ``requests`` exceptions propagate to the caller.
    """
    link_url = link.get("href")
    file_name = os.path.basename(link.text.strip())
    if not link_url or not file_name:
        return

    # The download endpoint is "<note url>/download". Only trim a trailing
    # "/" or "#"; chopping the last character unconditionally would corrupt
    # URLs that have neither.
    # NOTE(review): presumably the links are HedgeDoc note URLs - confirm.
    note_url = link_url[:-1] if link_url.endswith(("/", "#")) else link_url

    # Fetch before opening the target so a failed request cannot leave an
    # empty or truncated file behind.
    response = requests.get(note_url + "/download", timeout=30)
    if not response.ok:
        print("Skipped", link_url, "- HTTP status", response.status_code)
        return

    file_path = os.path.join(target_dir, file_name)
    with open(file_path, "wb") as f:
        f.write(response.content)
    print("Downloaded", link_url, "to", file_path)


def parse_markdown_file(markdown_file_path, base_dir="./"):
    """Mirror a Markdown outline as directories and download its links.

    The file is rendered to HTML and its top-level elements are walked in
    document order:

    * ``h1``-``h3`` headings create a directory named after the heading
      text, nested below the closest preceding heading of a lower level
      (or ``base_dir`` when there is none).
    * ``hr`` resets the outline to ``base_dir``; lists that follow are
      ignored until the next heading.
    * ``ul`` lists have every contained link downloaded into the directory
      of the current heading (``base_dir`` before the first heading).

    Args:
        markdown_file_path: Path of the Markdown file to read (as UTF-8).
        base_dir: Root directory for the generated tree; created if missing.

    Returns:
        The heading directories, in document order.

    Raises:
        OSError: If the Markdown file cannot be read or a directory/file
            cannot be created.
        requests.RequestException: If a download fails at network level.
    """
    with open(markdown_file_path, "r", encoding="utf-8") as f:
        markdown_text = f.read()

    html = markdown.markdown(markdown_text, extensions=["fenced_code"])
    soup = BeautifulSoup(html, "html.parser")

    os.makedirs(base_dir, exist_ok=True)

    # Stack of (level, directory) for the currently open headings, shallowest
    # first. Sibling headings replace each other instead of nesting.
    open_headings = []
    current_dir = base_dir
    # False between an <hr> and the next heading: lists there are skipped.
    section_active = True
    heading_dirs = []

    for element in soup.children:
        if element.name in ("h1", "h2", "h3"):
            heading_text = element.text.strip()
            heading_level = int(element.name[1])

            # Close every heading at the same or a deeper level; what is
            # left on top of the stack is the parent of the new heading.
            while open_headings and open_headings[-1][0] >= heading_level:
                open_headings.pop()
            parent_dir = open_headings[-1][1] if open_headings else base_dir

            # NOTE(review): the heading text is used verbatim as a directory
            # name, so path separators in a heading create nested paths.
            current_dir = os.path.join(parent_dir, heading_text)
            os.makedirs(current_dir, exist_ok=True)

            open_headings.append((heading_level, current_dir))
            heading_dirs.append(current_dir)
            section_active = True

        elif element.name == "hr":
            open_headings = []
            current_dir = base_dir
            section_active = False

        elif element.name == "ul" and section_active:
            for link in element.find_all("a"):
                _download_link(link, current_dir)

    return heading_dirs


# Run the conversion only when this file is executed as a script, so that
# importing the module does not trigger file writes and network downloads.
if __name__ == "__main__":
    headings = parse_markdown_file(
        "content/posts/my-first-post.md", base_dir="./content/posts"
    )
    # Prints whatever parse_markdown_file returned.
    print(headings)