hugo-from-hedgedoc/generate_v3.py


								import os

								import re

								import requests

								from bs4 import BeautifulSoup

								import markdown


								def parse_for_toc(content):
								    """Return *content* with every literal ``[TOC]`` marker removed."""
								    # TODO: ADD .TableofContent directly to the page (see hugo theme ?)
								    toc_marker = "[TOC]"
								    stripped = content.replace(toc_marker, "")
								    return stripped


								def parse_for_notice(content):
								    """Rewrite ``:::<kind> ... :::`` blocks as ``notice`` shortcodes.

								    Each block kind is processed in turn, in the order listed in the table
								    below. For every match, the opening ``:::<kind>`` becomes
								    ``{{% notice <style> %}}`` and the closing ``:::`` becomes
								    ``{{% /notice %}}``. Text outside matched blocks is returned unchanged.
								    """
								    # (block kind, notice style) pairs; the order is significant because
								    # each pass works on the output of the previous one.
								    kind_to_style = (
								        ("warning", "info"),
								        ("info", "note"),
								        ("success", "tip"),
								        ("danger", "warning"),
								    )
								    flags = re.MULTILINE | re.DOTALL
								    for kind, style in kind_to_style:
								        opening = ":::" + kind
								        # Non-greedy: a block ends at the first ":::" after its opening.
								        for block in re.findall(opening + ".*?:::", content, flags):
								            converted = block.replace(opening, "{{% notice " + style + " %}}")
								            converted = converted.replace(":::", "{{% /notice %}}")
								            content = content.replace(block, converted)
								    return content


								def _write_section_index(heading_dir, heading_text):
								    """Write the ``_index.md`` chapter page for a newly created section."""
								    # Assembled from explicit lines so the generated file starts directly
								    # with the front matter delimiter and never picks up indentation or
								    # trailing spaces from this source file.
								    index = "\n".join(
								        [
								            "+++",
								            'title = "{heading_text}"',
								            "chapter = true",
								            "weight = 5",
								            "+++",
								            "",
								            "# {heading_text}",
								            "",
								            "Discover what this Hugo theme is all about and the core-concepts behind it.",
								            "",
								        ]
								    ).format(heading_text=heading_text)
								    index_path = heading_dir + "/_index.md"
								    with open(index_path, "wb") as f:
								        f.write(index.encode("utf-8"))
								    print("Created", index_path)


								def _download_linked_note(link, target_dir):
								    """Download the document behind *link* into *target_dir* as ``<link text>.md``."""
								    link_text = link.text.strip()
								    # .get() instead of ["href"]: an anchor without href must not raise.
								    link_url = link.get("href")
								    if not link_url:
								        return

								    file_path = os.path.join(target_dir, os.path.basename(link_text))
								    doc_link = link_url + "/download"

								    # Fetch before opening the output file so a failed request does not
								    # leave an empty page behind; the timeout keeps a dead server from
								    # hanging the whole run.
								    response = requests.get(doc_link, timeout=30)
								    if not response.ok:
								        print("Skipped", doc_link, "- HTTP status", response.status_code)
								        return

								    # Decode once, transform as text, encode once on write.
								    text = response.content.decode("utf-8")
								    text = parse_for_toc(parse_for_notice(text))
								    hugo_header = '---\ntitle: "' + link_text + '"\n---\n\n'

								    with open(file_path + ".md", "wb") as f:
								        f.write(hugo_header.encode("utf-8"))
								        # NOTE(review): this removes every "---" in the document, which
								        # also hits table separators and horizontal rules - confirm that
								        # only the downloaded front matter was meant to be stripped.
								        f.write(text.replace("---", "").encode("utf-8"))
								    print("Downloaded", doc_link, "to", file_path)


								def parse_markdown_file(markdown_file_path, base_dir="./"):
								    """Build a directory tree of pages from an index markdown file.

								    The file is rendered to HTML and its top-level elements are walked in
								    document order:

								    * ``h1``/``h2``/``h3`` - a directory named after the heading text is
								      created (nested according to heading level) and, when the directory
								      is new, an ``_index.md`` chapter page is written into it.
								    * ``hr`` - resets the position to *base_dir*; lists that follow are
								      ignored until the next heading.
								    * ``ul`` - every link in the list is downloaded from ``<href>/download``
								      and saved as ``<link text>.md`` in the current heading directory.

								    Args:
								        markdown_file_path: Path of the markdown index file to read (UTF-8).
								        base_dir: Root directory of the generated tree; created if missing.

								    Returns:
								        None.

								    Raises:
								        OSError: If the index file cannot be read or an output path cannot
								            be written.
								        requests.RequestException: If a download fails at the network level.
								    """
								    with open(markdown_file_path, "r", encoding="utf-8") as f:
								        markdown_text = f.read()

								    html = markdown.markdown(markdown_text, extensions=["fenced_code"])
								    soup = BeautifulSoup(html, "html.parser")

								    os.makedirs(base_dir, exist_ok=True)

								    current_heading_level = 1
								    current_heading_dir = base_dir
								    last_heading_dir = base_dir
								    # Directory of the most recent heading at each level since the last
								    # rule; used to re-attach a heading that is shallower than the
								    # previous one (h3 followed by h2).
								    level_dirs = {}

								    for element in soup.children:
								        if element.name in ("h1", "h2", "h3"):
								            heading_text = element.text.strip()
								            heading_level = int(element.name[1])

								            print(
								                "heading_level: %s(%s) , heading_text: %s, base_dir: %s, last_heading_dir: %s, current_heading_dir: %s "
								                % (
								                    heading_level,
								                    current_heading_level,
								                    heading_text,
								                    base_dir,
								                    last_heading_dir,
								                    current_heading_dir,
								                )
								            )

								            if heading_level == 1:
								                # Top-level headings always hang off the base directory.
								                heading_dir = os.path.join(base_dir, heading_text)
								                current_heading_dir = heading_dir
								                last_heading_dir = base_dir
								            elif heading_level == current_heading_level:
								                # Sibling: same parent as the previous heading.
								                heading_dir = os.path.join(
								                    os.path.dirname(current_heading_dir), heading_text
								                )
								                last_heading_dir = heading_dir
								                current_heading_dir = heading_dir
								            elif heading_level > current_heading_level:
								                # Child of the previous heading.
								                heading_dir = os.path.join(current_heading_dir, heading_text)
								                last_heading_dir = current_heading_dir
								                current_heading_dir = heading_dir
								            else:
								                # Shallower than the previous heading: attach to the
								                # nearest shallower heading seen, or to base_dir.
								                shallower = [lvl for lvl in level_dirs if lvl < heading_level]
								                parent_dir = level_dirs[max(shallower)] if shallower else base_dir
								                heading_dir = os.path.join(parent_dir, heading_text)
								                last_heading_dir = parent_dir
								                current_heading_dir = heading_dir

								            # Deeper entries belonged to the previous subtree; drop them.
								            level_dirs = {
								                lvl: path for lvl, path in level_dirs.items() if lvl < heading_level
								            }
								            level_dirs[heading_level] = heading_dir

								            # Only a newly created section gets an _index.md, so an
								            # existing (possibly hand-edited) one is never overwritten.
								            if not os.path.exists(heading_dir):
								                os.makedirs(heading_dir)
								                _write_section_index(heading_dir, heading_text)

								            current_heading_level = heading_level

								        elif element.name == "hr":
								            # Level 0 marks "outside any section": lists are skipped
								            # until the next heading.
								            current_heading_level = 0
								            current_heading_dir = base_dir
								            level_dirs = {}

								        elif element.name == "ul" and current_heading_level != 0:
								            for link in element.find_all("a"):
								                _download_linked_note(link, current_heading_dir)


								# Run the generation only when executed as a script, so importing this
								# module does not touch the filesystem or the network.
								if __name__ == "__main__":
								    headings = parse_markdown_file("content/my-first-post.md", base_dir="./content")
								    print(headings)