Browse Source

[FIX] folder hierarchy and some tricks :)

refactor_hedgedoc_parser
Nicolas JEUDY 7 months ago
parent
commit
846692b133
  1. 53
      generate_v3.py

53
generate_v3.py

@ -5,6 +5,12 @@ from bs4 import BeautifulSoup
import markdown
def parse_for_toc(content):
    """Return *content* with every literal ``[TOC]`` marker removed.

    The match is exact and case-sensitive; text without the marker is
    returned unchanged.

    Args:
        content: Markdown text as a ``str``.

    Returns:
        The text with all ``[TOC]`` occurrences deleted.
    """
    # TODO: ADD .TableofContent directly to the page (see hugo theme ?)
    toc_marker = "[TOC]"
    # Splitting on the marker and re-joining with nothing drops each
    # occurrence while leaving the surrounding text untouched.
    pieces = content.split(toc_marker)
    return "".join(pieces)
def parse_for_notice(content):
result = re.findall(":::warning.*?:::", content, re.MULTILINE | re.DOTALL)
for notice in result:
@ -42,7 +48,7 @@ def parse_for_notice(content):
":::", "{{% /notice %}}"
),
)
print(content)
# print(content)
return content
@ -68,36 +74,58 @@ def parse_markdown_file(markdown_file_path, base_dir="./"):
heading_level = int(element.name[1])
# Determine the directory to create for the heading
if heading_level == current_heading_level:
heading_dir = os.path.join(last_heading_dir, heading_text)
else:
print(
"heading_level: %s(%s) , heading_text: %s, base_dir: %s, last_heading_dir: %s, current_heading_dir: %s "
% (
heading_level,
current_heading_level,
heading_text,
base_dir,
last_heading_dir,
current_heading_dir,
)
)
if heading_level == 1:
heading_dir = os.path.join(base_dir, heading_text)
current_heading_dir = heading_dir
last_heading_dir = base_dir
elif heading_level == current_heading_level:
heading_dir = os.path.join(
os.path.dirname(current_heading_dir), heading_text
)
last_heading_dir = heading_dir
current_heading_dir = heading_dir
elif heading_level >= current_heading_level:
heading_dir = os.path.join(current_heading_dir, heading_text)
last_heading_dir = current_heading_dir
current_heading_dir = heading_dir
else:
print("NOT SUPPORTED YET")
if not os.path.exists(heading_dir):
os.makedirs(heading_dir)
# add _index.md for page organization
with open(heading_dir + "/_index.md", "wb") as f:
print("Created", heading_dir + "/_index.md")
index = (
"""
index = """
+++
title = "%s"
title = "{heading_text}"
chapter = true
weight = 5
+++
# Basics
# {heading_text}
Discover what this Hugo theme is all about and the core-concepts behind it.
"""
% heading_text
""".format(
heading_text=heading_text
)
f.write(b"%s" % index.encode("utf-8"))
# Set the current heading level and directory
current_heading_level = heading_level
last_heading_dir = current_heading_dir
current_heading_dir = heading_dir
# last_heading_dir = current_heading_dir
# current_heading_dir = heading_dir
elif element.name == "hr":
current_heading_level = 0
@ -122,6 +150,7 @@ Discover what this Hugo theme is all about and the core-concepts behind it.
content = parse_for_notice(
response.content.decode("utf-8")
).encode("utf-8")
content = parse_for_toc(content.decode("utf-8")).encode("utf-8")
f.write(hugo_header.encode("utf-8"))
f.write(content.replace(b"---", b""))
print("Downloaded", doc_link, "to", file_path)

Loading…
Cancel
Save