You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
109 lines
3.8 KiB
109 lines
3.8 KiB
import io
|
|
import os
|
|
from bs4 import BeautifulSoup, Comment
|
|
import markdown
|
|
import ipdb
|
|
|
|
# Relative path of a content directory. No code visible in this file reads it;
# presumably the root under which the Markdown posts live -- confirm before removing.
parent_dir = "./content"
|
|
|
|
|
|
def parse_markdown_file(file_path):
    """Parse a Markdown file into a nested outline of its h1-h3 headings.

    The file is rendered to HTML with ``markdown.markdown`` and the result is
    scanned line by line. Only lines that begin with a bare ``<h1>``, ``<h2>``
    or ``<h3>`` tag are treated as headings; every other line is ignored.

    Args:
        file_path: Path of the Markdown file to read (decoded as UTF-8).

    Returns:
        A list of the top-level heading dicts in document order. Each dict has
        the keys ``"level"`` (1, 2 or 3, taken from the tag itself), ``"text"``
        (the markup between the opening and closing tag) and ``"children"``
        (a list of nested heading dicts of the same shape). A heading whose
        expected parent is missing, such as an ``<h2>`` before any ``<h1>``,
        is attached to the nearest shallower heading, or to the top-level
        list when there is none.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    with open(file_path, "r", encoding="utf-8") as file:
        markdown_text = file.read()
    html = markdown.markdown(markdown_text)

    headings = []
    # Chain of headings that are still "open", shallowest first; the levels in
    # this list are strictly increasing, so the last entry is the parent
    # candidate for the next deeper heading.
    open_headings = []
    for line in html.split("\n"):
        level = next(
            (n for n in (1, 2, 3) if line.startswith(f"<h{n}>")), None
        )
        if level is None:
            continue

        # "<hN>" is 4 characters and "</hN>" is 5; the slice assumes the
        # closing tag ends the same line as the opening tag.
        node = {"level": level, "text": line[4:-5], "children": []}

        # Close every heading that is at the same depth or deeper, so the
        # node is filed under the nearest strictly shallower heading.
        while open_headings and open_headings[-1]["level"] >= level:
            open_headings.pop()
        siblings = open_headings[-1]["children"] if open_headings else headings
        siblings.append(node)
        open_headings.append(node)
    return headings
|
|
|
|
|
|
def parse_markdown_file_2(file_path):
    """Parse a Markdown file into a nested outline of headings and their links.

    The file is rendered to HTML with ``markdown.markdown`` and parsed with
    BeautifulSoup. ``<h1>``-``<h3>`` elements become outline nodes, and the
    links found in ``<ul>`` lists are attached to the heading that most
    recently precedes the list.

    Args:
        file_path: Path of the Markdown file to read (decoded as UTF-8).

    Returns:
        A list of the top-level heading dicts in document order. Each dict has
        the keys ``"text"`` (stripped heading text), ``"level"`` (1, 2 or 3,
        taken from the tag name), ``"subheadings"`` (nested heading dicts of
        the same shape) and ``"links"`` (a list of ``{"text", "url"}`` dicts,
        one per list item that contains a link with an ``href``). A heading
        whose expected parent is missing is attached to the nearest shallower
        heading, or to the top-level list when there is none. Lists that
        appear before the first heading are ignored.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    with open(file_path, "r", encoding="utf-8") as f:
        content = f.read()
    soup = BeautifulSoup(markdown.markdown(content), "html.parser")

    headings = []
    # Chain of headings that are still "open", shallowest first; the last
    # entry is both the parent candidate for a deeper heading and the owner
    # of any list that follows.
    open_headings = []
    for element in soup.find_all(["h1", "h2", "h3", "ul"]):
        if element.name == "ul":
            # A nested <ul> is skipped: its items are already collected by
            # the find_all("li") call made on the outermost list.
            if element.find_parent("ul") is not None or not open_headings:
                continue
            owner_links = open_headings[-1]["links"]
            for li in element.find_all("li"):
                link = li.find("a", href=True)
                if link is not None:
                    owner_links.append(
                        {"text": link.text.strip(), "url": link["href"]}
                    )
            continue

        # The heading level comes from the tag name ("h2" -> 2), not from how
        # deeply the element happens to be nested in the HTML tree.
        level = int(element.name[1])
        node = {
            "text": element.text.strip(),
            "level": level,
            "subheadings": [],
            "links": [],
        }
        while open_headings and open_headings[-1]["level"] >= level:
            open_headings.pop()
        siblings = (
            open_headings[-1]["subheadings"] if open_headings else headings
        )
        siblings.append(node)
        open_headings.append(node)

    return headings
|
|
|
|
|
|
# Script section: build the outline of the sample post, dump the raw structure,
# then print one line per heading, going down to the third level.
headings = parse_markdown_file_2("content/posts/my-first-post.md")
print(headings)
for heading in headings:
    print(f"Titre de niveau {heading['level']}: {heading['text']}")
    # parse_markdown_file_2 stores nested headings under "subheadings"
    # (the "children" key belongs to parse_markdown_file's output).
    for subheading in heading["subheadings"]:
        print(f" Sous-titre de niveau {subheading['level']}: {subheading['text']}")
        for subsubheading in subheading["subheadings"]:
            print(
                f" Sous-sous-titre de niveau {subsubheading['level']}: {subsubheading['text']}"
            )
|