You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 

109 lines
3.8 KiB

import io
import os
from bs4 import BeautifulSoup, Comment
import markdown
import ipdb
# Presumably the root directory of the Markdown content (TODO confirm).
# No code visible in this section reads this name; the sample path used at the
# bottom of the script is written out literally instead.
parent_dir = "./content"
def parse_markdown_file(file_path):
    """Build a nested outline of the h1-h3 headings found in a Markdown file.

    The file is rendered to HTML with ``markdown.markdown`` and the result is
    scanned line by line for lines that start with ``<h1>``, ``<h2>`` or
    ``<h3>``. Deeper heading levels and all other content are ignored.

    Args:
        file_path: Path of the Markdown file to read. It is decoded as UTF-8.

    Returns:
        A list of dicts of the form
        ``{"level": int, "text": str, "children": list}``. Each heading is
        stored in the ``children`` list of the closest preceding heading that
        has a smaller level. A heading with no such predecessor (for example
        an ``<h2>`` in a document that has no ``<h1>``) is placed directly in
        the returned list instead of raising ``IndexError``.
    """
    with open(file_path, "r", encoding="utf-8") as file:
        markdown_text = file.read()
    html = markdown.markdown(markdown_text)

    headings = []
    # Headings that can still receive children, outermost first. The last
    # entry is always the most recently seen heading.
    open_chain = []

    for line in html.split("\n"):
        level = next(
            (n for n in (1, 2, 3) if line.startswith(f"<h{n}>")),
            None,
        )
        if level is None:
            continue

        open_tag = f"<h{level}>"
        close_tag = f"</h{level}>"
        text = line[len(open_tag):]
        # Only drop the closing tag when it is really there, so a line with
        # trailing content is not truncated by a blind fixed-width slice.
        if text.endswith(close_tag):
            text = text[: -len(close_tag)]

        # The level is taken from the tag itself, so an <h2> that follows an
        # <h3> is still recorded as level 2.
        node = {"level": level, "text": text, "children": []}

        # Close every heading that is at the same depth or deeper: it cannot
        # be the parent of the new one.
        while open_chain and open_chain[-1]["level"] >= level:
            open_chain.pop()

        if open_chain:
            open_chain[-1]["children"].append(node)
        else:
            headings.append(node)
        open_chain.append(node)

    return headings
def parse_markdown_file_2(file_path):
    """Extract the h1-h3 heading outline of a Markdown file with its links.

    The file is rendered to HTML with ``markdown.markdown`` and parsed with
    BeautifulSoup. Headings and unordered lists are visited in document
    order; the links of each list are attached to the heading that most
    recently precedes it.

    Args:
        file_path: Path of the Markdown file to read. It is decoded as UTF-8.

    Returns:
        A list of heading dicts. ``h1`` and ``h2`` entries have the keys
        ``"text"``, ``"level"``, ``"subheadings"`` and ``"links"``; ``h3``
        entries have ``"text"``, ``"level"`` and ``"links"`` only. ``"level"``
        is the heading level (1-3). ``"links"`` is a list of
        ``{"text": str, "url": str}`` dicts. A heading is stored in the
        ``"subheadings"`` of the closest preceding heading with a smaller
        level, or in the returned list when there is none. Lists that appear
        before the first heading have no owner and are skipped.
    """
    with open(file_path, "r", encoding="utf-8") as f:
        content = f.read()
    soup = BeautifulSoup(markdown.markdown(content), "html.parser")

    headings = []
    # Headings that can still receive sub-headings, outermost first. The last
    # entry is the most recently seen heading, i.e. the owner of the next list.
    open_chain = []

    # find_all() yields matches in document order, wherever they are nested.
    for element in soup.find_all(["h1", "h2", "h3", "ul"]):
        if element.name == "ul":
            # A nested list is already covered by the find_all("li") call on
            # its outermost list; visiting it again would duplicate links.
            if element.find_parent("ul") is not None or not open_chain:
                continue
            links = []
            for li in element.find_all("li"):
                # href=True skips anchors that carry no URL.
                link = li.find("a", href=True)
                # Ignore a link that belongs to a nested item: that item is
                # visited on its own by this same loop.
                if link is not None and link.find_parent("li") is li:
                    links.append({"text": link.text.strip(), "url": link["href"]})
            open_chain[-1]["links"].extend(links)
            continue

        # Tag names are "h1".."h3", so the digit is the heading level.
        level = int(element.name[1])
        node = {"text": element.text.strip(), "level": level}
        if level < 3:
            node["subheadings"] = []
        node["links"] = []

        # Close every heading that is at the same depth or deeper: it cannot
        # be the parent of the new one.
        while open_chain and open_chain[-1]["level"] >= level:
            open_chain.pop()

        if open_chain:
            # The remaining parent has level 1 or 2, so it has "subheadings".
            open_chain[-1]["subheadings"].append(node)
        else:
            headings.append(node)
        open_chain.append(node)

    return headings
# Demo run: parse the sample post and print its heading outline.
headings = parse_markdown_file_2("content/posts/my-first-post.md")
print(headings)
for heading in headings:
    print(f"Titre de niveau {heading['level']}: {heading['text']}")
    # parse_markdown_file_2 nests headings under "subheadings" (the "children"
    # key belongs to parse_markdown_file). Level-3 entries carry no
    # "subheadings" key at all, hence .get() with an empty default.
    for subheading in heading.get("subheadings", []):
        print(f" Sous-titre de niveau {subheading['level']}: {subheading['text']}")
        for subsubheading in subheading.get("subheadings", []):
            print(
                f" Sous-sous-titre de niveau {subsubheading['level']}: {subsubheading['text']}"
            )