import io
import os

from bs4 import BeautifulSoup, Comment
import markdown
import ipdb

parent_dir = "./content"


def _attach_heading(tree, node, level, child_key):
    """Append *node* to *tree* beneath the most recent chain of headings.

    Descends through the last entry of each nesting list, at most
    ``level - 1`` times, following ``child_key``. When an ancestor is
    missing (for example an ``<h2>`` before any ``<h1>``), the node is
    attached to the deepest list that does exist instead of raising
    ``IndexError``.
    """
    siblings = tree
    for _ in range(level - 1):
        if not siblings:
            break
        siblings = siblings[-1][child_key]
    siblings.append(node)


def parse_markdown_file(file_path):
    """Return the h1/h2/h3 outline of a Markdown file as a nested list.

    The file is rendered to HTML with ``markdown.markdown`` and scanned
    line by line for lines that start with ``<h1>``, ``<h2>`` or ``<h3>``.

    Args:
        file_path: Path of the Markdown file to read (decoded as UTF-8).

    Returns:
        A list of dicts ``{"level": int, "text": str, "children": list}``.
        ``level`` is the heading number taken from the tag, and
        ``children`` holds the headings nested below that entry.
    """
    with open(file_path, "r", encoding="utf-8") as file:
        markdown_text = file.read()
    html = markdown.markdown(markdown_text)

    headings = []
    for line in html.split("\n"):
        for level in (1, 2, 3):
            if line.startswith(f"<h{level}>"):
                # Drop the 4-character opening tag and the 5-character
                # closing tag, e.g. "<h1>" and "</h1>".
                node = {"level": level, "text": line[4:-5], "children": []}
                _attach_heading(headings, node, level, "children")
                break
    return headings


def parse_markdown_file_2(file_path):
    """Return the heading outline of a Markdown file, with links per heading.

    The file is rendered to HTML with ``markdown.markdown`` and parsed with
    BeautifulSoup. Headings (h1-h3) are nested by their tag number; the
    first link of every ``<li>`` in a ``<ul>`` is attached to the heading
    that most recently precedes that list in document order.

    Args:
        file_path: Path of the Markdown file to read (decoded as UTF-8).

    Returns:
        A list of dicts ``{"text": str, "level": int, "subheadings": list,
        "links": list}``. Each link is ``{"text": str, "url": str}``.
        Lists that appear before the first heading are ignored because
        there is no heading to attach them to.
    """
    with open(file_path, "r", encoding="utf-8") as f:
        content = f.read()
    soup = BeautifulSoup(markdown.markdown(content), "html.parser")

    headings = []
    current = None  # heading most recently seen in document order
    for element in soup.find_all(["h1", "h2", "h3", "ul"]):
        if element.name == "ul":
            # find_all("li") below already reaches the items of nested
            # lists, so a <ul> inside another <ul> must not be handled
            # a second time.
            if current is None or element.find_parent("ul") is not None:
                continue
            for li in element.find_all("li"):
                link = li.find("a", href=True)
                if link is not None:
                    current["links"].append(
                        {"text": link.text.strip(), "url": link["href"]}
                    )
            continue

        # The heading level comes from the tag name ("h2" -> 2), not from
        # how deep the element sits in the DOM.
        level = int(element.name[1])
        current = {
            "text": element.text.strip(),
            "level": level,
            "subheadings": [],
            "links": [],
        }
        _attach_heading(headings, current, level, "subheadings")
    return headings


headings = parse_markdown_file_2("content/posts/my-first-post.md")
print(headings)

# parse_markdown_file_2 nests headings under the "subheadings" key.
for heading in headings:
    print(f"Titre de niveau {heading['level']}: {heading['text']}")
    for subheading in heading["subheadings"]:
        print(f" Sous-titre de niveau {subheading['level']}: {subheading['text']}")
        for subsubheading in subheading["subheadings"]:
            print(
                f" Sous-sous-titre de niveau {subsubheading['level']}: "
                f"{subsubheading['text']}"
            )