import os

import requests
from bs4 import BeautifulSoup
import markdown


def parse_markdown_file(markdown_file_path, base_dir="./"):
    """Mirror a markdown file's heading structure as directories and download its links.

    Renders *markdown_file_path* to HTML and walks the top-level elements:

    * each ``h1``/``h2``/``h3`` heading creates a directory named after the
      heading text, nested according to the heading hierarchy (a heading at
      the same level as the previous one becomes a sibling directory);
    * a horizontal rule resets the hierarchy to *base_dir* and disables
      downloading for any lists that follow it;
    * each ``<a>`` inside a ``<ul>`` is downloaded to the current heading's
      directory, using the link text as the file name.

    Parameters
    ----------
    markdown_file_path : str
        Path of the markdown file to read.
    base_dir : str
        Root directory under which heading directories are created
        (created if missing).

    Returns
    -------
    list[str]
        Paths of the heading directories, in document order.

    Raises
    ------
    requests.HTTPError
        If a download request returns an error status.
    OSError
        If the markdown file cannot be read or output cannot be written.
    """
    with open(markdown_file_path, "r", encoding="utf-8") as f:
        markdown_text = f.read()
    html = markdown.markdown(markdown_text, extensions=["fenced_code"])
    soup = BeautifulSoup(html, "html.parser")

    # exist_ok avoids the check-then-create race of exists()+makedirs().
    os.makedirs(base_dir, exist_ok=True)

    current_heading_level = 1
    current_heading_dir = base_dir
    last_heading_dir = base_dir
    heading_dirs = []

    for element in soup.children:
        if element.name in ("h1", "h2", "h3"):
            heading_text = element.text.strip()
            heading_level = int(element.name[1])  # "h2" -> 2

            # Same level as the previous heading => sibling: nest under the
            # previous heading's parent. Different level => nest under the
            # current heading's directory.
            if heading_level == current_heading_level:
                heading_dir = os.path.join(last_heading_dir, heading_text)
            else:
                heading_dir = os.path.join(current_heading_dir, heading_text)
            os.makedirs(heading_dir, exist_ok=True)
            heading_dirs.append(heading_dir)

            current_heading_level = heading_level
            last_heading_dir = current_heading_dir
            current_heading_dir = heading_dir
        elif element.name == "hr":
            # A rule ends the section: reset to base_dir and mark level 0
            # so subsequent lists are skipped.
            current_heading_level = 0
            current_heading_dir = base_dir
        elif element.name == "ul" and current_heading_level != 0:
            for link in element.find_all("a"):
                link_text = link.text.strip()
                link_url = link["href"]
                if not link_url:
                    continue
                file_path = os.path.join(
                    current_heading_dir, os.path.basename(link_text)
                )
                # NOTE(review): the href's last character is stripped
                # (presumably a trailing slash) before "/download" is
                # appended — confirm against the target site's URL scheme.
                # Fetch (with a timeout so a dead server cannot hang the
                # run) and validate BEFORE opening the file, so a failed
                # request never leaves an empty/truncated file behind.
                response = requests.get(link_url[:-1] + "/download", timeout=30)
                response.raise_for_status()
                with open(file_path, "wb") as f:
                    f.write(response.content)
                print("Downloaded", link_url, "to", file_path)

    return heading_dirs


# Guarded so importing this module does not trigger downloads.
if __name__ == "__main__":
    headings = parse_markdown_file(
        "content/posts/my-first-post.md", base_dir="./content/posts"
    )
    print(headings)