"""Download the documents linked under each level-2 heading of a Markdown page.

The script converts ``content/posts/my-first-post.md`` to pandoc JSON, loads
it with panflute and runs :func:`create_folder_h2` as a filter.  For every
level-2 header a directory named after the header text is created below
``parent_dir``; every link found in the element that follows the header and
whose URL ends with ``#`` is downloaded and stored there as a ``.md`` file
prefixed with a Hugo front-matter block.
"""

import io
import os

import ipdb
import panflute
import pypandoc
import requests

# Root directory below which the per-header folders are created.
parent_dir = "./content"

# Front matter written ahead of every downloaded document.  ``%s`` receives
# the link text.  The leading blank line matches the original output.
# NOTE(review): the title is inserted unquoted, so a link text containing
# YAML-significant characters (e.g. ": ") may yield invalid front matter.
_HUGO_HEAD_TEMPLATE = (
    "\n"
    "---\n"
    "title: %s\n"
    "date: 2022-10-29T15:59:37+02:00\n"
    "draft: false\n"
    "---\n"
)


def action(elem, doc):
    """Append image elements to ``doc.images`` and link elements to ``doc.links``.

    Both lists must already exist on ``doc``.  Returns ``None`` so the element
    is left unchanged when used as a panflute filter action.
    """
    if isinstance(elem, panflute.Image):
        doc.images.append(elem)
    elif isinstance(elem, panflute.Link):
        doc.links.append(elem)


def find_link(elem, doc):
    """Append ``elem`` to ``doc.link`` and print its text if it is a link.

    ``doc.link`` must already exist; :func:`create_folder_h2` resets it for
    every level-2 header before walking with this action.
    """
    if isinstance(elem, panflute.Link):
        doc.link.append(elem)
        print(panflute.stringify(elem))


def create_folder_h3(elem, doc):
    """Print the path a level-3 header would map to below ``parent_dir``.

    Despite its name this function only prints the path; it does not create
    any directory.
    """
    if isinstance(elem, panflute.Header) and elem.level == 3:
        directory = panflute.stringify(elem)
        path = os.path.join(parent_dir, directory)
        print(path)


def download(link, timeout=30):
    """Fetch ``link`` with an HTTP GET and return the response body as text.

    Args:
        link: URL to fetch.
        timeout: Seconds to wait for the server before giving up, so that an
            unresponsive host cannot hang the script forever.

    Raises:
        requests.RequestException: on connection problems, timeouts, or an
            HTTP error status (4xx/5xx).
    """
    response = requests.get(link, timeout=timeout)
    # Without this an HTML error page would be returned as if it were the
    # requested document.
    response.raise_for_status()
    return response.text


def _safe_filename(name):
    """Return ``name`` with path separators replaced so it stays one file name."""
    for sep in ("/", os.sep, os.altsep):
        if sep:
            name = name.replace(sep, "-")
    return name


def create_folder_h2(elem, doc):
    """Create a folder for a level-2 header and download the links below it.

    For a level-2 ``Header`` this creates ``parent_dir/<header text>``,
    collects every link inside the element that directly follows the header
    (``elem.next``) into ``doc.link``, and for each link whose URL ends with
    ``#`` downloads ``<url without '#'>/download`` and writes it, prefixed
    with Hugo front matter, to ``<header folder>/<link text>.md``.

    Any other element is ignored.  Always returns ``None`` so the document is
    left unchanged by the filter.
    """
    if not (isinstance(elem, panflute.Header) and elem.level == 2):
        return

    doc.link = []
    directory = panflute.stringify(elem)
    path = os.path.join(parent_dir, directory)
    os.makedirs(path, exist_ok=True)

    sibling = elem.next
    if sibling is None:
        # The header is the last element: there is nothing to scan for links.
        return
    # Pass ``doc`` explicitly instead of relying on panflute inferring it.
    sibling.walk(find_link, doc)

    for link in doc.link:
        # ``endswith`` is also safe for an empty URL, unlike ``url[-1]``.
        if not link.url.endswith("#"):
            continue

        title = panflute.stringify(link)
        target = os.path.join(path, _safe_filename(title) + ".md")
        print(target)

        try:
            md_content = download(link.url[:-1] + "/download")
        except requests.RequestException as err:
            # One unreachable document should not abort the remaining ones.
            print("download failed for %s: %s" % (link.url, err))
            continue

        with open(target, "w", encoding="utf-8") as f:
            f.write(_HUGO_HEAD_TEMPLATE % (title,) + md_content)


def main():
    """Convert the source post to a panflute document and run the filter."""
    data = pypandoc.convert_file("content/posts/my-first-post.md", "json")
    doc = panflute.load(io.StringIO(data))
    doc.images = []
    doc.links = []
    panflute.run_filter(create_folder_h2, doc=doc)


if __name__ == "__main__":
    main()