You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

131 lines
4.4 KiB

  1. import os
  2. import re
  3. import requests
  4. from bs4 import BeautifulSoup
  5. import markdown
  6. def parse_for_notice(content):
  7. result = re.findall(":::warning.*?:::", content, re.MULTILINE | re.DOTALL)
  8. for notice in result:
  9. old = notice
  10. content = content.replace(
  11. old,
  12. notice.replace(":::warning", "{{% notice info %}}").replace(
  13. ":::", "{{% /notice %}}"
  14. ),
  15. )
  16. result = re.findall(":::info.*?:::", content, re.MULTILINE | re.DOTALL)
  17. for notice in result:
  18. old = notice
  19. content = content.replace(
  20. old,
  21. notice.replace(":::info", "{{% notice note %}}").replace(
  22. ":::", "{{% /notice %}}"
  23. ),
  24. )
  25. result = re.findall(":::success.*?:::", content, re.MULTILINE | re.DOTALL)
  26. for notice in result:
  27. old = notice
  28. content = content.replace(
  29. old,
  30. notice.replace(":::success", "{{% notice tip %}}").replace(
  31. ":::", "{{% /notice %}}"
  32. ),
  33. )
  34. result = re.findall(":::danger.*?:::", content, re.MULTILINE | re.DOTALL)
  35. for notice in result:
  36. old = notice
  37. content = content.replace(
  38. old,
  39. notice.replace(":::danger", "{{% notice warning %}}").replace(
  40. ":::", "{{% /notice %}}"
  41. ),
  42. )
  43. print(content)
  44. return content
  45. def parse_markdown_file(markdown_file_path, base_dir="./"):
  46. with open(markdown_file_path, "r") as f:
  47. markdown_text = f.read()
  48. html = markdown.markdown(markdown_text, extensions=["fenced_code"])
  49. soup = BeautifulSoup(html, "html.parser")
  50. if not os.path.exists(base_dir):
  51. os.makedirs(base_dir)
  52. current_heading_level = 1
  53. current_heading_dir = base_dir
  54. last_heading_dir = base_dir
  55. for element in soup.children:
  56. if element.name in ["h1", "h2", "h3"]:
  57. # Get the text of the heading and the heading level
  58. heading_text = element.text.strip()
  59. heading_level = int(element.name[1])
  60. # Determine the directory to create for the heading
  61. if heading_level == current_heading_level:
  62. heading_dir = os.path.join(last_heading_dir, heading_text)
  63. else:
  64. heading_dir = os.path.join(current_heading_dir, heading_text)
  65. if not os.path.exists(heading_dir):
  66. os.makedirs(heading_dir)
  67. # add _index.md for page organization
  68. with open(heading_dir + "/_index.md", "wb") as f:
  69. print("Created", heading_dir + "/_index.md")
  70. index = (
  71. """
  72. +++
  73. title = "%s"
  74. chapter = true
  75. weight = 5
  76. +++
  77. # Basics
  78. Discover what this Hugo theme is all about and the core-concepts behind it.
  79. """
  80. % heading_text
  81. )
  82. f.write(b"%s" % index.encode("utf-8"))
  83. # Set the current heading level and directory
  84. current_heading_level = heading_level
  85. last_heading_dir = current_heading_dir
  86. current_heading_dir = heading_dir
  87. elif element.name == "hr":
  88. current_heading_level = 0
  89. current_heading_dir = base_dir
  90. elif element.name == "ul" and current_heading_level != 0:
  91. # Get the links in the list
  92. links = element.find_all("a")
  93. for link in links:
  94. # Get the text and href of the link
  95. link_text = link.text.strip()
  96. link_url = link["href"]
  97. file_path = os.path.join(
  98. current_heading_dir, os.path.basename(link_text)
  99. )
  100. if link_url:
  101. with open(file_path + ".md", "wb") as f:
  102. doc_link = link_url + "/download"
  103. response = requests.get(doc_link)
  104. hugo_header = '---\ntitle: "' + link_text + '"\n---\n\n'
  105. content = parse_for_notice(
  106. response.content.decode("utf-8")
  107. ).encode("utf-8")
  108. f.write(hugo_header.encode("utf-8"))
  109. f.write(content.replace(b"---", b""))
  110. print("Downloaded", doc_link, "to", file_path)
  111. headings = parse_markdown_file("content/my-first-post.md", base_dir="./content")
  112. print(headings)