An opinionated Python script for rendering Markdown files in the gemtext format.
Committed by Robin Boers.
diff --git a/.envrc b/.envrc
new file mode 100644
index 0000000..1d953f4
--- /dev/null
+++ b/.envrc
@@ -0,0 +1 @@
+use nix
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..92b2793
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+.direnv
diff --git a/gemdown.py b/gemdown.py
new file mode 100644
index 0000000..65ae5dc
--- /dev/null
+++ b/gemdown.py
@@ -0,0 +1,189 @@
+import re
+import sys
+
+# Gemdown works like this:
+#
+# - Quotes with line breaks are transformed to single lines.
+# - Paragraphs with line breaks are transformed to single lines, except when using a hard break (two spaces and then a line break).
+# - Lists are flattened, but if a list item contains just a link or image, it is transformed to a link.
+# - Images are transformed to links.
+# - Inline links are collected under the text.
+# - Headings of level 4-6 are interpreted as paragraphs.
+# - Numbered lists are interpreted as paragraphs.
+# - Extra newlines are kept.
+# - Italic identifiers are kept, but bold modifiers are removed.
+# - Horizontal lines are kept.
+
+link_pattern = r'\[(.*?)\]\(([^ \t\n\r\f\v]+)\)'  # [text](location): group 1 = text, group 2 = whitespace-free location
+image_pattern = r'!\[(.*?)\]\(([^ \t\n\r\f\v]+)\)'  # same as link_pattern but with a leading "!"
+italics_pattern = r'\b_(.*?)_\b'  # _text_ delimited by word boundaries: group 1 = text
+
+def main() -> int:  # Convert the Markdown read from stdin and print the resulting gemtext.
+ gemtext = Gemdown().convert(sys.stdin)
+ print(gemtext)  # NOTE(review): nothing is returned, so sys.exit(main()) receives None despite the int annotation
+
+class Gemdown():  # Line-by-line Markdown -> gemtext converter; all parsing state lives on the instance.
+ def convert(self, markdown):  # markdown: iterable of lines (main passes sys.stdin); returns the accumulated output string.
+ self.is_in_quote = False  # True while consecutive ">" lines are being joined into one quote line
+ self.is_in_preformatted = False  # True between an opening and a closing "```" fence
+ self.links = ""  # NOTE(review): not read anywhere in the visible code
+ self.document = ""  # output text, appended to by add_text/add_line
+ self.collected_links = []  # (text, location) tuples waiting for insert_collected_links
+
+ for line in markdown:
+ # Strip one trailing newline; other trailing whitespace is kept for the hard-break check in parse_line
+ if line.endswith("\n"):
+ self.line = line[0:len(line) - 1]
+ else:
+ self.line = line
+
+ self.parse_line()  # works on self.line set just above
+
+ return self.document  # NOTE(review): links still pending in collected_links at end of input are not flushed here
+
+ def parse_line(self):  # Classify self.line and append its gemtext form to self.document; the first matching branch wins.
+ # Strip bold modifiers and rewrite _italics_ as *italics*; this runs on every line, including lines inside ``` fences
+ self.strip_bold()
+ self.replace_italics()
+
+ # State: a line that does not start with ">" ends the current quote
+ if self.is_in_quote and not self.line.startswith(">"):
+ self.is_in_quote = False
+
+ # Preformatted: fence lines and the lines between them are emitted via add_line (which strips surrounding whitespace)
+ if self.is_in_preformatted and not self.line.startswith("```"):
+ self.line_as_is()
+
+ elif self.is_in_preformatted and self.line.startswith("```"):
+ self.is_in_preformatted = False;
+ self.line_as_is()
+
+ elif self.line.startswith("```"):
+ self.is_in_preformatted = True;
+ self.line_as_is()
+
+ # Quotes: continuation lines are appended without a new "> " prefix, so a multi-line quote becomes one line
+ elif self.line.startswith(">") and self.is_in_quote:
+ self.collect_links()
+ self.add_text(self.rest_of_line())
+
+ elif self.line.startswith(">") and not self.is_in_quote:
+ self.is_in_quote = True
+
+ self.collect_links()
+ self.add_text("> " + self.rest_of_line())
+
+ # Headings (levels 1-3 only, see is_heading)
+ elif self.is_heading():
+ self.collect_links()
+ self.line_as_is()
+
+ # Horizontal lines (is_hr currently always returns False, so this branch never runs)
+ elif self.is_hr():
+ self.line_as_is()
+
+ # List item
+ elif self.is_list_item():
+ list_item_content = self.rest_of_line().strip()
+
+ # Single link or image in a list item or line
+ if self.is_link(list_item_content) or self.is_image(list_item_content):
+ text = self.link.group(1)  # self.link was set by the is_link/is_image call above
+ location = self.link.group(2)
+
+ self.add_line("=> " + location + " " + text)
+
+ else:
+ self.collect_links()
+ self.add_line("* " + list_item_content)  # list_item_content was computed before collect_links ran, so inline links stay in the item text
+
+ # Links: a line that consists entirely of one link or one image
+ elif self.is_link(self.line) or self.is_image(self.line):
+ text = self.link.group(1)
+ location = self.link.group(2)
+
+ self.add_line("=> " + location + " " + text)
+
+ # Empty lines: flush the links collected so far, then emit the blank line
+ elif self.line.strip() == "":
+ self.insert_collected_links()
+ self.empty_line()
+
+ # Hard breaks: the line is emitted with its own newline instead of being joined to the next one
+ elif self.line.endswith(" "):
+ self.collect_links()
+ self.add_line()
+
+ # Normal text: appended with a trailing space so that following lines join into one paragraph
+ else:
+ self.collect_links()
+ self.add_text()
+
+ def strip_bold(self):  # Remove every "**" from self.line.
+ self.line = self.line.replace("**", "")
+
+ def replace_italics(self):  # Rewrite each italics_pattern match (_text_) in self.line as *text*.
+ def replace(match):
+ return f'*{match.group(1)}*'
+
+ self.line = re.sub(italics_pattern, replace, self.line)
+
+ def line_as_is(self):  # Emit self.line through add_line without further transformation.
+ return self.add_line()
+
+ def rest_of_line(self):  # self.line without its first character, stripped; used to drop a leading ">" or list marker.
+ return self.line[1:len(self.line)].strip()  # NOTE(review): drops the first raw character, so an indented list item keeps its marker
+
+ def empty_line(self):  # Emit a blank line; if the document does not end in a newline, first terminate the open line.
+ if not self.document.endswith("\n"):
+ self.add_line()
+ self.add_line()
+ else:
+ self.add_line()
+
+ def add_text(self, text = None):  # Append text (default: self.line) followed by a space, without a newline.
+ if not text: text = self.line  # NOTE(review): "not text" also treats "" as missing, so add_text("") appends self.line
+ self.document += text + " "
+
+ def add_line(self, line = None):  # Append line (default: self.line), stripped, followed by a newline.
+ if not line: line = self.line  # NOTE(review): "" is treated like None here as well
+ self.document += line.strip() + "\n"
+
+ def collect_links(self):  # Replace each link_pattern match in self.line by its text and remember (text, location) for later.
+ def replace_link(match):
+ link_text = match.group(1)
+ self.collected_links.append((match.group(1), match.group(2)))
+ return link_text
+
+ self.line = re.sub(link_pattern, replace_link, self.line)
+
+ def insert_collected_links(self):  # Emit one "=> location text" line per collected link, then clear the list.
+ if self.collected_links != []:
+ self.empty_line()  # separates the link lines from the text before them
+
+ for link in self.collected_links:
+ self.add_line("=> " + link[1] + " " + link[0])
+
+ self.collected_links = []
+
+ def is_heading(self, n = None):  # With n: self.line starts with exactly n "#". Without n: heading of level 1, 2 or 3.
+ if n: return self.line.startswith("#" * n) and self.line[n] != "#"  # NOTE(review): self.line[n] raises IndexError when the line is exactly n "#" characters
+ else: return self.is_heading(1) or self.is_heading(2) or self.is_heading(3)
+
+ def is_hr(self):  # Horizontal-rule detection is not implemented yet; always False.
+ return False
+ # return self.is_list_item() and self.line.all()
+
+ def is_list_item(self):  # True when the stripped line starts with "*" or "-".
+ return self.line.strip().startswith("*") or self.line.strip().startswith("-")  # NOTE(review): also true for lines starting with *italics* produced by replace_italics
+
+ def is_link(self, text) -> bool:  # Full-match text against link_pattern and keep the match in self.link.
+ self.link = re.fullmatch(link_pattern, text);
+ return self.link  # a re.Match or None (used for truthiness), despite the bool annotation
+
+ def is_image(self, text) -> bool:  # Full-match text against image_pattern and keep the match in self.link.
+ self.link = re.fullmatch(image_pattern, text)
+ return self.link  # a re.Match or None (used for truthiness), despite the bool annotation
+
+if __name__ == '__main__':  # run the converter only when executed as a script
+ sys.exit(main())
diff --git a/sample.md b/sample.md
new file mode 100644
index 0000000..95ae0d3
--- /dev/null
+++ b/sample.md
@@ -0,0 +1,37 @@
+# Marked - Markdown Parser
+
+[Marked](marked) lets you convert [Markdown](markdown) into HTML. Markdown is a simple text format whose goal is to be very _easy to read and write_, even when not converted to HTML. This demo page will let you type anything you like and see how it gets converted. Live. No more waiting around.
+
+Something else du_punkto du_punkto.
+
+
+## How To Use The Demo
+
+That's it. Pretty simple. There's also a drop-down option above to switch between various views:
+
+- **Preview:** A *live* display of the generated HTML as it would render in a browser.
+- **HTML Source:** The generated HTML before your browser makes it pretty.
+- **Lexer Data:** What [Marked](marked) uses internally, in case you like gory stuff like this.
+- **Quick Reference:** A brief run-down of how to format things using [Markdown](markdown).
+
+## Rather have an image?
+
+
+
+## Why Markdown?
+
+It's easy. It's not overly bloated, unlike HTML. Also, as the creator of [Markdown](markdow) says,
+
+> The overriding design goal for Markdown's
+> formatting syntax is to make it as readable
+> as possible. The idea is that a
+> Markdown-formatted document should be
+> publishable as-is, as plain text, without
+> looking like it's been marked up with tags
+> or formatting instructions.
+
+Ready to start writing?
+Try searching on [Google](https://google.com) for more info!
+
+- [Google](https://google.com)
+
diff --git a/shell.nix b/shell.nix
new file mode 100644
index 0000000..b547d22
--- /dev/null
+++ b/shell.nix
@@ -0,0 +1,7 @@
+{ pkgs ? import <nixpkgs> {} }:  # pkgs defaults to importing <nixpkgs>
+
+with pkgs;
+
+let inherit (lib) optional optionals; in mkShell {  # NOTE(review): optional/optionals are not used below
+ buildInputs = [ (python3.withPackages (ps: with ps; [ tkinter ])) ];  # python3 with the tkinter package
+}