~forks/gemdown.py

An opinionated Python script for rendering Markdown files in the gemtext format.

Initial commit

Committed by Robin Boers.

diff --git a/.envrc b/.envrc
new file mode 100644
index 0000000..1d953f4
--- /dev/null
+++ b/.envrc
@@ -0,0 +1 @@
+use nix
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..92b2793
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+.direnv
diff --git a/gemdown.py b/gemdown.py
new file mode 100644
index 0000000..65ae5dc
--- /dev/null
+++ b/gemdown.py
@@ -0,0 +1,189 @@
+import re
+import sys
+
+# Gemdown works like this:
+# 
+# - Quotes with line breaks are transformed to single lines.
+# - Paragraphs with line breaks are transformed to single lines, except when using a hard break (two spaces and then a line break).
+# - Lists are flattened, but if a list item contains just a link or image, it is transformed to a link.
+# - Images are transformed to links.
+# - Inline links are collected under the text.
+# - Headings of level 4-6 are interpreted as paragraphs.
+# - Numbered lists are interpreted as paragraphs.
+# - Extra newlines are kept.
+# - Italic identifiers are kept, but bold modifiers are removed.
+# - Horizontal lines are kept.
+
+link_pattern = r'\[(.*?)\]\(([^ \t\n\r\f\v]+)\)'
+image_pattern = r'!\[(.*?)\]\(([^ \t\n\r\f\v]+)\)'
+italics_pattern = r'\b_(.*?)_\b'
+
+def main() -> int:
+    gemtext = Gemdown().convert(sys.stdin)
+    print(gemtext)
+
+class Gemdown():
+    def convert(self, markdown):
+        self.is_in_quote = False
+        self.is_in_preformatted = False
+        self.links = ""
+        self.document = ""
+        self.collected_links = []
+
+        for line in markdown:
+            # Strip the newline off the end
+            if line.endswith("\n"): 
+                self.line = line[0:len(line) - 1]
+            else: 
+                self.line = line
+
+            self.parse_line()
+
+        return self.document
+
+    def parse_line(self):
+        # Strip bold modifiers
+        self.strip_bold()
+        self.replace_italics()
+
+        # State
+        if self.is_in_quote and not self.line.startswith(">"):
+            self.is_in_quote = False
+
+        # Preformatted
+        if self.is_in_preformatted and not self.line.startswith("```"):
+            self.line_as_is()
+
+        elif self.is_in_preformatted and self.line.startswith("```"):
+            self.is_in_preformatted = False;
+            self.line_as_is()
+
+        elif self.line.startswith("```"):
+            self.is_in_preformatted = True;
+            self.line_as_is()
+
+        # Quotes
+        elif self.line.startswith(">") and self.is_in_quote:
+            self.collect_links()
+            self.add_text(self.rest_of_line())
+
+        elif self.line.startswith(">") and not self.is_in_quote:
+            self.is_in_quote = True
+
+            self.collect_links()
+            self.add_text("> " + self.rest_of_line())
+
+        # Headings
+        elif self.is_heading(): 
+            self.collect_links()
+            self.line_as_is()
+
+        # Horizontal lines
+        elif self.is_hr():
+            self.line_as_is()
+
+        # List item
+        elif self.is_list_item():
+            list_item_content = self.rest_of_line().strip()
+
+            # Single link or image in a list item or line
+            if self.is_link(list_item_content) or self.is_image(list_item_content): 
+                text = self.link.group(1)
+                location = self.link.group(2)
+
+                self.add_line("=> " + location + " " + text)
+
+            else: 
+                self.collect_links()
+                self.add_line("* " + list_item_content)
+
+        # Links
+        elif self.is_link(self.line) or self.is_image(self.line):
+            text = self.link.group(1)
+            location = self.link.group(2)
+
+            self.add_line("=> " + location + " " + text)
+
+        # Empty lines
+        elif self.line.strip() == "":
+            self.insert_collected_links()
+            self.empty_line()
+                
+        # Hard breaks
+        elif self.line.endswith("  "): 
+            self.collect_links()
+            self.add_line()
+
+        # Normal text
+        else: 
+            self.collect_links()
+            self.add_text()
+
+    def strip_bold(self):
+        self.line = self.line.replace("**", "")
+        
+    def replace_italics(self):
+        def replace(match):
+            return f'*{match.group(1)}*'
+
+        self.line = re.sub(italics_pattern, replace, self.line)
+
+    def line_as_is(self):
+        return self.add_line()
+
+    def rest_of_line(self):
+        return self.line[1:len(self.line)].strip()
+
+    def empty_line(self):
+        if not self.document.endswith("\n"):
+            self.add_line()
+            self.add_line()
+        else: 
+            self.add_line()
+
+    def add_text(self, text = None):
+        if not text: text = self.line
+        self.document += text + " "
+
+    def add_line(self, line = None):
+        if not line: line = self.line
+        self.document += line.strip() + "\n"
+
+    def collect_links(self):
+        def replace_link(match):
+            link_text = match.group(1)
+            self.collected_links.append((match.group(1), match.group(2)))
+            return link_text
+
+        self.line = re.sub(link_pattern, replace_link, self.line)
+
+    def insert_collected_links(self):
+        if self.collected_links != []:
+            self.empty_line()
+
+            for link in self.collected_links:
+                self.add_line("=> " + link[1] + " " + link[0])
+
+            self.collected_links = []
+
+    def is_heading(self, n = None):
+        if n: return self.line.startswith("#" * n) and self.line[n] != "#"
+        else: return self.is_heading(1) or self.is_heading(2) or self.is_heading(3)
+
+    def is_hr(self):
+        return False
+        # return self.is_list_item() and self.line.all()
+
+    def is_list_item(self):
+        return self.line.strip().startswith("*") or self.line.strip().startswith("-")
+
+    def is_link(self, text) -> bool:
+        self.link = re.fullmatch(link_pattern, text);
+        return self.link
+
+    def is_image(self, text) -> bool:
+        self.link = re.fullmatch(image_pattern, text)
+        return self.link
+
+if __name__ == '__main__':
+    sys.exit(main())
diff --git a/sample.md b/sample.md
new file mode 100644
index 0000000..95ae0d3
--- /dev/null
+++ b/sample.md
@@ -0,0 +1,37 @@
+# Marked - Markdown Parser
+
+[Marked](marked) lets you convert [Markdown](markdown) into HTML. Markdown is a simple text format whose goal is to be very _easy to read and write_, even when not converted to HTML. This demo page will let you type anything you like and see how it gets converted. Live. No more waiting around.
+
+Something else du_punkto du_punkto.
+
+
+## How To Use The Demo
+
+That's it. Pretty simple. There's also a drop-down option above to switch between various views:
+
+- **Preview:** A *live* display of the generated HTML as it would render in a browser.
+- **HTML Source:** The generated HTML before your browser makes it pretty.
+- **Lexer Data:** What [Marked](marked) uses internally, in case you like gory stuff like this.
+- **Quick Reference:** A brief run-down of how to format things using [Markdown](markdown).
+
+## Rather have an image?
+
+![Some image](/image.jpg)
+
+## Why Markdown?
+
+It's easy. It's not overly bloated, unlike HTML. Also, as the creator of [Markdown](markdow) says,
+
+> The overriding design goal for Markdown's
+> formatting syntax is to make it as readable 
+> as possible. The idea is that a
+> Markdown-formatted document should be
+> publishable as-is, as plain text, without 
+> looking like it's been marked up with tags
+> or formatting instructions.
+
+Ready to start writing?
+Try searching on [Google](https://google.com) for more info!
+
+- [Google](https://google.com)
+
diff --git a/shell.nix b/shell.nix
new file mode 100644
index 0000000..b547d22
--- /dev/null
+++ b/shell.nix
@@ -0,0 +1,7 @@
+{ pkgs ? import <nixpkgs> {} }:
+
+with pkgs;
+
+let inherit (lib) optional optionals; in mkShell {
+  buildInputs = [ (python3.withPackages (ps: with ps; [ tkinter ])) ];
+}