Generate an article sitemap on the misc index page.

2016-09-02 23:32:55 +02:00
parent 7a08eccc10
commit 4b866eafef
6 changed files with 84 additions and 10 deletions
--- a/scripts/transform-file.py
+++ b/scripts/transform-file.py
@@ -14,6 +14,8 @@ import os
 import html
 import subprocess

+import yaml
+

 def read(path):
    with open(path) as f:
@@ -29,12 +31,20 @@ base_dir = os.path.split(script_dir)[0]
 template_dir = os.path.join(base_dir, 'template')
 template_base_file = os.path.join(template_dir, 'base.html')
 template_base = read(template_base_file)
+site_dir = os.path.join(base_dir, 'site')


 def pandoc(filename):
+    """Run pandoc on *filename* and return its output as text.
+
+    pandoc's standard output is captured, decoded as UTF-8 and stripped
+    of leading and trailing whitespace.
+    """
-    proc = subprocess.run(['pandoc', '--smart', filename], stdout=subprocess.PIPE)
+    proc = subprocess.run(['pandoc', '--smart', filename],
+                          stdout=subprocess.PIPE)
    return proc.stdout.decode('utf-8').strip()

+def pandoc_stdin(text, from_type):
+    """Convert *text* with pandoc, feeding it on standard input.
+
+    *from_type* is handed to pandoc's ``-f`` option as the input format.
+    Returns pandoc's standard output decoded as UTF-8 and stripped of
+    leading and trailing whitespace.
+    """
+    command = ['pandoc', '--smart', '-f', from_type]
+    result = subprocess.run(command, input=text.encode('utf-8'),
+                            stdout=subprocess.PIPE)
+    return result.stdout.decode('utf-8').strip()
+
 def extract_markdown_title(filename):
    with open(filename) as f:
        for line in f:
@@ -43,10 +53,72 @@ def extract_markdown_title(filename):
    if filename.endswith('.md'):
        return os.path.basename(filename)[:-3]

+def extract_markdown_abstract(filename):
+    """Return the ``abstract`` from a Markdown file's YAML front matter.
+
+    The file is scanned line by line for the first block delimited by two
+    ``---`` lines. That block is parsed as YAML and its ``abstract`` entry
+    is returned with surrounding whitespace stripped and newlines replaced
+    by spaces.
+
+    Returns None when the file has no closed ``---`` block, when the block
+    is empty or not a mapping, or when it has no string ``abstract`` entry.
+    """
+    in_block = False
+    block_lines = []
+    with open(filename) as f:
+        for line in f:
+            if not in_block:
+                # Everything before the opening delimiter is ignored.
+                in_block = line == '---\n'
+            elif line == '---\n':
+                # safe_load instead of load: front matter is plain data, so
+                # arbitrary object construction is never needed, and recent
+                # PyYAML versions reject load() without an explicit Loader.
+                meta = yaml.safe_load(''.join(block_lines))
+                if not isinstance(meta, dict):
+                    return None
+                abstract = meta.get('abstract')
+                # A missing abstract must yield None rather than crash on
+                # None.strip(); the caller treats None as "no description".
+                if not isinstance(abstract, str):
+                    return None
+                return abstract.strip().replace('\n', ' ')
+            else:
+                block_lines.append(line)
+    return None
+
 def markdown_to_html(input_file, output_dir):
    title = extract_markdown_title(input_file)
    title = html.escape(title)
-    content = pandoc(input_file)
+
+    common = os.path.commonpath([input_file, output_dir])
+    relpath = os.path.relpath(input_file, start=common)
+
+    # Special cases
+    if relpath == 'site/misc/index.md':
+        top = read(input_file)
+
+        pages = []
+        for path, subdirs, subfiles in os.walk(site_dir):
+            dir = path
+            if dir.startswith('./'):
+                dir = dir[1:]
+            if dir.startswith('/site'):
+                dir = dir[5:]
+            if 'index.md' in subfiles:
+                pages.append((dir + '/',
+                              os.path.normpath(os.path.join(base_dir, path, 'index.md'))))
+            for name in subfiles:
+                if name.endswith('.md') and name != 'index.md':
+                    uri = name[:-3]
+                    pages.append((dir + '/' + uri,
+                                  os.path.normpath(os.path.join(base_dir, path, name))))
+
+        builtins = ['/', '/about/', '/about/niels', '/misc/']
+        pages = filter(lambda t: t[0] not in builtins, pages)
+        pages_new = []
+        for page in pages:
+            url, path = page
+            ptitle = extract_markdown_title(path)
+            pabstract = extract_markdown_abstract(path)
+            pages_new.append((ptitle, pabstract, url, path))
+
+        pages_new.sort()
+        md = ''
+        for page in pages_new:
+            ptitle, pabstract, url, _ = page
+            if pabstract is None:
+                pabstract = '(No description)'
+            md += '[{}]({})\n  ~ {}\n\n'.format(ptitle, url, pabstract)
+            
+        bottom = md
+        content = pandoc_stdin(top + '\n' + bottom, 'markdown')
+    else:
+        content = pandoc(input_file)
+
    html_out = template_base.format(title=title, content=content)
    output_file = os.path.join(
        output_dir,