Generate article sitemap thing.

This commit is contained in:
2016-09-02 23:32:55 +02:00
parent 7a08eccc10
commit 4b866eafef
6 changed files with 84 additions and 10 deletions

View File

@@ -14,6 +14,8 @@ import os
import html
import subprocess
import yaml
def read(path):
with open(path) as f:
@@ -29,12 +31,20 @@ base_dir = os.path.split(script_dir)[0]
# Directory named 'template' directly under base_dir.
template_dir = os.path.join(base_dir, 'template')
# Path of the 'base.html' file inside template_dir.
template_base_file = os.path.join(template_dir, 'base.html')
# Result of read() on the base template file; later filled in with
# template_base.format(title=..., content=...).
template_base = read(template_base_file)
# Directory named 'site' directly under base_dir; this is the tree that is
# walked with os.walk() when the page listing is built.
site_dir = os.path.join(base_dir, 'site')
def pandoc(filename):
    """Run pandoc on the file at *filename* and return its output as text.

    Executes ``pandoc --smart <filename>`` exactly once, captures the
    process's standard output, decodes it as UTF-8 and strips leading and
    trailing whitespace.

    The exit status is not inspected: if pandoc fails, whatever it wrote to
    standard output (possibly nothing) is returned.
    """
    # One invocation only; running the same command a second time would just
    # spawn an extra pandoc process whose result is thrown away.
    proc = subprocess.run(['pandoc', '--smart', filename],
                          stdout=subprocess.PIPE)
    return proc.stdout.decode('utf-8').strip()
def pandoc_stdin(text, from_type):
    """Pipe *text* through pandoc and return the converted output as text.

    *text* is encoded as UTF-8 and written to the standard input of
    ``pandoc --smart -f <from_type>``. The captured standard output is
    decoded as UTF-8 and returned with surrounding whitespace stripped.
    """
    command = ['pandoc', '--smart', '-f', from_type]
    process = subprocess.Popen(command,
                               stdin=subprocess.PIPE,
                               stdout=subprocess.PIPE)
    # communicate() feeds the input, closes stdin and waits for pandoc to
    # finish; only the stdout half of the result is needed.
    stdout_bytes = process.communicate(text.encode('utf-8'))[0]
    return stdout_bytes.decode('utf-8').strip()
def extract_markdown_title(filename):
with open(filename) as f:
for line in f:
@@ -43,10 +53,72 @@ def extract_markdown_title(filename):
if filename.endswith('.md'):
return os.path.basename(filename)[:-3]
def extract_markdown_abstract(filename):
    """Return the ``abstract`` field of a Markdown file's YAML block.

    The file is scanned for the first line consisting solely of ``---``;
    the lines up to the next ``---`` line are parsed as YAML. If the parsed
    metadata is a mapping with a string ``abstract`` entry, that string is
    returned with surrounding whitespace stripped and embedded newlines
    replaced by single spaces.

    Returns ``None`` when the file has no complete ``---`` ... ``---``
    block, when the block is empty or not a mapping, or when it has no
    string ``abstract`` entry.
    """
    inside_block = False
    block_lines = []
    with open(filename) as f:
        for line in f:
            # Compare without the line terminator so a delimiter on the last
            # line of a file lacking a trailing newline is still recognised.
            is_delimiter = line.rstrip('\n') == '---'
            if not inside_block:
                if is_delimiter:
                    inside_block = True
                continue
            if not is_delimiter:
                block_lines.append(line)
                continue
            # Closing delimiter reached: only this first block is considered.
            # safe_load never constructs arbitrary Python objects and, unlike
            # a bare yaml.load(), works without an explicit Loader argument.
            metadata = yaml.safe_load(''.join(block_lines))
            if not isinstance(metadata, dict):
                return None
            abstract = metadata.get('abstract')
            if not isinstance(abstract, str):
                # Missing (or non-text) abstract: report "no abstract"
                # instead of failing on None.strip().
                return None
            return abstract.strip().replace('\n', ' ')
    return None
def markdown_to_html(input_file, output_dir):
title = extract_markdown_title(input_file)
title = html.escape(title)
content = pandoc(input_file)
common = os.path.commonpath([input_file, output_dir])
relpath = os.path.relpath(input_file, start=common)
# Special cases
if relpath == 'site/misc/index.md':
top = read(input_file)
pages = []
for path, subdirs, subfiles in os.walk(site_dir):
dir = path
if dir.startswith('./'):
dir = dir[1:]
if dir.startswith('/site'):
dir = dir[5:]
if 'index.md' in subfiles:
pages.append((dir + '/',
os.path.normpath(os.path.join(base_dir, path, 'index.md'))))
for name in subfiles:
if name.endswith('.md') and name != 'index.md':
uri = name[:-3]
pages.append((dir + '/' + uri,
os.path.normpath(os.path.join(base_dir, path, name))))
builtins = ['/', '/about/', '/about/niels', '/misc/']
pages = filter(lambda t: t[0] not in builtins, pages)
pages_new = []
for page in pages:
url, path = page
ptitle = extract_markdown_title(path)
pabstract = extract_markdown_abstract(path)
pages_new.append((ptitle, pabstract, url, path))
pages_new.sort()
md = ''
for page in pages_new:
ptitle, pabstract, url, _ = page
if pabstract is None:
pabstract = '(No description)'
md += '[{}]({})\n ~ {}\n\n'.format(ptitle, url, pabstract)
bottom = md
content = pandoc_stdin(top + '\n' + bottom, 'markdown')
else:
content = pandoc(input_file)
html_out = template_base.format(title=title, content=content)
output_file = os.path.join(
output_dir,