metanohi/scripts/transform-file.py

182 lines
5.5 KiB
Python
Raw Permalink Normal View History

2016-08-26 15:26:30 +02:00
#!/usr/bin/env python3
'''
Transforms a file into a web-servable file.
In most cases this entails either
+ just symlinking, or
+ transforming a page to html
'''
import sys
import os
import html
import subprocess
2016-09-02 23:32:55 +02:00
import yaml
2016-08-26 15:26:30 +02:00
def read(path):
    '''
    Return the whole contents of the text file at `path`.

    The file is decoded as UTF-8 regardless of the current locale, matching
    the encoding this script uses for pandoc's output.
    '''
    with open(path, encoding='utf-8') as f:
        return f.read()
def write(path, text):
    '''
    Write `text` to the file at `path`, replacing any previous contents.

    The file is encoded as UTF-8 regardless of the current locale, so
    non-ASCII page content cannot fail under an ASCII-only locale.
    '''
    with open(path, 'w', encoding='utf-8') as f:
        f.write(text)
# Locations of the project directories, derived from where this script lives.
# __file__ is made absolute first: when the script is started from inside its
# own directory, os.path.dirname(__file__) is the empty string and the parent
# directory could not be derived from it.
script_dir = os.path.dirname(os.path.abspath(__file__))
base_dir = os.path.split(script_dir)[0]
template_dir = os.path.join(base_dir, 'template')
template_base_file = os.path.join(template_dir, 'base.html')
# The base template is read once at import time; it is later filled in with
# str.format(title=..., content=...).
template_base = read(template_base_file)
site_dir = os.path.join(base_dir, 'site')
2016-08-26 15:26:30 +02:00
def pandoc(filename):
    '''
    Convert the markdown file `filename` with pandoc and return its output.

    The output is decoded as UTF-8 and stripped of surrounding whitespace.
    Raises subprocess.CalledProcessError if pandoc exits with a non-zero
    status, instead of silently returning whatever (possibly empty) output
    it produced.
    '''
    proc = subprocess.run(['pandoc', '-f', 'markdown+smart', filename],
                          stdout=subprocess.PIPE, check=True)
    return proc.stdout.decode('utf-8').strip()
2018-08-21 19:16:55 +02:00
def pandoc_stdin(text):
    '''
    Convert the markdown string `text` with pandoc and return its output.

    `text` is fed to pandoc on standard input as UTF-8; the output is decoded
    as UTF-8 and stripped of surrounding whitespace.  Raises
    subprocess.CalledProcessError if pandoc exits with a non-zero status.
    '''
    proc = subprocess.run(['pandoc', '-f', 'markdown+smart'],
                          input=text.encode('utf-8'),
                          stdout=subprocess.PIPE, check=True)
    return proc.stdout.decode('utf-8').strip()
2016-08-26 15:26:30 +02:00
def extract_markdown_title(filename):
    '''
    Return a title for the markdown file `filename`.

    The title is the text of the first line starting with '# '.  If there is
    no such line and the filename ends in '.md', the basename without that
    suffix is used instead.  Otherwise None is returned.
    '''
    with open(filename, encoding='utf-8') as f:
        for line in f:
            if line.startswith('# '):
                return line[2:].strip()
    if filename.endswith('.md'):
        return os.path.basename(filename)[:-3]
    return None
def extract_markdown_yaml(filename):
    '''
    Return the first YAML metadata block of the markdown file `filename`.

    The block is the text between the first line consisting of '---' and the
    next such line.  It is parsed with yaml.safe_load and returned as a dict
    in which, when present and non-empty,
    + 'abstract' is stripped and has its newlines replaced by spaces, and
    + 'lastupdated' is converted to a string.

    Returns None if the file has no complete block, or if the block is empty
    or does not parse to a mapping.
    '''
    yaml_lines = None  # Stays None until the opening '---' has been seen.
    with open(filename, encoding='utf-8') as f:
        for line in f:
            # Compare without the line ending so that a closing '---' on the
            # last line of a file without trailing newline still counts.
            is_fence = line.rstrip('\r\n') == '---'
            if yaml_lines is None:
                if is_fence:
                    yaml_lines = []
            elif is_fence:
                break
            else:
                yaml_lines.append(line)
        else:
            # End of file reached without finding a complete block.
            return None

    y = yaml.safe_load(''.join(yaml_lines))
    if not isinstance(y, dict):
        # Empty block (None) or a scalar/list: nothing usable as metadata.
        return None
    # Keys with an empty value are left as None rather than crashing on
    # None.strip() or being turned into the string 'None'.
    if y.get('abstract') is not None:
        y['abstract'] = str(y['abstract']).strip().replace('\n', ' ')
    if y.get('lastupdated') is not None:
        y['lastupdated'] = str(y['lastupdated'])
    return y
2016-08-26 15:26:30 +02:00
def _list_site_pages():
    '''
    Return a list of (url, path) pairs, one for each '.md' file below
    site_dir.  An 'index.md' gets the URL of its directory (with a trailing
    slash); any other page gets its filename without the '.md' suffix.
    '''
    pages = []
    for path, _subdirs, subfiles in os.walk(site_dir):
        # Derive the URL prefix from the position relative to site_dir.
        # Stripping string prefixes by hand would mangle directories whose
        # name merely starts with 'site'.
        rel_dir = os.path.relpath(path, site_dir)
        if rel_dir == os.curdir:
            url_dir = ''
        else:
            url_dir = '/' + rel_dir.replace(os.sep, '/')
        # os.walk already yields paths prefixed with site_dir, so they are
        # joined with the filename only.
        if 'index.md' in subfiles:
            pages.append((url_dir + '/',
                          os.path.normpath(os.path.join(path, 'index.md'))))
        for name in subfiles:
            if name.endswith('.md') and name != 'index.md':
                pages.append((url_dir + '/' + name[:-3],
                              os.path.normpath(os.path.join(path, name))))
    return pages


def _render_article_list(pages):
    '''
    Return markdown listing the given (url, path) pages, newest first, as
    linked titles followed by their abstract and last-updated date.
    '''
    # Pages that are never listed among the articles.
    excluded = ['/', '/about/', '/about/niels', '/404']
    entries = [(extract_markdown_title(path), extract_markdown_yaml(path),
                url, path)
               for url, path in pages if url not in excluded]
    # Hard-coded entry with no markdown source behind it.
    entries.append(('Potators',
                    {'abstract': 'Do not look.', 'lastupdated': '2011'},
                    '/potator/', '/potator/'))

    # Newest first; pages without a date sort as '9999', i.e. before all
    # dated pages.  Ties are broken by title.
    entries.sort(key=lambda p: ('9999' if p[1] is None
                                else p[1].get('lastupdated') or '9999',
                                p[0]),
                 reverse=True)

    parts = []
    for ptitle, pyaml, url, _ in entries:
        if pyaml is None or pyaml.get('abstract') is None:
            pabstract = '(No description)'
        else:
            pabstract = pyaml['abstract']
            lu = pyaml.get('lastupdated')
            if lu is not None:
                pabstract += ' ({})'.format(lu)
        parts.append('[{}]({})\n ~ {}\n\n'.format(ptitle, url, pabstract))
    return ''.join(parts)


def markdown_to_html(input_file, output_dir):
    '''
    Render the markdown file `input_file` through pandoc and the base
    template, and write the result to `output_dir` under the same basename
    with an '.html' extension.

    Special case: for the page whose path relative to the common ancestor of
    `input_file` and `output_dir` is 'site/index.md', the marker
    'SPECIAL:ARTICLES' in the source is replaced by a generated list of all
    other pages before conversion.
    '''
    title = html.escape(extract_markdown_title(input_file))

    common = os.path.commonpath([input_file, output_dir])
    relpath = os.path.relpath(input_file, start=common)

    # Special cases
    if relpath == 'site/index.md':
        input_base = read(input_file)
        md = _render_article_list(_list_site_pages())
        content = pandoc_stdin(input_base.replace('SPECIAL:ARTICLES', md))
    else:
        content = pandoc(input_file)

    html_out = template_base.format(title=title, content=content)
    output_file = os.path.join(
        output_dir,
        os.path.splitext(os.path.basename(input_file))[0] + '.html')
    write(output_file, html_out)
def symlink_relative(input_file, output_dir):
    '''
    Create a symlink in `output_dir`, named like `input_file`, that points to
    `input_file` through a path relative to `output_dir`.

    The working directory of the process is left untouched.  Raises
    FileExistsError if the link name is already taken.
    '''
    in_path_abs = os.path.abspath(input_file)
    # A relative link is resolved from the physical directory that contains
    # it, so the target is computed from the real path of output_dir.
    link_dir = os.path.realpath(output_dir)
    in_path_rel = os.path.relpath(in_path_abs, start=link_dir)
    output_file = os.path.join(output_dir, os.path.basename(input_file))
    os.symlink(in_path_rel, output_file)
def get_extension(filename):
    '''
    Return the extension of `filename` without its leading dot, or the empty
    string if it has none.
    '''
    dotted_suffix = os.path.splitext(filename)[1]
    return dotted_suffix[1:]
def transform(input_file, output_dir):
    '''
    Make `input_file` servable from `output_dir`: markdown files ('md'
    extension) are rendered to html, every other file is symlinked.
    Always returns 0.
    '''
    # Handlers by file extension; anything not listed is just symlinked.
    handlers = {
        'md': markdown_to_html,
    }
    handler = handlers.get(get_extension(input_file), symlink_relative)
    handler(input_file, output_dir)
    return 0
def main(args):
    '''
    Command-line entry point.  `args` must hold exactly the input file and
    the output directory; otherwise the usage text is printed and 1 is
    returned.  Returns None after a successful transformation.
    '''
    if len(args) != 2:
        print_usage()
        return 1
    input_file, output_dir = args
    transform(input_file, output_dir)
    return None
def print_usage():
    '''
    Print the command-line synopsis to standard output.
    '''
    usage = 'usage: transform-file.py INPUT_FILE OUTPUT_DIRECTORY\n'
    print(usage)
# Run as a script: the return value of main() becomes the exit status.
if __name__ == '__main__':
    exit_status = main(sys.argv[1:])
    sys.exit(exit_status)