metanohi/scripts/transform-file.py

#!/usr/bin/env python3

'''
Transforms a file into a web-servable file.

In most cases this entails either

  + just symlinking, or
  + transforming a page to html
'''

import sys
import os
import html
import subprocess

import yaml


def read(path):
    '''
    Return the entire contents of the text file at `path` as a string.

    The file is decoded as UTF-8 explicitly instead of with the locale's
    default encoding, so the result does not depend on the environment
    the script is run in.
    '''
    with open(path, encoding='utf-8') as f:
        return f.read()

def write(path, text):
    '''
    Write the string `text` to the file at `path`, replacing any existing
    contents.

    The file is encoded as UTF-8 explicitly instead of with the locale's
    default encoding, so characters outside the locale's charset cannot
    make the write fail or be stored differently between environments.
    '''
    with open(path, 'w', encoding='utf-8') as f:
        f.write(text)


# Every location below is derived from where this script file lives.
script_dir = os.path.dirname(__file__)
# The project root is the parent of the directory containing the script.
base_dir = os.path.dirname(script_dir)

# Root of the directory tree that is walked for markdown pages.
site_dir = os.path.join(base_dir, 'site')

# The page skeleton is read once at import time; markdown_to_html fills in
# its `title` and `content` fields with str.format.
template_dir = os.path.join(base_dir, 'template')
template_base_file = os.path.join(template_dir, 'base.html')
template_base = read(template_base_file)


def pandoc(filename):
    '''
    Convert the markdown file `filename` with pandoc and return the result.

    The input is read as pandoc markdown with the `smart` extension
    enabled.  Returns pandoc's standard output decoded as UTF-8 with
    leading and trailing whitespace stripped.

    Raises subprocess.CalledProcessError if pandoc exits with a non-zero
    status, so that a failed conversion is reported instead of silently
    producing an empty page.
    '''
    proc = subprocess.run(['pandoc', '-f', 'markdown+smart', filename],
                          stdout=subprocess.PIPE, check=True)
    return proc.stdout.decode('utf-8').strip()

def pandoc_stdin(text):
    '''
    Convert the markdown string `text` with pandoc and return the result.

    `text` is encoded as UTF-8 and fed to pandoc on standard input, read
    as pandoc markdown with the `smart` extension enabled.  Returns
    pandoc's standard output decoded as UTF-8 with leading and trailing
    whitespace stripped.

    Raises subprocess.CalledProcessError if pandoc exits with a non-zero
    status, so that a failed conversion is reported instead of silently
    producing empty content.
    '''
    proc = subprocess.run(['pandoc', '-f', 'markdown+smart'],
                          input=text.encode('utf-8'),
                          stdout=subprocess.PIPE, check=True)
    return proc.stdout.decode('utf-8').strip()

def extract_markdown_title(filename):
    '''
    Return a title for the markdown file `filename`.

    The title is the text of the first line that starts with '# ', with
    surrounding whitespace removed.  If there is no such line and
    `filename` ends in '.md', the file's base name without that suffix is
    used instead.  Otherwise None is returned.

    The file is decoded as UTF-8.
    '''
    with open(filename, encoding='utf-8') as f:
        for line in f:
            if line.startswith('# '):
                return line[2:].strip()
    if filename.endswith('.md'):
        return os.path.basename(filename)[:-3]
    # No heading and no '.md' name to fall back on.
    return None

def extract_markdown_yaml(filename):
    '''
    Return the metadata of the first YAML block in the markdown file
    `filename`, or None if there is no usable block.

    A block starts at the first line consisting of '---' and ends at the
    next such line.  None is returned when no block is found, when the
    block is never closed, or when its content does not parse to a
    mapping (this includes an empty block).

    The returned dict is normalised for use in the article list:

      + 'abstract', when it has a value, is converted to a string,
        stripped, and has its newlines replaced by spaces;
      + 'lastupdated', when it has a value, is converted to a string.

    Keys that are present without a value are left as None rather than
    crashing or being turned into the string 'None'.

    The file is decoded as UTF-8.
    '''
    block_lines = None  # stays None until the opening '---' has been seen
    with open(filename, encoding='utf-8') as f:
        for line in f:
            # Strip the newline so a delimiter on an unterminated final
            # line is recognised as well.
            is_delimiter = line.rstrip('\n') == '---'
            if block_lines is None:
                if is_delimiter:
                    block_lines = []
            elif is_delimiter:
                break
            else:
                block_lines.append(line)
        else:
            # End of file reached without a complete '---' ... '---' block.
            return None

    meta = yaml.safe_load(''.join(block_lines))
    if not isinstance(meta, dict):
        return None

    abstract = meta.get('abstract')
    if abstract is not None:
        meta['abstract'] = str(abstract).strip().replace('\n', ' ')
    lastupdated = meta.get('lastupdated')
    if lastupdated is not None:
        meta['lastupdated'] = str(lastupdated)
    return meta

def _collect_site_pages():
    '''
    Return a list of (url, path) pairs, one for every '.md' file found
    below site_dir.

    An 'index.md' is given the URL of its directory with a trailing '/';
    any other markdown file is given its directory URL plus its name
    without the '.md' suffix.  URLs are rooted at site_dir, so
    '<site_dir>/a/b.md' becomes '/a/b'.
    '''
    pages = []
    for path, _subdirs, subfiles in os.walk(site_dir):
        # Derive the URL prefix from the path relative to site_dir instead
        # of stripping string prefixes, which mangles directories whose
        # name merely starts with 'site'.
        rel_dir = os.path.relpath(path, site_dir)
        if rel_dir == os.curdir:
            url_dir = ''
        else:
            url_dir = '/' + rel_dir.replace(os.sep, '/')

        # `path` as yielded by os.walk already starts with site_dir, so it
        # must not be joined onto base_dir a second time.
        if 'index.md' in subfiles:
            pages.append((url_dir + '/',
                          os.path.normpath(os.path.join(path, 'index.md'))))
        for name in subfiles:
            if name.endswith('.md') and name != 'index.md':
                pages.append((url_dir + '/' + name[:-3],
                              os.path.normpath(os.path.join(path, name))))
    return pages


def _article_sort_key(entry):
    '''
    Sort key for a (title, metadata, url) article entry: the
    'lastupdated' value, then the title.  Entries without metadata or
    without a 'lastupdated' value use '9999'.
    '''
    title, meta, _url = entry
    lastupdated = (meta or {}).get('lastupdated') or '9999'
    return (lastupdated, title)


def _article_list_markdown(pages):
    '''
    Build the markdown article list from (url, path) pairs.

    Each entry is emitted as a link followed by a '~' description line
    (pandoc definition-list syntax).  Entries are ordered by descending
    'lastupdated', then descending title.
    '''
    # Pages that are never listed among the articles.
    excluded_urls = ('/', '/about/', '/about/niels', '/404')

    entries = [(extract_markdown_title(path), extract_markdown_yaml(path), url)
               for url, path in pages
               if url not in excluded_urls]

    # Hard-coded extra entry.  NOTE(review): presumably this page is not
    # backed by a markdown file under site_dir -- confirm.
    entries.append(('Potators',
                    {'abstract': 'Do not look.', 'lastupdated': '2011'},
                    '/potator/'))

    entries.sort(key=_article_sort_key, reverse=True)

    items = []
    for title, meta, url in entries:
        abstract = None if meta is None else meta.get('abstract')
        if abstract is None:
            description = '(No description)'
        else:
            description = abstract
            lastupdated = meta.get('lastupdated')
            if lastupdated is not None:
                description = '{} ({})'.format(abstract, lastupdated)
        items.append('[{}]({})\n  ~ {}\n\n'.format(title, url, description))
    return ''.join(items)


def markdown_to_html(input_file, output_dir):
    '''
    Render the markdown file `input_file` into the base template and
    write the result to `output_dir`.

    The output file has the base name of `input_file` with its extension
    replaced by '.html'.  The template's `title` field receives the
    HTML-escaped title of the page and its `content` field receives the
    pandoc output.

    Special case: when the path of `input_file` relative to the common
    ancestor of `input_file` and `output_dir` is exactly 'site/index.md',
    every occurrence of 'SPECIAL:ARTICLES' in the file is replaced by a
    generated list of the site's pages before conversion.
    '''
    title = html.escape(extract_markdown_title(input_file))

    common = os.path.commonpath([input_file, output_dir])
    relpath = os.path.relpath(input_file, start=common)

    if relpath == 'site/index.md':
        front_page = read(input_file)
        articles = _article_list_markdown(_collect_site_pages())
        content = pandoc_stdin(front_page.replace('SPECIAL:ARTICLES', articles))
    else:
        content = pandoc(input_file)

    html_out = template_base.format(title=title, content=content)
    page_name = os.path.splitext(os.path.basename(input_file))[0]
    write(os.path.join(output_dir, page_name + '.html'), html_out)

def symlink_relative(input_file, output_dir):
    '''
    Create a symlink to `input_file` inside `output_dir`.

    The link gets the base name of `input_file` and points at it through
    a path relative to `output_dir`.  The current working directory of
    the process is left unchanged.

    Errors from os.symlink (for example when the link already exists)
    propagate to the caller.
    '''
    target_abs = os.path.abspath(input_file)
    # A relative link target is resolved from the directory that really
    # contains the link, so compute it from the resolved output directory.
    link_dir = os.path.realpath(output_dir)
    target_rel = os.path.relpath(target_abs, start=link_dir)
    link_path = os.path.join(output_dir, os.path.basename(input_file))
    os.symlink(target_rel, link_path)

def get_extension(filename):
    '''
    Return the extension of `filename` without its leading dot, or an
    empty string if it has none.
    '''
    return os.path.splitext(filename)[1][1:]
    
def transform(input_file, output_dir):
    '''
    Make `input_file` available in `output_dir`.

    Files with the extension 'md' are converted with markdown_to_html;
    every other file is symlinked with symlink_relative.  Returns 0.
    '''
    # Extension -> handler; anything not listed falls back to symlinking.
    handlers = {'md': markdown_to_html}
    handler = handlers.get(get_extension(input_file), symlink_relative)
    handler(input_file, output_dir)
    return 0

def main(args):
    '''
    Command-line entry point.

    `args` must hold exactly two items, the input file and the output
    directory.  With any other number of items the usage text is printed
    and 1 is returned; otherwise the file is transformed and None is
    returned.
    '''
    try:
        input_file, output_dir = args
    except ValueError:
        # Wrong number of arguments.
        print_usage()
        return 1
    else:
        transform(input_file, output_dir)
        return None

def print_usage():
    '''
    Print the command-line usage text, followed by a blank line, to
    standard output.
    '''
    usage = 'usage: transform-file.py INPUT_FILE OUTPUT_DIRECTORY\n'
    print(usage)
    
# Run only when executed as a script; main's return value becomes the
# process exit status.
if __name__ == '__main__':
    raise SystemExit(main(sys.argv[1:]))
Add basic site generation scripts. 2016-08-26 15:26:30 +02:00			`#!/usr/bin/env python3`

			`'''`
			`Transforms a file into a web-servable file.`

			`In most cases this entails either`

			`+ just symlinking, or`
			`+ transforming a page to html`
			`'''`

			`import sys`
			`import os`
			`import html`
			`import subprocess`

Generate article sitemap thing. 2016-09-02 23:32:55 +02:00			`import yaml`

Add basic site generation scripts. 2016-08-26 15:26:30 +02:00
			`def read(path):`
			`with open(path) as f:`
			`return f.read()`

			`def write(path, text):`
			`with open(path, 'w') as f:`
			`f.write(text)`


			`script_dir = os.path.dirname(__file__)`
			`base_dir = os.path.split(script_dir)[0]`
			`template_dir = os.path.join(base_dir, 'template')`
			`template_base_file = os.path.join(template_dir, 'base.html')`
			`template_base = read(template_base_file)`
Generate article sitemap thing. 2016-09-02 23:32:55 +02:00			`site_dir = os.path.join(base_dir, 'site')`
Add basic site generation scripts. 2016-08-26 15:26:30 +02:00

			`def pandoc(filename):`
Use the new pandoc syntax. 2018-08-21 19:16:55 +02:00			`proc = subprocess.run(['pandoc', '-f', 'markdown+smart', filename],`
Generate article sitemap thing. 2016-09-02 23:32:55 +02:00			`stdout=subprocess.PIPE)`
Add basic site generation scripts. 2016-08-26 15:26:30 +02:00			`return proc.stdout.decode('utf-8').strip()`

Use the new pandoc syntax. 2018-08-21 19:16:55 +02:00			`def pandoc_stdin(text):`
Generate article sitemap thing. 2016-09-02 23:32:55 +02:00			`out = subprocess.Popen(`
Use the new pandoc syntax. 2018-08-21 19:16:55 +02:00			`['pandoc', '-f', 'markdown+smart'],`
Generate article sitemap thing. 2016-09-02 23:32:55 +02:00			`stdin=subprocess.PIPE, stdout=subprocess.PIPE).communicate(text.encode('utf-8'))[0]`
			`return out.decode('utf-8').strip()`

Add basic site generation scripts. 2016-08-26 15:26:30 +02:00			`def extract_markdown_title(filename):`
			`with open(filename) as f:`
			`for line in f:`
			`if line.startswith('# '):`
			`return line[2:].strip()`
			`if filename.endswith('.md'):`
			`return os.path.basename(filename)[:-3]`

Add years to writings and sort based on them. 2019-10-30 13:28:37 +01:00			`def extract_markdown_yaml(filename):`
Generate article sitemap thing. 2016-09-02 23:32:55 +02:00			`state = 0`
			`with open(filename) as f:`
			`for line in f:`
			`if state == 0 and line == '---\n':`
			`state = 1`
			`yaml_block = ''`
			`elif state == 1:`
			`if line == '---\n':`
Update YAML library use. 2019-10-29 13:48:51 +01:00			`y = yaml.safe_load(yaml_block)`
Generate article sitemap thing. 2016-09-02 23:32:55 +02:00			`if y is None:`
			`return None`
			`else:`
Add years to writings and sort based on them. 2019-10-30 13:28:37 +01:00			`if 'abstract' in y:`
			`y['abstract'] = y.get('abstract').strip().replace('\n', ' ')`
			`if 'lastupdated' in y:`
Support more fine-grained lastupdated data. 2019-10-30 15:31:16 +01:00			`y['lastupdated'] = str(y.get('lastupdated'))`
Add years to writings and sort based on them. 2019-10-30 13:28:37 +01:00			`return y`
Generate article sitemap thing. 2016-09-02 23:32:55 +02:00			`else:`
			`yaml_block += line`

Add basic site generation scripts. 2016-08-26 15:26:30 +02:00			`def markdown_to_html(input_file, output_dir):`
			`title = extract_markdown_title(input_file)`
			`title = html.escape(title)`
Generate article sitemap thing. 2016-09-02 23:32:55 +02:00
			`common = os.path.commonpath([input_file, output_dir])`
			`relpath = os.path.relpath(input_file, start=common)`

			`# Special cases`
Get rid of the "misc" page. Just put it on the front page. 2019-10-30 01:09:24 +01:00			`if relpath == 'site/index.md':`
			`input_base = read(input_file)`
Generate article sitemap thing. 2016-09-02 23:32:55 +02:00
			`pages = []`
			`for path, subdirs, subfiles in os.walk(site_dir):`
			`dir = path`
This extra cleaning up appears to be necessary now 2024-03-22 18:12:09 +01:00			`if dir.startswith(site_dir):`
			`dir = path[len(site_dir):]`
Generate article sitemap thing. 2016-09-02 23:32:55 +02:00			`if dir.startswith('./'):`
			`dir = dir[1:]`
			`if dir.startswith('/site'):`
			`dir = dir[5:]`
			`if 'index.md' in subfiles:`
			`pages.append((dir + '/',`
			`os.path.normpath(os.path.join(base_dir, path, 'index.md'))))`
			`for name in subfiles:`
			`if name.endswith('.md') and name != 'index.md':`
			`uri = name[:-3]`
			`pages.append((dir + '/' + uri,`
			`os.path.normpath(os.path.join(base_dir, path, name))))`

Get rid of the "misc" page. Just put it on the front page. 2019-10-30 01:09:24 +01:00			`builtins = ['/', '/about/', '/about/niels', '/404']`
Generate article sitemap thing. 2016-09-02 23:32:55 +02:00			`pages = filter(lambda t: t[0] not in builtins, pages)`
			`pages_new = []`
			`for page in pages:`
			`url, path = page`
			`ptitle = extract_markdown_title(path)`
Add years to writings and sort based on them. 2019-10-30 13:28:37 +01:00			`pyaml = extract_markdown_yaml(path)`
			`pages_new.append((ptitle, pyaml, url, path))`
Generate article sitemap thing. 2016-09-02 23:32:55 +02:00
Support more fine-grained lastupdated data. 2019-10-30 15:31:16 +01:00			`pages_new.append(('Potators', {'abstract': 'Do not look.', 'lastupdated': '2011'}, '/potator/', '/potator/'))`
Add more links. 2019-09-09 12:33:44 +02:00
Fix script How did this use to work? 2022-11-14 22:40:31 +01:00			`pages_new.sort(key=lambda p: ('9999' if p[1] is None else p[1].get('lastupdated') or '9999', p[0]), reverse=True)`
Generate article sitemap thing. 2016-09-02 23:32:55 +02:00			`md = ''`
			`for page in pages_new:`
Add years to writings and sort based on them. 2019-10-30 13:28:37 +01:00			`ptitle, pyaml, url, _ = page`
			`if pyaml is None or pyaml.get('abstract') is None:`
Generate article sitemap thing. 2016-09-02 23:32:55 +02:00			`pabstract = '(No description)'`
Add years to writings and sort based on them. 2019-10-30 13:28:37 +01:00			`else:`
			`pabstract = pyaml['abstract']`
			`lu = pyaml.get('lastupdated')`
			`if lu is not None:`
			`pabstract += ' ({})'.format(lu)`
Generate article sitemap thing. 2016-09-02 23:32:55 +02:00			`md += '[{}]({})\n ~ {}\n\n'.format(ptitle, url, pabstract)`

Get rid of the "misc" page. Just put it on the front page. 2019-10-30 01:09:24 +01:00			`content = pandoc_stdin(input_base.replace('SPECIAL:ARTICLES', md))`
Generate article sitemap thing. 2016-09-02 23:32:55 +02:00			`else:`
			`content = pandoc(input_file)`

Add basic site generation scripts. 2016-08-26 15:26:30 +02:00			`html_out = template_base.format(title=title, content=content)`
			`output_file = os.path.join(`
			`output_dir,`
			`os.path.splitext(os.path.basename(input_file))[0]`
			`+ '.html')`
			`write(output_file, html_out)`

			`def symlink_relative(input_file, output_dir):`
			`in_path_abs = os.path.abspath(input_file)`
			`os.chdir(output_dir)`
			`in_path_rel = os.path.relpath(in_path_abs)`
			`output_file = os.path.basename(input_file)`
			`os.symlink(in_path_rel, output_file)`

			`def get_extension(filename):`
			`_, extension = os.path.splitext(filename)`
			`extension = extension[1:]`
			`return extension`

			`def transform(input_file, output_dir):`
			`extension = get_extension(input_file)`

			`actions = {`
			`'md': lambda: markdown_to_html(input_file, output_dir)`
			`}`
			`try:`
			`action = actions[extension]`
			`except KeyError:`
			`action = lambda: symlink_relative(input_file, output_dir)`
			`action()`
			`return 0`

			`def main(args):`
			`try:`
			`[input_file, output_dir] = args`
			`except ValueError:`
			`print_usage()`
			`return 1`

			`transform(input_file, output_dir)`

			`def print_usage():`
			`print('''\`
			`usage: transform-file.py INPUT_FILE OUTPUT_DIRECTORY`
			`''')`

			`if __name__ == '__main__':`
			`sys.exit(main(sys.argv[1:]))`