pokecrystal/tools/toc.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
Usage: python3 toc.py [-n] files.md...
Replace a "## TOC" heading in a Markdown file with a table of contents,
generated from the other headings in the file. Supports multiple files.
Headings must start with "##" signs to be detected.
"""

import sys
import re
from collections import namedtuple

toc_name = 'Contents'
valid_toc_headings = {'## TOC', '##TOC'}

TocItem = namedtuple('TocItem', ['name', 'anchor', 'level'])
punctuation_regexp = re.compile(r'[^\w\- ]+')

def name_to_anchor(name):
	# GitHub's algorithm for generating anchors from headings
	# https://github.com/jch/html-pipeline/blob/master/lib/html/pipeline/toc_filter.rb
	anchor = name.strip().lower()                   # lowercase
	anchor = re.sub(punctuation_regexp, '', anchor) # remove punctuation
	anchor = anchor.replace(' ', '-')               # replace spaces with dash
	return anchor

def get_toc_index(lines):
	toc_index = None
	for i, line in enumerate(lines):
		if line.rstrip() in valid_toc_headings:
			toc_index = i
			break
	return toc_index

def get_toc_items(lines, toc_index):
	for i, line in enumerate(lines):
		if i <= toc_index:
			continue
		if line.startswith('##'):
			name = line.lstrip('#')
			level = len(line) - len(name) - len('##')
			name = name.strip()
			anchor = name_to_anchor(name)
			yield TocItem(name, anchor, level)

def toc_string(toc_items):
	lines = ['## %s' % toc_name, '']
	for name, anchor, level in toc_items:
		padding = '  ' * level
		line = '%s- [%s](#%s)' % (padding, name, anchor)
		lines.append(line)
	return '\n'.join(lines) + '\n'

def add_toc(filename):
	with open(filename, 'r', encoding='utf-8') as f:
		lines = f.readlines()
	toc_index = get_toc_index(lines)
	if toc_index is None:
		return None # no TOC heading
	toc_items = list(get_toc_items(lines, toc_index))
	if not toc_items:
		return False # no content headings
	with open(filename, 'w', encoding='utf-8') as f:
		for i, line in enumerate(lines):
			if i == toc_index:
				f.write(toc_string(toc_items))
			else:
				f.write(line)
	return True # OK

def main():
	if len(sys.argv) < 2:
		print('*** ERROR: No filenames specified')
		print(__doc__)
		exit(1)
	for filename in sys.argv[1:]:
		print(filename)
		result = add_toc(filename)
		if result is None:
			print('*** WARNING: No "## TOC" heading found')
		elif result is False:
			print('*** WARNING: No content headings found')
		else:
			print('OK')

if __name__ == '__main__':
	main()
Fix toc.py line endings (for travis-ci) 2018-09-20 15:32:53 +00:00			`#!/usr/bin/env python3`
			`# -- coding: utf-8 --`

			`"""`
			`Usage: python3 toc.py [-n] files.md...`
			`Replace a "## TOC" heading in a Markdown file with a table of contents,`
			`generated from the other headings in the file. Supports multiple files.`
			`Headings must start with "##" signs to be detected.`
			`"""`

			`import sys`
			`import re`
			`from collections import namedtuple`

			`toc_name = 'Contents'`
			`valid_toc_headings = {'## TOC', '##TOC'}`

			`TocItem = namedtuple('TocItem', ['name', 'anchor', 'level'])`
			`punctuation_regexp = re.compile(r'[^\w\- ]+')`

			`def name_to_anchor(name):`
			`# GitHub's algorithm for generating anchors from headings`
			`# https://github.com/jch/html-pipeline/blob/master/lib/html/pipeline/toc_filter.rb`
			`anchor = name.strip().lower() # lowercase`
			`anchor = re.sub(punctuation_regexp, '', anchor) # remove punctuation`
			`anchor = anchor.replace(' ', '-') # replace spaces with dash`
			`return anchor`

			`def get_toc_index(lines):`
			`toc_index = None`
			`for i, line in enumerate(lines):`
			`if line.rstrip() in valid_toc_headings:`
			`toc_index = i`
			`break`
			`return toc_index`

			`def get_toc_items(lines, toc_index):`
			`for i, line in enumerate(lines):`
			`if i <= toc_index:`
			`continue`
			`if line.startswith('##'):`
			`name = line.lstrip('#')`
			`level = len(line) - len(name) - len('##')`
			`name = name.strip()`
			`anchor = name_to_anchor(name)`
			`yield TocItem(name, anchor, level)`

			`def toc_string(toc_items):`
			`lines = ['## %s' % toc_name, '']`
			`for name, anchor, level in toc_items:`
			`padding = ' ' * level`
			`line = '%s- [%s](#%s)' % (padding, name, anchor)`
			`lines.append(line)`
			`return '\n'.join(lines) + '\n'`

			`def add_toc(filename):`
			`with open(filename, 'r', encoding='utf-8') as f:`
			`lines = f.readlines()`
			`toc_index = get_toc_index(lines)`
			`if toc_index is None:`
			`return None # no TOC heading`
			`toc_items = list(get_toc_items(lines, toc_index))`
			`if not toc_items:`
			`return False # no content headings`
			`with open(filename, 'w', encoding='utf-8') as f:`
			`for i, line in enumerate(lines):`
			`if i == toc_index:`
			`f.write(toc_string(toc_items))`
			`else:`
			`f.write(line)`
			`return True # OK`

			`def main():`
			`if len(sys.argv) < 2:`
			`print('*** ERROR: No filenames specified')`
			`print(__doc__)`
			`exit(1)`
			`for filename in sys.argv[1:]:`
			`print(filename)`
			`result = add_toc(filename)`
			`if result is None:`
			`print('*** WARNING: No "## TOC" heading found')`
			`elif result is False:`
			`print('*** WARNING: No content headings found')`
			`else:`
			`print('OK')`

			`if __name__ == '__main__':`
			`main()`