spaCy/website/create_code_samples

#!/usr/bin/env python
from __future__ import unicode_literals

import os
import ast
import io
import re

import plac

# cgi.escape is deprecated since py32
try:
    from html import escape
except ImportError:
    from cgi import escape


# e.g. python website/create_code_samples tests/website/ website/src/
def main(src_dirname, dst_dirname):
    prefix = "test_"
    
    for filename in os.listdir(src_dirname):
        if not filename.startswith('test_'):
            continue
        if not filename.endswith('.py'):
            continue
    
        # Remove test_ prefix and .py suffix
        name = filename[6:-3]
        with io.open(os.path.join(src_dirname, filename), 'r', encoding='utf8') as file_:
            source = file_.readlines()
        tree = ast.parse("".join(source))
    
        for root in tree.body:
            if isinstance(root, ast.FunctionDef) and root.name.startswith(prefix):
    
                # only ast.expr and ast.stmt have line numbers, see:
                # https://docs.python.org/2/library/ast.html#ast.AST.lineno
                line_numbers = []
    
                for node in ast.walk(root):
                    if hasattr(node, "lineno"):
                        line_numbers.append(node.lineno)
    
                body = source[min(line_numbers)-1:max(line_numbers)]
                while not body[0][0].isspace():
                    body = body[1:]
    
                # make sure we are inside an indented function body
                assert all([l[0].isspace() for l in body])
    
                offset = 0
                for line in body:
                    match = re.search(r"[^\s]", line)
                    if match:
                        offset = match.start(0)
                        break
    
                # remove indentation
                assert offset > 0
    
                for i in range(len(body)):
                    body[i] = body[i][offset:] if len(body[i]) > offset else "\n"
    
                # make sure empty lines contain a newline
                assert all([l[-1] == "\n" for l in body])
    
                code_filename = "%s.%s" % (name, root.name[len(prefix):])
    
                with io.open(os.path.join(dst_dirname, code_filename),
                             "w", encoding='utf8') as f:
                    f.write(escape("".join(body)))


if __name__ == '__main__':
    # Script entry point: plac derives the command-line interface from
    # main's signature, so the two positional arguments on the command line
    # are passed as src_dirname and dst_dirname.
    plac.call(main)
new proposal for doctests 2015-09-25 09:52:14 +00:00			`#!/usr/bin/env python`
* Pedantic edits to website/create_code_samples. Make it use plac for interface, remove unnecessary regex, ensure unicode is handled correctly under Python 2. 2015-10-19 01:56:00 +00:00			`from __future__ import unicode_literals`

new proposal for doctests 2015-09-25 09:52:14 +00:00			`import os`
			`import ast`
* Pedantic edits to website/create_code_samples. Make it use plac for interface, remove unnecessary regex, ensure unicode is handled correctly under Python 2. 2015-10-19 01:56:00 +00:00			`import io`
* Add missing import for website/create_code_samples 2015-10-25 12:27:55 +00:00			`import re`
* Pedantic edits to website/create_code_samples. Make it use plac for interface, remove unnecessary regex, ensure unicode is handled correctly under Python 2. 2015-10-19 01:56:00 +00:00
			`import plac`
new proposal for doctests 2015-09-25 09:52:14 +00:00
doctests for website: 'home'-section 2015-09-28 00:39:14 +00:00			`# cgi.escape is deprecated since py32`
			`try:`
			`from html import escape`
			`except ImportError:`
			`from cgi import escape`

new proposal for doctests 2015-09-25 09:52:14 +00:00
* Pedantic edits to website/create_code_samples. Make it use plac for interface, remove unnecessary regex, ensure unicode is handled correctly under Python 2. 2015-10-19 01:56:00 +00:00			`# e.g. python website/create_code_samples tests/website/ website/src/`
			`def main(src_dirname, dst_dirname):`
			`prefix = "test_"`

			`for filename in os.listdir(src_dirname):`
			`if not filename.startswith('test_'):`
			`continue`
			`if not filename.endswith('.py'):`
			`continue`

			`# Remove test_ prefix and .py suffix`
			`name = filename[6:-3]`
			`with io.open(os.path.join(src_dirname, filename), 'r', encoding='utf8') as file_:`
			`source = file_.readlines()`
			`tree = ast.parse("".join(source))`

			`for root in tree.body:`
			`if isinstance(root, ast.FunctionDef) and root.name.startswith(prefix):`

			`# only ast.expr and ast.stmt have line numbers, see:`
			`# https://docs.python.org/2/library/ast.html#ast.AST.lineno`
			`line_numbers = []`

			`for node in ast.walk(root):`
			`if hasattr(node, "lineno"):`
			`line_numbers.append(node.lineno)`

			`body = source[min(line_numbers)-1:max(line_numbers)]`
			`while not body[0][0].isspace():`
			`body = body[1:]`

			`# make sure we are inside an indented function body`
			`assert all([l[0].isspace() for l in body])`

			`offset = 0`
			`for line in body:`
			`match = re.search(r"[^\s]", line)`
			`if match:`
			`offset = match.start(0)`
			`break`

			`# remove indentation`
			`assert offset > 0`

			`for i in range(len(body)):`
			`body[i] = body[i][offset:] if len(body[i]) > offset else "\n"`

			`# make sure empty lines contain a newline`
			`assert all([l[-1] == "\n" for l in body])`

			`code_filename = "%s.%s" % (name, root.name[len(prefix):])`

			`with io.open(os.path.join(dst_dirname, code_filename),`
			`"w", encoding='utf8') as f:`
			`f.write(escape("".join(body)))`


			`if __name__ == '__main__':`
			`plac.call(main)`