wtfpython/irrelevant/json_generator.py

"""
An inefficient monolithic piece of code that'll generate a Jupyter notebook
from the project's main README.

PS: If you are a recruiter, please don't judge me by this piece of code. I wrote it
in a hurry. I know this is messy and can be simplified, but I don't want to change it
much because it just works.

Simplifications and improvements through patches are more than welcome however :)


#TODOs

- CLI arguments for running this thing
- Add it to prepush hook
- Add support for skip comments, to skip examples that are not meant for notebook environment.
- Use templates?
"""

import json
import os
import pprint
# README to parse. Resolved relative to this script rather than through a
# machine-specific absolute path, so the generator runs from any checkout.
# NOTE(review): assumes the script sits one directory below the repository
# root (next to which README.md lives) -- confirm against the repo layout.
fname = os.path.normpath(
    os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir, "README.md")
)
examples = []

# The globals
current_example = 1  # id given to the next example found in the README
sequence_num = 1  # number stamped on the next generated markdown/code block
current_section_name = ""  # text of the H2 heading currently being parsed


# Line prefixes that mark a statement typed at an interactive prompt
STATEMENT_PREFIXES = ["...", ">>> ", "$ "]


def generate_code_block(statements, output):
    """Wrap a snippet in a "code" block record and advance the global counter.

    Args:
        statements: the snippet's statement lines; stored as given (no copy).
        output: the snippet's output lines; stored as given (no copy).

    Returns:
        A dict with "type" set to "code", the value the module-level
        ``sequence_num`` had before this call, and the two arguments.
    """
    global sequence_num
    assigned_num = sequence_num
    # Reserve the number for this block before handing the record back.
    sequence_num = assigned_num + 1
    return {
        "type": "code",
        "sequence_num": assigned_num,
        "statements": statements,
        "output": output,
    }


def generate_markdown_block(lines):
    """Wrap text lines in a "markdown" block record and advance the global counter.

    Args:
        lines: the markdown lines; stored as given (no copy).

    Returns:
        A dict with "type" set to "markdown", the value the module-level
        ``sequence_num`` had before this call, and the lines under "value".
    """
    global sequence_num
    assigned_num = sequence_num
    # Reserve the number for this block before handing the record back.
    sequence_num = assigned_num + 1
    return {
        "type": "markdown",
        "sequence_num": assigned_num,
        "value": lines,
    }

def is_interactive_statement(line):
    """Return True when `line` starts with any entry of STATEMENT_PREFIXES."""
    return any(line.startswith(prefix) for prefix in STATEMENT_PREFIXES)

def _consume_snippet(lines, target):
    """Read a fenced snippet whose opening fence was already consumed.

    Lines are taken from `lines` up to and including the closing fence. Each
    group of statements followed by its output is appended to `target` as a
    code block; whatever is pending when the fence closes is appended too.
    """
    statements, output = [], []
    is_interactive = False
    line = next(lines)
    while not line.startswith("```"):
        if is_interactive_statement(line):
            is_interactive = True
            if output:
                # A new statement after some output starts the next code block.
                target.append(generate_code_block(statements, output))
                statements, output = [], []
            statements.append(line)
        elif is_interactive:
            # Unprefixed line inside an interactive snippet: treat as output.
            output.append(line)
        else:
            # No prompt seen so far: the snippet is plain code.
            statements.append(line)
        line = next(lines)
    target.append(generate_code_block(statements, output))


def _collect_section(lines, first_line, stop_prefixes, target):
    """Append markdown/code blocks to `target` until a stop line is reached.

    Scanning starts at `first_line` and ends at the first line that starts
    with one of `stop_prefixes` (a tuple of strings); that line is returned
    without being stored.
    """
    text = []
    line = first_line
    while not line.startswith(stop_prefixes):
        if line.startswith("```"):
            # A snippet begins; whatever text was gathered so far is markdown.
            if text:
                target.append(generate_markdown_block(text))
                text = []
            _consume_snippet(lines, target)
        else:
            text.append(line)
        line = next(lines)
    if text:
        target.append(generate_markdown_block(text))
    return line


def parse_example_parts(lines, example_title_line):
    """Split one README example into its build-up and explanation blocks.

    Args:
        lines: iterator over the README lines that follow the example title;
            it is advanced with ``next()``.
        example_title_line: the example's H3 line; it becomes the first line
            of the build-up text.

    Returns:
        A ``(next_line, parts)`` tuple. ``next_line`` is the line that ended
        the explanation (it starts with "---" or "### "). ``parts`` maps
        "build_up" and "explanation" to lists of markdown/code block dicts in
        document order.

    Raises:
        StopIteration: when `lines` is exhausted before the example ends. This
            is deliberately not caught here; the module-level driver uses it
            to detect the end of the README.
    """
    parts = {
        "build_up": [],
        "explanation": []
    }
    # Build-up: everything up to the H4 heading that opens the explanation.
    # Note that only an H4 line ends this scan.
    next_line = _collect_section(
        lines, example_title_line, ("#### ",), parts["build_up"]
    )
    # Explanation: everything up to a "---" rule or the next H3. Its blocks go
    # under "explanation" (they used to be appended to "build_up" by mistake).
    next_line = _collect_section(
        lines, next_line, ("---", "### "), parts["explanation"]
    )
    return next_line, parts

def remove_from_beginning(tokens, line):
    """Strip leading tokens from `line`.

    The tokens are tried in the given order; each one that the (possibly
    already shortened) line starts with is cut off once. Only the leading
    occurrence is removed, so the same text appearing later in the line
    (for example a "..." string literal in the code) is left intact.

    Args:
        tokens: iterable of prefix strings to remove.
        line: the line to clean.

    Returns:
        The line without the matched leading tokens.
    """
    for token in tokens:
        if line.startswith(token):
            # Slice instead of str.replace, which would also delete every
            # later occurrence of the token inside the line.
            line = line[len(token):]
    return line


def inspect_and_sanitize_code_lines(lines):
    """Strip prompt prefixes from code lines and detect a leading print.

    Args:
        lines: the statement lines of one code block.

    Returns:
        A ``(is_print_present, sanitized)`` tuple: a bool that is True when
        any cleaned line starts with "print " or "print(", and the list of
        lines after STATEMENT_PREFIXES were removed via remove_from_beginning.
    """
    sanitized = [remove_from_beginning(STATEMENT_PREFIXES, raw) for raw in lines]
    has_print = any(code.startswith(("print ", "print(")) for code in sanitized)
    return has_print, sanitized

def convert_to_cells(cell_contents):
    """Turn parsed block records into notebook cell dicts.

    Args:
        cell_contents: iterable of block dicts as produced by
            generate_markdown_block / generate_code_block.

    Returns:
        A list of cell dicts. "markdown" blocks become markdown cells and
        "code" blocks become code cells whose recorded output is attached as
        a stdout stream (when the code has a leading print) or as an
        execute_result otherwise. Blocks of any other type are skipped.
    """
    cells = []
    for block in cell_contents:
        kind = block["type"]

        if kind == "markdown":
            # todo add metadata later
            cells.append({
                "cell_type": "markdown",
                "metadata": {},
                "source": block["value"],
            })
            continue

        if kind != "code":
            continue

        has_print, source = inspect_and_sanitize_code_lines(block["statements"])
        if has_print:
            # Printed text is represented as a stdout stream.
            recorded = {
                "name": "stdout",
                "output_type": "stream",
                "text": block["output"],
            }
            cell = {
                "cell_type": "code",
                "metadata": {"collapsed": True},
                "execution_count": None,
                "outputs": [recorded],
                "source": source,
            }
        else:
            # Anything else is represented as the value of the expression.
            recorded = {
                "data": {"text/plain": block["output"]},
                "output_type": "execute_result",
                "metadata": {},
                "execution_count": None,
            }
            cell = {
                "cell_type": "code",
                "execution_count": None,
                "metadata": {"collapsed": True},
                "outputs": [recorded],
                "source": source,
            }
        cells.append(cell)

    return cells


def convert_to_notebook(parsed_json, notebook_path="test.ipynb"):
    """Assemble the parsed examples into a notebook and write it to disk.

    The notebook dict is also pretty-printed to stdout before it is written.

    Args:
        parsed_json: iterable of example dicts; each must have a "parts" dict
            that may hold "build_up" and "explanation" lists of blocks.
        notebook_path: file the notebook JSON is written to. Defaults to
            "test.ipynb", the path that was previously hard-coded.
    """
    result = {
        "cells": [],
        "metadata": {},
        "nbformat": 4,
        "nbformat_minor": 2
    }
    for example in parsed_json:
        parts = example["parts"]
        # Build-up cells come first, then the explanation cells.
        for part_name in ("build_up", "explanation"):
            blocks = parts.get(part_name)
            if blocks:
                result["cells"] += convert_to_cells(blocks)

    pprint.pprint(result, indent=2)
    # The output path no longer depends on the loop having run at least once,
    # so an empty `parsed_json` yields an empty notebook instead of an
    # UnboundLocalError.
    with open(notebook_path, "w") as f:
        json.dump(result, f)


# Driver: walk the README line by line, collect every example (H3) found
# under a section (H2), then convert the collected examples to a notebook.
# The README is only read, so it is opened read-only.
with open(fname, 'r', encoding="utf-8") as f:
    lines = iter(f.readlines())
    result = []
    try:
        # Exhausting `lines` anywhere below (including inside
        # parse_example_parts) raises StopIteration, which is how the end of
        # the README is detected. The first read sits inside the `try` so an
        # empty file takes that same path instead of escaping uncaught.
        line = next(lines)
        while True:
            if line.startswith("## "):
                # A section is encountered
                current_section_name = line.replace("## ", "").strip()
                # Non-example lines of the section are gathered here; nothing
                # in this script reads them back yet.
                section_text = []
                line = next(lines)
                # Until a new section is encountered
                while not (line.startswith("## ")):
                    # check if it's a H3
                    if line.startswith("### "):
                        # An example is encountered
                        title = line.replace("### ", "")
                        example_details = {
                            "id": current_example,
                            "title": title,
                            "section": current_section_name
                        }
                        # Returns the line that terminated the example, which
                        # is examined again by the enclosing loop.
                        line, example_details["parts"] = parse_example_parts(lines, line)
                        result.append(example_details)
                        current_example += 1
                    else:
                        section_text.append(line)
                        line = next(lines)
            else:
                line = next(lines)

    except StopIteration:
        # End of the README: dump what was parsed and write the notebook.
        pprint.pprint(result, indent=2)
        convert_to_notebook(result)
Add todos 2019-05-06 18:19:57 +00:00			`"""`
			`An inefficient monolithic piece of code that'll generate jupyter notebook`
			`from the projects main README.`

Update comments 2019-10-30 19:29:57 +00:00			`PS: If you are a recruiter, please don't judge me by this piece of code. I wrote it`
			`in hurry. I know this is messy and can be simplified, but I don't want to change it`
			`much because it just works.`
Add todos 2019-05-06 18:19:57 +00:00
Update comments 2019-10-30 19:29:57 +00:00			`Simplifictions and improvements through patches are more than welcome however :)`


			`#TODOs`

			`- CLI arguments for running this thing`
Add todos 2019-05-06 18:19:57 +00:00			`- Add it to prepush hook`
Update comments 2019-10-30 19:29:57 +00:00			`- Add support for skip comments, to skip examples that are not meant for notebook environment.`
Add todos 2019-05-06 18:19:57 +00:00			`- Use templates?`
			`"""`

Get the generator in a working state 2019-05-06 18:17:24 +00:00			`import json`
Add json generator 2019-05-01 13:42:17 +00:00			`import pprint`
			`fname = "/Users/300041709/code/self/wtfpython/README.md"`
			`examples = []`

			`# The globals`
			`current_example = 1`
Get the generator in a working state 2019-05-06 18:17:24 +00:00			`sequence_num = 1`
Add json generator 2019-05-01 13:42:17 +00:00			`current_section_name = ""`


Get the generator in a working state 2019-05-06 18:17:24 +00:00			`STATEMENT_PREFIXES = ["...", ">>> ", "$ "]`


			`def generate_code_block(statements, output):`
			`global sequence_num`
			`result = {`
			`"type": "code",`
			`"sequence_num": sequence_num,`
			`"statements": statements,`
			`"output": output`
			`}`
			`sequence_num += 1`
			`return result`


			`def generate_markdown_block(lines):`
			`global sequence_num`
			`result = {`
			`"type": "markdown",`
			`"sequence_num": sequence_num,`
			`"value": lines`
			`}`
			`sequence_num += 1`
			`return result`

			`def is_interactive_statement(line):`
			`for prefix in STATEMENT_PREFIXES:`
			`if line.startswith(prefix):`
			`return True`
			`return False`

			`def parse_example_parts(lines, example_title_line):`
Add json generator 2019-05-01 13:42:17 +00:00			`parts = {`
			`"build_up": [],`
			`"explanation": []`
			`}`
			`content = []`
Get the generator in a working state 2019-05-06 18:17:24 +00:00			`statements_so_far = []`
			`output_so_far = []`
			`next_line = example_title_line`
Add json generator 2019-05-01 13:42:17 +00:00			`# store build_up till an H4 (explanation) is encountered`
			`while not next_line.startswith("#### "):`
			`# Watching out for the snippets`
			if next_line.startswith("```"):
			`# It's a snippet, whatever found until now is text`
Get the generator in a working state 2019-05-06 18:17:24 +00:00			`is_interactive = False`
Add json generator 2019-05-01 13:42:17 +00:00			`if content:`
Get the generator in a working state 2019-05-06 18:17:24 +00:00			`parts["build_up"].append(generate_markdown_block(content))`
Add json generator 2019-05-01 13:42:17 +00:00			`content = []`

			`next_line = next(lines)`
Get the generator in a working state 2019-05-06 18:17:24 +00:00
Add json generator 2019-05-01 13:42:17 +00:00			while not next_line.startswith("```"):
Get the generator in a working state 2019-05-06 18:17:24 +00:00			`if is_interactive_statement(next_line):`
			`is_interactive = True`
			`if (output_so_far):`
			`parts["build_up"].append(generate_code_block(statements_so_far, output_so_far))`
			`statements_so_far, output_so_far = [], []`
			`statements_so_far.append(next_line)`
			`else:`
			`# can be either output or normal code`
			`if is_interactive:`
			`output_so_far.append(next_line)`
			`else:`
			`statements_so_far.append(next_line)`
Add json generator 2019-05-01 13:42:17 +00:00			`next_line = next(lines)`
Get the generator in a working state 2019-05-06 18:17:24 +00:00
Add json generator 2019-05-01 13:42:17 +00:00			`# Snippet is over`
Get the generator in a working state 2019-05-06 18:17:24 +00:00			`parts["build_up"].append(generate_code_block(statements_so_far, output_so_far))`
			`statements_so_far, output_so_far = [], []`
Add json generator 2019-05-01 13:42:17 +00:00			`next_line = next(lines)`
			`else:`
			`# It's a text, go on.`
			`content.append(next_line)`
			`next_line = next(lines)`

			`# Explanation encountered, save any content till now (if any)`
			`if content:`
Get the generator in a working state 2019-05-06 18:17:24 +00:00			`parts["build_up"].append(generate_markdown_block(content))`
Add json generator 2019-05-01 13:42:17 +00:00
			`# Reset stuff`
			`content = []`
Get the generator in a working state 2019-05-06 18:17:24 +00:00			`statements_so_far, output_so_far = [], []`
Add json generator 2019-05-01 13:42:17 +00:00
			`# store lines again until --- or another H3 is encountered`
			`while not (next_line.startswith("---") or`
			`next_line.startswith("### ")):`
			if next_line.startswith("```"):
			`# It's a snippet, whatever found until now is text`
Get the generator in a working state 2019-05-06 18:17:24 +00:00			`is_interactive = False`
Add json generator 2019-05-01 13:42:17 +00:00			`if content:`
Get the generator in a working state 2019-05-06 18:17:24 +00:00			`parts["build_up"].append(generate_markdown_block(content))`
Add json generator 2019-05-01 13:42:17 +00:00			`content = []`

			`next_line = next(lines)`
Get the generator in a working state 2019-05-06 18:17:24 +00:00
Add json generator 2019-05-01 13:42:17 +00:00			while not next_line.startswith("```"):
Get the generator in a working state 2019-05-06 18:17:24 +00:00			`if is_interactive_statement(next_line):`
			`is_interactive = True`
			`if (output_so_far):`
			`parts["build_up"].append(generate_code_block(statements_so_far, output_so_far))`
			`statements_so_far, output_so_far = [], []`
			`statements_so_far.append(next_line)`
			`else:`
			`# can be either output or normal code`
			`if is_interactive:`
			`output_so_far.append(next_line)`
			`else:`
			`statements_so_far.append(next_line)`
Add json generator 2019-05-01 13:42:17 +00:00			`next_line = next(lines)`
Get the generator in a working state 2019-05-06 18:17:24 +00:00
Add json generator 2019-05-01 13:42:17 +00:00			`# Snippet is over`
Get the generator in a working state 2019-05-06 18:17:24 +00:00			`parts["build_up"].append(generate_code_block(statements_so_far, output_so_far))`
			`statements_so_far, output_so_far = [], []`
Add json generator 2019-05-01 13:42:17 +00:00			`next_line = next(lines)`
			`else:`
			`# It's a text, go on.`
			`content.append(next_line)`
			`next_line = next(lines)`

			`# All done`
			`if content:`
Get the generator in a working state 2019-05-06 18:17:24 +00:00			`parts["explanation"].append(generate_markdown_block(content))`
Add json generator 2019-05-01 13:42:17 +00:00
			`return next_line, parts`

Get the generator in a working state 2019-05-06 18:17:24 +00:00			`def remove_from_beginning(tokens, line):`
			`for token in tokens:`
			`if line.startswith(token):`
			`line = line.replace(token, "")`
			`return line`


			`def inspect_and_sanitize_code_lines(lines):`
			`tokens_to_remove = STATEMENT_PREFIXES`
			`result = []`
			`is_print_present = False`
			`for line in lines:`
			`line = remove_from_beginning(tokens_to_remove, line)`
			`if line.startswith("print ") or line.startswith("print("):`
			`is_print_present = True`
			`result.append(line)`
			`return is_print_present, result`

			`def convert_to_cells(cell_contents):`
			`cells = []`
			`for stuff in cell_contents:`
			`if stuff["type"] == "markdown":`
			`# todo add metadata later`
			`cells.append(`
			`{`
			`"cell_type": "markdown",`
			`"metadata": {},`
			`"source": stuff["value"]`
			`}`
			`)`
			`elif stuff["type"] == "code":`
			`is_print_present, sanitized_code = inspect_and_sanitize_code_lines(stuff["statements"])`
			`if is_print_present:`
			`cells.append(`
			`{`
			`"cell_type": "code",`
			`"metadata": {`
			`"collapsed": True`
			`},`
			`"execution_count": None,`
			`"outputs": [{`
			`"name": "stdout",`
			`"output_type": "stream",`
			`"text": stuff["output"]`
			`}],`
			`"source": sanitized_code`
			`}`
			`)`
			`else:`
			`cells.append(`
			`{`
			`"cell_type": "code",`
			`"execution_count": None,`
			`"metadata": {`
			`"collapsed": True`
			`},`
			`"outputs": [{`
			`"data": {`
			`"text/plain": stuff["output"]`
			`},`
			`"output_type": "execute_result",`
			`"metadata": {},`
			`"execution_count": None`
			`}],`
			`"source": sanitized_code`
			`}`
			`)`

			`return cells`


			`def convert_to_notebook(parsed_json):`
			`result = {`
			`"cells": [],`
			`"metadata": {},`
			`"nbformat": 4,`
			`"nbformat_minor": 2`
			`}`
			`for example in parsed_json:`
			`parts = example["parts"]`
			`build_up = parts.get("build_up")`
			`explanation = parts.get("explanation")`
			`notebook_path = "test.ipynb"`

			`if(build_up):`
			`result["cells"] += convert_to_cells(build_up)`

			`if(explanation):`
			`result["cells"] += convert_to_cells(explanation)`

			`pprint.pprint(result, indent=2)`
			`with open(notebook_path, "w") as f:`
			`json.dump(result, f)`


Add json generator 2019-05-01 13:42:17 +00:00
			`with open(fname, 'r+', encoding="utf-8") as f:`
			`lines = iter(f.readlines())`
			`line = next(lines)`
			`result = []`
			`try:`
			`while True:`
			`if line.startswith("## "):`
			`# A section is encountered`
			`current_section_name = line.replace("## ", "").strip()`
Get the generator in a working state 2019-05-06 18:17:24 +00:00			`section_text = []`
Add json generator 2019-05-01 13:42:17 +00:00			`line = next(lines)`
			`# Until a new section is encountered`
			`while not (line.startswith("## " )):`
			`# check if it's a H3`
			`if line.startswith("### "):`
			`# An example is encountered`
			`title = line.replace("### ", "")`
			`example_details = {`
			`"id": current_example,`
			`"title": line.replace("### ", ""),`
			`"section": current_section_name`
			`}`
Get the generator in a working state 2019-05-06 18:17:24 +00:00			`line, example_details["parts"] = parse_example_parts(lines, line)`
Add json generator 2019-05-01 13:42:17 +00:00			`result.append(example_details)`
			`current_example += 1`
			`else:`
Get the generator in a working state 2019-05-06 18:17:24 +00:00			`section_text.append(line)`
Add json generator 2019-05-01 13:42:17 +00:00			`line = next(lines)`
			`else:`
			`line = next(lines)`

			`except StopIteration:`
			`pprint.pprint(result, indent=2)`
Get the generator in a working state 2019-05-06 18:17:24 +00:00			`convert_to_notebook(result)`