330 lines
11 KiB
Python
330 lines
11 KiB
Python
import markdown
|
|
import re
|
|
|
|
# Resolve the accepted string types once at import time.  ``unicode`` only
# exists on Python 2; on Python 3 every text string is already ``str``, and
# referencing ``unicode`` there would raise NameError on each call.
try:
    _STRING_TYPES = (str, unicode)
except NameError:
    _STRING_TYPES = (str,)


def isString(s):
    """
    Check whether `s` is a string.

    Keyword arguments:

    * s: any object

    Returns: True if `s` is an instance of `str` (or of `unicode`, on
    interpreters that define it), False otherwise.
    """
    return isinstance(s, _STRING_TYPES)
|
|
|
|
class Processor:
    """
    Minimal base class that optionally remembers a Markdown instance.

    The `markdown` attribute is only created when a truthy
    `markdown_instance` is supplied; otherwise the instance is left without
    that attribute.
    """

    def __init__(self, markdown_instance=None):
        # Nothing to remember when no (or a falsy) instance was supplied.
        if not markdown_instance:
            return
        self.markdown = markdown_instance
|
|
|
|
class Treeprocessor(Processor):
    """
    Base class for processors that work on the ElementTree object before it
    is serialized.

    A concrete Treeprocessor provides a "run" method that receives the
    ElementTree, changes it as needed and hands back an ElementTree object.

    Treeprocessors must extend markdown.Treeprocessor.

    """
    def run(self, root):
        """
        Hook to be overridden by subclasses; this base version does nothing.

        An implementation receives the root ElementTree. It may return a
        different ElementTree object, in which case the existing root is
        replaced, or it may modify the tree it was given and return None.
        """
        return None
|
|
|
|
|
|
class InlineProcessor(Treeprocessor):
    """
    A Treeprocessor that traverses a tree, applying inline patterns.

    Matched inline content is turned into nodes which are kept in
    `self.stashed_nodes` (created anew by every `run` call) and represented
    in the text by placeholder strings; the placeholders are later expanded
    back into ElementTree elements.
    """

    def __init__(self, md):
        self.__placeholder_prefix = markdown.INLINE_PLACEHOLDER_PREFIX
        self.__placeholder_suffix = markdown.ETX
        # 4 is the width of the zero-padded id produced by __makePlaceholder.
        self.__placeholder_length = 4 + len(self.__placeholder_prefix) \
                                      + len(self.__placeholder_suffix)
        self.__placeholder_re = re.compile(
            markdown.INLINE_PLACEHOLDER % r'([0-9]{4})')
        self.markdown = md

    def __makePlaceholder(self, type):
        """
        Generate a placeholder.

        Keyword arguments:

        * type: accepted for interface compatibility; not used here.

        Returns: tuple of (placeholder string, id), where the id is the
        current number of stashed nodes, zero-padded to four digits.
        """
        # NOTE(review): the id is formatted with "%04d" while the placeholder
        # regex accepts exactly four digits, so ids above 9999 would
        # presumably not be recognised again - confirm if that matters.
        node_id = "%04d" % len(self.stashed_nodes)
        placeholder = markdown.INLINE_PLACEHOLDER % node_id
        return placeholder, node_id

    def __findPlaceholder(self, data, index):
        """
        Extract id from data string, start from index

        Keyword arguments:

        * data: string
        * index: index, from which we start search

        Returns: placeholder id and string index, after the found
        placeholder. When no placeholder is found, returns None and
        `index + 1`.
        """
        m = self.__placeholder_re.search(data, index)
        if m:
            return m.group(1), m.end()
        return None, index + 1

    def __stashNode(self, node, type):
        """
        Add node to stash.

        Keyword arguments:

        * node: the element or string to remember
        * type: passed through to __makePlaceholder

        Returns: the placeholder string that now stands for `node`.
        """
        placeholder, node_id = self.__makePlaceholder(type)
        self.stashed_nodes[node_id] = node
        return placeholder

    def __handleInline(self, data, patternIndex=0):
        """
        Process string with inline patterns and replace it
        with placeholders

        Keyword arguments:

        * data: A line of Markdown text
        * patternIndex: The index of the inlinePattern to start with

        Returns: String with placeholders. An AtomicString is returned
        unchanged.

        """
        if not isinstance(data, markdown.AtomicString):
            startIndex = 0
            patterns = self.markdown.inlinePatterns
            while patternIndex < len(patterns):
                data, matched, startIndex = self.__applyPattern(
                    patterns.value_for_index(patternIndex),
                    data, patternIndex, startIndex)
                # A pattern that matched is tried again on the updated
                # string; only move on once it no longer matches.
                if not matched:
                    patternIndex += 1
        return data

    def __processElementText(self, node, subnode, isText=True):
        """
        Process placeholders in Element.text or Element.tail
        of Elements popped from self.stashed_nodes.

        Keywords arguments:

        * node: parent node
        * subnode: processing node
        * isText: bool variable, True - it's text, False - it's tail

        Returns: None

        """
        # Detach the string first; __processPlaceholders re-attaches the
        # plain-text parts to `subnode` while expanding it.
        if isText:
            text = subnode.text
            subnode.text = None
        else:
            text = subnode.tail
            subnode.tail = None

        childResult = self.__processPlaceholders(text, subnode)

        if not isText and node is not subnode:
            pos = list(node).index(subnode)
            node.remove(subnode)
        else:
            pos = 0

        # Inserting the reversed list at a fixed position keeps the
        # children in their original order.
        childResult.reverse()
        for newChild in childResult:
            node.insert(pos, newChild)

    def __processPlaceholders(self, data, parent):
        """
        Process string with placeholders and generate ElementTree tree.

        Keyword arguments:

        * data: string with placeholders instead of ElementTree elements.
        * parent: Element, which contains processing inline data

        Returns: list with ElementTree elements with applied inline patterns.
        Plain text between placeholders is attached as a side effect: to the
        tail of the last element found so far, or to `parent.text` when no
        element has been found yet.
        """
        def linkText(text):
            """ Append `text` to the last result's tail or to parent.text. """
            if text:
                if result:
                    if result[-1].tail:
                        result[-1].tail += text
                    else:
                        result[-1].tail = text
                else:
                    if parent.text:
                        parent.text += text
                    else:
                        parent.text = text

        result = []
        prefix = self.__placeholder_prefix
        startIndex = 0
        while data:
            index = data.find(prefix, startIndex)
            if index != -1:
                node_id, phEndIndex = self.__findPlaceholder(data, index)

                if node_id in self.stashed_nodes:
                    node = self.stashed_nodes.get(node_id)

                    if index > 0:
                        text = data[startIndex:index]
                        linkText(text)

                    if not isString(node):  # it's Element
                        for child in [node] + list(node):
                            if child.tail:
                                if child.tail.strip():
                                    self.__processElementText(node, child,
                                                              False)
                            if child.text:
                                if child.text.strip():
                                    self.__processElementText(child, child)
                    else:  # it's just a string
                        linkText(node)
                        startIndex = phEndIndex
                        continue

                    startIndex = phEndIndex
                    result.append(node)

                else:  # wrong placeholder
                    # Keep the stray prefix as literal text and resume the
                    # search right after it.  (This branch used to call
                    # len() on an undefined name and raised NameError.)
                    end = index + len(prefix)
                    linkText(data[startIndex:end])
                    startIndex = end
            else:
                # No more placeholders: the rest is plain text; emptying
                # `data` ends the loop.
                text = data[startIndex:]
                linkText(text)
                data = ""

        return result

    def __applyPattern(self, pattern, data, patternIndex, startIndex=0):
        """
        Check if the line fits the pattern, create the necessary
        elements, add it to stashed_nodes.

        Keyword arguments:

        * data: the text to be processed
        * pattern: the pattern to be checked
        * patternIndex: index of current pattern
        * startIndex: string index, from which we starting search

        Returns: tuple of (string, matched, next start index). The string
        has a placeholder in place of the matched content when the pattern
        produced a node; otherwise it is `data` unchanged.

        """
        match = pattern.getCompiledRegExp().match(data[startIndex:])
        leftData = data[:startIndex]

        if not match:
            return data, False, 0

        node = pattern.handleMatch(match)

        if node is None:
            # The pattern matched but produced nothing: report a match and
            # continue from where the last regex group begins.
            return (data, True,
                    len(leftData) + match.span(len(match.groups()))[0])

        if not isString(node):
            if not isinstance(node.text, markdown.AtomicString):
                # We need to process current node too
                for child in [node] + list(node):
                    if child.text:
                        child.text = self.__handleInline(child.text,
                                                         patternIndex + 1)
                    if child.tail:
                        child.tail = self.__handleInline(child.tail,
                                                         patternIndex)

        placeholder = self.__stashNode(node, pattern.type())

        return "%s%s%s%s" % (leftData,
                             match.group(1),
                             placeholder, match.groups()[-1]), True, 0

    def run(self, tree):
        """Apply inline patterns to a parsed Markdown tree.

        Iterate over ElementTree, find elements with inline tag, apply inline
        patterns and append newly created Elements to tree.  If you don't
        want process your data with inline patterns, instead of normal
        string, use subclass AtomicString:

            node.text = markdown.AtomicString("data won't be processed with inline patterns")

        Arguments:

        * tree: ElementTree object, representing Markdown tree.

        Returns: ElementTree object with applied inline patterns.

        """
        self.stashed_nodes = {}

        stack = [tree]

        while stack:
            currElement = stack.pop()
            insertQueue = []
            for child in list(currElement):
                if child.text and not isinstance(child.text,
                                                 markdown.AtomicString):
                    text = child.text
                    child.text = None
                    lst = self.__processPlaceholders(
                        self.__handleInline(text), child)
                    stack += lst
                    insertQueue.append((child, lst))

                if len(child):
                    stack.append(child)

            # New elements are attached only after the walk over
            # currElement's children has finished.
            for element, lst in insertQueue:
                if element.text:
                    element.text = \
                        markdown.inlinepatterns.handleAttributes(element.text,
                                                                 element)
                for i, newChild in enumerate(lst):
                    # Processing attributes
                    if newChild.tail:
                        newChild.tail = \
                            markdown.inlinepatterns.handleAttributes(
                                newChild.tail, element)
                    if newChild.text:
                        newChild.text = \
                            markdown.inlinepatterns.handleAttributes(
                                newChild.text, newChild)
                    element.insert(i, newChild)
        return tree
|
|
|
|
|
|
class PrettifyTreeprocessor(Treeprocessor):
    """ Insert linebreaks into the tree of the html document. """

    def _prettifyETree(self, elem):
        """ Add linebreaks around block-level elements, recursing into them. """

        newline = "\n"
        tag = elem.tag
        if markdown.isBlockLevel(tag) and tag not in ['code', 'pre']:
            text_is_blank = not elem.text or not elem.text.strip()
            if text_is_blank and len(elem) \
                    and markdown.isBlockLevel(elem[0].tag):
                elem.text = newline
            for sub in elem:
                if markdown.isBlockLevel(sub.tag):
                    self._prettifyETree(sub)
            if not (elem.tail and elem.tail.strip()):
                elem.tail = newline
        if not (elem.tail and elem.tail.strip()):
            elem.tail = newline

    def run(self, root):
        """ Add linebreaks to the ElementTree rooted at `root`. """

        self._prettifyETree(root)
        # <br /> elements are handled separately since they usually sit in
        # the middle of inline content, which _prettifyETree does not visit.
        for br in root.getiterator('br'):
            if br.tail and br.tail.strip():
                br.tail = '\n%s' % br.tail
            else:
                br.tail = '\n'
|