# -*- coding: utf-8 -*-
# Module setup: stdlib imports and logging configuration shared by every
# test class in this file.
from __future__ import absolute_import

import unittest
import logging
import os
import sys
try:
    # Python 2 only: fast C implementation of StringIO (accepts bytes).
    from cStringIO import StringIO as cStringIO
except ImportError:
    # Available only in Python 2.x, 3.x only has io.StringIO from below
    cStringIO = None
from io import (
        StringIO as uStringIO,
        open,
    )

logging.basicConfig(level=logging.INFO)
# Imports of the library under test (lark).
from lark.lark import Lark
from lark.exceptions import GrammarError, ParseError, UnexpectedToken, UnexpectedInput, UnexpectedCharacters
from lark.tree import Tree
from lark.visitors import Transformer, Transformer_InPlace, v_args
from lark.grammar import Rule
from lark.lexer import TerminalDef, Lexer, TraditionalLexer
# Directory containing this test file; used to locate grammar fixture files.
__path__ = os.path.dirname(__file__)

def _read(n, *args):
    """Read and return the contents of file *n*, resolved relative to this
    test file's directory.

    Extra positional *args are forwarded to open() (e.g. a mode string).
    """
    with open(os.path.join(__path__, n), *args) as f:
        return f.read()
class TestParsers(unittest.TestCase):
    """Parser-independent sanity tests: tree equality across parsers,
    error detection, position propagation, rule expansion and embedded
    transformers."""

    def test_same_ast(self):
        "Tests that Earley and LALR parsers produce equal trees"
        g = Lark(r"""start: "(" name_list ("," "*" NAME)? ")"
                    name_list: NAME | name_list "," NAME
                    NAME: /\w+/ """, parser='lalr')
        l = g.parse('(a,b,c,*x)')

        # Same grammar, but the terminal is written as /\w/+ instead of /\w+/
        g = Lark(r"""start: "(" name_list ("," "*" NAME)? ")"
                    name_list: NAME | name_list "," NAME
                    NAME: /\w/+ """)
        l2 = g.parse('(a,b,c,*x)')
        assert l == l2, '%s != %s' % (l.pretty(), l2.pretty())

    def test_infinite_recurse(self):
        g = """start: a
               a: a | "a"
            """

        self.assertRaises(GrammarError, Lark, g, parser='lalr')

        # TODO: should it? shouldn't it?
        # l = Lark(g, parser='earley', lexer='dynamic')
        # self.assertRaises(ParseError, l.parse, 'a')

    def test_propagate_positions(self):
        g = Lark("""start: a
                    a: "a"
                 """, propagate_positions=True)

        r = g.parse('a')
        self.assertEqual(r.children[0].meta.line, 1)

    def test_expand1(self):
        # '?a' expands in-place when it has a single child...
        g = Lark("""start: a
                    ?a: b
                    b: "x"
                 """)

        r = g.parse('x')
        self.assertEqual(r.children[0].data, "b")

        # ...unless it carries an alias, which forces a node to be kept.
        g = Lark("""start: a
                    ?a: b -> c
                    b: "x"
                 """)

        r = g.parse('x')
        self.assertEqual(r.children[0].data, "c")

        g = Lark("""start: a
                    ?a: B -> c
                    B: "x"
                 """)
        self.assertEqual(r.children[0].data, "c")

        g = Lark("""start: a
                    ?a: b b -> c
                    b: "x"
                 """)
        r = g.parse('xx')
        self.assertEqual(r.children[0].data, "c")

    def test_visit_tokens(self):
        class T(Transformer):
            def a(self, children):
                return children[0] + "!"
            def A(self, tok):
                return tok.upper()

        # Test regular
        g = Lark("""start: a
                    a : A
                    A: "x"
                 """, parser='lalr')
        r = T().transform(g.parse("x"))
        self.assertEqual(r.children, ["x!"])
        # visit_tokens=True makes the terminal callback (A) fire too
        r = T(True).transform(g.parse("x"))
        self.assertEqual(r.children, ["X!"])

    def test_embedded_transformer(self):
        class T(Transformer):
            def a(self, children):
                return "<a>"
            def b(self, children):
                return "<b>"
            def c(self, children):
                return "<c>"

        # Test regular
        g = Lark("""start: a
                    a : "x"
                 """, parser='lalr')
        r = T().transform(g.parse("x"))
        self.assertEqual(r.children, ["<a>"])

        g = Lark("""start: a
                    a : "x"
                 """, parser='lalr', transformer=T())
        r = g.parse("x")
        self.assertEqual(r.children, ["<a>"])

        # Test Expand1
        g = Lark("""start: a
                    ?a : b
                    b : "x"
                 """, parser='lalr')
        r = T().transform(g.parse("x"))
        self.assertEqual(r.children, ["<b>"])

        g = Lark("""start: a
                    ?a : b
                    b : "x"
                 """, parser='lalr', transformer=T())
        r = g.parse("x")
        self.assertEqual(r.children, ["<b>"])

        # Test Expand1 -> Alias
        g = Lark("""start: a
                    ?a : b b -> c
                    b : "x"
                 """, parser='lalr')
        r = T().transform(g.parse("xx"))
        self.assertEqual(r.children, ["<c>"])

        g = Lark("""start: a
                    ?a : b b -> c
                    b : "x"
                 """, parser='lalr', transformer=T())
        r = g.parse("xx")
        self.assertEqual(r.children, ["<c>"])

    def test_embedded_transformer_inplace(self):
        @v_args(tree=True)
        class T1(Transformer_InPlace):
            def a(self, tree):
                assert isinstance(tree, Tree), tree
                tree.children.append("tested")
                return tree

            def b(self, tree):
                return Tree(tree.data, tree.children + ['tested2'])

        @v_args(tree=True)
        class T2(Transformer):
            def a(self, tree):
                assert isinstance(tree, Tree)
                tree.children.append("tested")
                return tree

            def b(self, tree):
                return Tree(tree.data, tree.children + ['tested2'])

        class T3(Transformer):
            @v_args(tree=True)
            def a(self, tree):
                assert isinstance(tree, Tree)
                tree.children.append("tested")
                return tree

            @v_args(tree=True)
            def b(self, tree):
                return Tree(tree.data, tree.children + ['tested2'])

        # Each transformer must behave the same whether it is embedded in
        # the parser (internal=True) or applied after parsing.
        for t in [T1(), T2(), T3()]:
            for internal in [False, True]:
                g = Lark("""start: a b
                            a : "x"
                            b : "y"
                         """, parser='lalr', transformer=t if internal else None)
                r = g.parse("xy")
                if not internal:
                    r = t.transform(r)

                a, b = r.children
                self.assertEqual(a.children, ["tested"])
                self.assertEqual(b.children, ["tested2"])

    def test_alias(self):
        Lark("""start: ["a"] "b" ["c"] "e" ["f"] ["g"] ["h"] "x" -> d""")
def _make_full_earley_test(LEXER):
    """Generate a TestCase subclass exercising the full Earley parser with
    the given lexer, and register it in the module globals under a
    lexer-specific name (e.g. TestFullEarleyDynamic)."""

    def _Lark(grammar, **kwargs):
        return Lark(grammar, lexer=LEXER, parser='earley', propagate_positions=True, **kwargs)

    class _TestFullEarley(unittest.TestCase):
        def test_anon(self):
            # Fails an Earley implementation without special handling for empty rules,
            # or re-processing of already completed rules.
            g = Lark(r"""start: B
                         B: ("ab"|/[^b]/)+
                      """, lexer=LEXER)

            self.assertEqual(g.parse('abc').children[0], 'abc')

        def test_earley(self):
            g = Lark("""start: A "b" c
                        A: "a"+
                        c: "abc"
                     """, parser="earley", lexer=LEXER)
            x = g.parse('aaaababc')

        def test_earley2(self):
            grammar = """
            start: statement+

            statement: "r"
                     | "c" /[a-z]/+

            %ignore " "
            """

            program = """c b r"""

            l = Lark(grammar, parser='earley', lexer=LEXER)
            l.parse(program)

        @unittest.skipIf(LEXER == 'dynamic', "Only relevant for the dynamic_complete parser")
        def test_earley3(self):
            """Tests prioritization and disambiguation for pseudo-terminals (there should be only one result)

            By default, `+` should immitate regexp greedy-matching
            """
            grammar = """
            start: A A
            A: "a"+
            """

            l = Lark(grammar, parser='earley', lexer=LEXER)
            res = l.parse("aaa")
            self.assertEqual(set(res.children), {'aa', 'a'})
            # XXX TODO fix Earley to maintain correct order
            # i.e. terminals it imitate greedy search for terminals, but lazy search for rules
            # self.assertEqual(res.children, ['aa', 'a'])

        def test_earley4(self):
            grammar = """
            start: A A?
            A: "a"+
            """

            l = Lark(grammar, parser='earley', lexer=LEXER)
            res = l.parse("aaa")
            assert set(res.children) == {'aa', 'a'} or res.children == ['aaa']
            # XXX TODO fix Earley to maintain correct order
            # i.e. terminals it imitate greedy search for terminals, but lazy search for rules
            # self.assertEqual(res.children, ['aaa'])

        def test_earley_repeating_empty(self):
            # This was a sneaky bug!

            grammar = """
            !start: "a" empty empty "b"
            empty: empty2
            empty2:
            """

            parser = Lark(grammar, parser='earley', lexer=LEXER)
            res = parser.parse('ab')

            empty_tree = Tree('empty', [Tree('empty2', [])])
            self.assertSequenceEqual(res.children, ['a', empty_tree, empty_tree, 'b'])

        @unittest.skipIf(LEXER == 'standard', "Requires dynamic lexer")
        def test_earley_explicit_ambiguity(self):
            # This was a sneaky bug!

            grammar = """
            start: a b | ab
            a: "a"
            b: "b"
            ab: "ab"
            """

            parser = Lark(grammar, parser='earley', lexer=LEXER, ambiguity='explicit')
            ambig_tree = parser.parse('ab')
            self.assertEqual(ambig_tree.data, '_ambig')
            self.assertEqual(len(ambig_tree.children), 2)

        @unittest.skipIf(LEXER == 'standard', "Requires dynamic lexer")
        def test_ambiguity1(self):
            grammar = """
            start: cd+ "e"

            !cd: "c"
               | "d"
               | "cd"

            """
            l = Lark(grammar, parser='earley', ambiguity='explicit', lexer=LEXER)
            ambig_tree = l.parse('cde')

            assert ambig_tree.data == '_ambig', ambig_tree
            assert len(ambig_tree.children) == 2

        @unittest.skipIf(LEXER == 'standard', "Requires dynamic lexer")
        def test_ambiguity2(self):
            grammar = """
            ANY: /[a-zA-Z0-9 ]+/
            a.2: "A" b+
            b.2: "B"
            c: ANY

            start: (a|c)*
            """
            l = Lark(grammar, parser='earley', lexer=LEXER)
            res = l.parse('ABX')
            expected = Tree('start', [
                    Tree('a', [
                        Tree('b', [])
                    ]),
                    Tree('c', [
                        'X'
                    ])
                ])
            self.assertEqual(res, expected)

        def test_fruitflies_ambig(self):
            grammar = """
                start: noun verb noun        -> simple
                        | noun verb "like" noun -> comparative

                noun: adj? NOUN
                verb: VERB
                adj: ADJ

                NOUN: "flies" | "bananas" | "fruit"
                VERB: "like" | "flies"
                ADJ: "fruit"

                %import common.WS
                %ignore WS
            """
            parser = Lark(grammar, ambiguity='explicit', lexer=LEXER)
            tree = parser.parse('fruit flies like bananas')

            expected = Tree('_ambig', [
                    Tree('comparative', [
                        Tree('noun', ['fruit']),
                        Tree('verb', ['flies']),
                        Tree('noun', ['bananas'])
                    ]),
                    Tree('simple', [
                        Tree('noun', [Tree('adj', ['fruit']), 'flies']),
                        Tree('verb', ['like']),
                        Tree('noun', ['bananas'])
                    ])
                ])

            # Ambiguous children may arrive in any order, so compare as sets.
            # self.assertEqual(tree, expected)
            self.assertEqual(tree.data, expected.data)
            self.assertEqual(set(tree.children), set(expected.children))

        @unittest.skipIf(LEXER != 'dynamic_complete', "Only relevant for the dynamic_complete parser")
        def test_explicit_ambiguity2(self):
            grammar = r"""
            start: NAME+
            NAME: /\w+/
            %ignore " "
            """
            text = """cat"""

            parser = _Lark(grammar, start='start', ambiguity='explicit')
            tree = parser.parse(text)
            self.assertEqual(tree.data, '_ambig')

            combinations = {tuple(str(s) for s in t.children) for t in tree.children}
            self.assertEqual(combinations, {
                ('cat',),
                ('ca', 't'),
                ('c', 'at'),
                ('c', 'a', 't')
            })

        def test_term_ambig_resolve(self):
            grammar = r"""
            !start: NAME+
            NAME: /\w+/
            %ignore " "
            """
            text = """foo bar"""

            parser = Lark(grammar)
            tree = parser.parse(text)
            self.assertEqual(tree.children, ['foo', 'bar'])

        # @unittest.skipIf(LEXER=='dynamic', "Not implemented in Dynamic Earley yet")   # TODO
        # def test_not_all_derivations(self):
        #     grammar = """
        #     start: cd+ "e"
        #
        #     !cd: "c"
        #        | "d"
        #        | "cd"
        #
        #     """
        #     l = Lark(grammar, parser='earley', ambiguity='explicit', lexer=LEXER, earley__all_derivations=False)
        #     x = l.parse('cde')
        #     assert x.data != '_ambig', x
        #     assert len(x.children) == 1

    _NAME = "TestFullEarley" + LEXER.capitalize()
    _TestFullEarley.__name__ = _NAME
    globals()[_NAME] = _TestFullEarley
class CustomLexer(Lexer):
    """
    Purpose of this custom lexer is to test the integration,
    so it uses TraditionalLexer as its implementation without custom lexing behaviour.
    """
    def __init__(self, lexer_conf):
        # Delegate all real work to the standard lexer built from the same config.
        self.lexer = TraditionalLexer(lexer_conf.tokens, ignore=lexer_conf.ignore, user_callbacks=lexer_conf.callbacks)

    def lex(self, *args, **kwargs):
        # Pure pass-through to the wrapped lexer.
        return self.lexer.lex(*args, **kwargs)
def _make_parser_test ( LEXER , PARSER ) :
2019-08-21 10:14:28 +00:00
lexer_class_or_name = CustomLexer if LEXER == ' custom ' else LEXER
2017-02-07 23:19:33 +00:00
def _Lark ( grammar , * * kwargs ) :
2019-08-21 10:14:28 +00:00
return Lark ( grammar , lexer = lexer_class_or_name , parser = PARSER , propagate_positions = True , * * kwargs )
2019-03-02 12:55:48 +00:00
def _Lark_open ( gfilename , * * kwargs ) :
2019-08-21 10:14:28 +00:00
return Lark . open ( gfilename , lexer = lexer_class_or_name , parser = PARSER , propagate_positions = True , * * kwargs )
2017-02-07 23:19:33 +00:00
class _TestParser ( unittest . TestCase ) :
def test_basic1 ( self ) :
g = _Lark ( """ start: a+ b a* " b " a*
b : " b "
a : " a "
2017-02-07 15:40:46 +00:00
""" )
2017-05-01 14:08:10 +00:00
2017-02-07 23:19:33 +00:00
r = g . parse ( ' aaabaab ' )
self . assertEqual ( ' ' . join ( x . data for x in r . children ) , ' aaabaa ' )
r = g . parse ( ' aaabaaba ' )
self . assertEqual ( ' ' . join ( x . data for x in r . children ) , ' aaabaaa ' )
self . assertRaises ( ParseError , g . parse , ' aaabaa ' )
def test_basic2 ( self ) :
# Multiple parsers and colliding tokens
g = _Lark ( """ start: B A
2017-03-01 16:14:47 +00:00
B : " 12 "
A : " 1 " """ )
2017-02-07 23:19:33 +00:00
g2 = _Lark ( """ start: B A
B : " 12 "
A : " 2 " """ )
x = g . parse ( ' 121 ' )
assert x . data == ' start ' and x . children == [ ' 12 ' , ' 1 ' ] , x
x = g2 . parse ( ' 122 ' )
assert x . data == ' start ' and x . children == [ ' 12 ' , ' 2 ' ] , x
@unittest.skipIf ( cStringIO is None , " cStringIO not available " )
def test_stringio_bytes ( self ) :
""" Verify that a Lark can be created from file-like objects other than Python ' s standard ' file ' object """
_Lark ( cStringIO ( b ' start: a+ b a* " b " a* \n b: " b " \n a: " a " ' ) )
def test_stringio_unicode ( self ) :
""" Verify that a Lark can be created from file-like objects other than Python ' s standard ' file ' object """
_Lark ( uStringIO ( u ' start: a+ b a* " b " a* \n b: " b " \n a: " a " ' ) )
def test_unicode ( self ) :
g = _Lark ( u """ start: UNIA UNIB UNIA
UNIA : / \xa3 /
UNIB : / \u0101 /
""" )
g . parse ( u ' \xa3 \u0101 \u00a3 ' )
def test_unicode2 ( self ) :
g = _Lark ( r """ start: UNIA UNIB UNIA UNIC
UNIA : / \xa3 /
UNIB : " a \u0101 b \ "
UNIC : / a ? \u0101c \n /
""" )
g . parse ( u ' \xa3 a \u0101 b \\ \u00a3 \u0101 c \n ' )
2017-02-26 19:57:25 +00:00
def test_unicode3 ( self ) :
g = _Lark ( r """ start: UNIA UNIB UNIA UNIC
UNIA : / \xa3 /
UNIB : " \u0101 "
UNIC : / \u0203 / / \n /
""" )
g . parse ( u ' \xa3 \u0101 \u00a3 \u0203 \n ' )
2019-02-12 20:21:15 +00:00
def test_hex_escape ( self ) :
2019-02-13 11:35:32 +00:00
g = _Lark ( r """ start: A B C
2019-02-12 20:21:15 +00:00
A : " \x01 "
B : / \x02 /
2019-02-13 11:35:32 +00:00
C : " \xAB CD "
2019-02-12 20:21:15 +00:00
""" )
2019-02-13 11:35:32 +00:00
g . parse ( ' \x01 \x02 \xAB CD ' )
2017-02-07 23:19:33 +00:00
2019-02-23 23:32:31 +00:00
def test_unicode_literal_range_escape ( self ) :
g = _Lark ( r """ start: A+
A : " \u0061 " . . " \u0063 "
""" )
g . parse ( ' abc ' )
def test_hex_literal_range_escape ( self ) :
g = _Lark ( r """ start: A+
A : " \x01 " . . " \x03 "
""" )
g . parse ( ' \x01 \x02 \x03 ' )
2018-01-23 08:19:21 +00:00
@unittest.skipIf ( PARSER == ' cyk ' , " Takes forever " )
2017-03-01 16:14:47 +00:00
def test_stack_for_ebnf ( self ) :
""" Verify that stack depth isn ' t an issue for EBNF grammars """
g = _Lark ( r """ start: a+
2017-02-07 23:19:33 +00:00
a : " a " """ )
2017-03-01 16:14:47 +00:00
g . parse ( " a " * ( sys . getrecursionlimit ( ) * 2 ) )
2017-02-07 23:19:33 +00:00
def test_expand1_lists_with_one_item ( self ) :
g = _Lark ( r """ start: list
? list : item +
item : A
A : " a "
""" )
r = g . parse ( " a " )
# because 'list' is an expand-if-contains-one rule and we only provided one element it should have expanded to 'item'
self . assertSequenceEqual ( [ subtree . data for subtree in r . children ] , ( ' item ' , ) )
# regardless of the amount of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule
self . assertEqual ( len ( r . children ) , 1 )
def test_expand1_lists_with_one_item_2 ( self ) :
g = _Lark ( r """ start: list
? list : item + " ! "
item : A
A : " a "
""" )
r = g . parse ( " a! " )
# because 'list' is an expand-if-contains-one rule and we only provided one element it should have expanded to 'item'
self . assertSequenceEqual ( [ subtree . data for subtree in r . children ] , ( ' item ' , ) )
# regardless of the amount of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule
self . assertEqual ( len ( r . children ) , 1 )
def test_dont_expand1_lists_with_multiple_items ( self ) :
g = _Lark ( r """ start: list
? list : item +
item : A
A : " a "
""" )
r = g . parse ( " aa " )
# because 'list' is an expand-if-contains-one rule and we've provided more than one element it should *not* have expanded
self . assertSequenceEqual ( [ subtree . data for subtree in r . children ] , ( ' list ' , ) )
# regardless of the amount of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule
self . assertEqual ( len ( r . children ) , 1 )
# Sanity check: verify that 'list' contains the two 'item's we've given it
[ list ] = r . children
self . assertSequenceEqual ( [ item . data for item in list . children ] , ( ' item ' , ' item ' ) )
def test_dont_expand1_lists_with_multiple_items_2 ( self ) :
g = _Lark ( r """ start: list
? list : item + " ! "
item : A
A : " a "
""" )
r = g . parse ( " aa! " )
# because 'list' is an expand-if-contains-one rule and we've provided more than one element it should *not* have expanded
self . assertSequenceEqual ( [ subtree . data for subtree in r . children ] , ( ' list ' , ) )
# regardless of the amount of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule
self . assertEqual ( len ( r . children ) , 1 )
# Sanity check: verify that 'list' contains the two 'item's we've given it
[ list ] = r . children
self . assertSequenceEqual ( [ item . data for item in list . children ] , ( ' item ' , ' item ' ) )
2018-01-23 08:19:21 +00:00
@unittest.skipIf ( PARSER == ' cyk ' , " No empty rules " )
2017-02-07 23:19:33 +00:00
def test_empty_expand1_list ( self ) :
g = _Lark ( r """ start: list
? list : item *
item : A
A : " a "
""" )
r = g . parse ( " " )
# because 'list' is an expand-if-contains-one rule and we've provided less than one element (i.e. none) it should *not* have expanded
self . assertSequenceEqual ( [ subtree . data for subtree in r . children ] , ( ' list ' , ) )
# regardless of the amount of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule
self . assertEqual ( len ( r . children ) , 1 )
# Sanity check: verify that 'list' contains no 'item's as we've given it none
[ list ] = r . children
self . assertSequenceEqual ( [ item . data for item in list . children ] , ( ) )
2018-01-23 08:19:21 +00:00
@unittest.skipIf ( PARSER == ' cyk ' , " No empty rules " )
2017-02-07 23:19:33 +00:00
def test_empty_expand1_list_2 ( self ) :
g = _Lark ( r """ start: list
? list : item * " ! " ?
item : A
A : " a "
""" )
r = g . parse ( " " )
# because 'list' is an expand-if-contains-one rule and we've provided less than one element (i.e. none) it should *not* have expanded
self . assertSequenceEqual ( [ subtree . data for subtree in r . children ] , ( ' list ' , ) )
# regardless of the amount of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule
self . assertEqual ( len ( r . children ) , 1 )
# Sanity check: verify that 'list' contains no 'item's as we've given it none
[ list ] = r . children
self . assertSequenceEqual ( [ item . data for item in list . children ] , ( ) )
2018-01-23 08:19:21 +00:00
@unittest.skipIf ( PARSER == ' cyk ' , " No empty rules " )
2017-02-07 23:19:33 +00:00
def test_empty_flatten_list ( self ) :
g = _Lark ( r """ start: list
list : | item " , " list
item : A
A : " a "
""" )
r = g . parse ( " " )
# Because 'list' is a flatten rule it's top-level element should *never* be expanded
self . assertSequenceEqual ( [ subtree . data for subtree in r . children ] , ( ' list ' , ) )
# Sanity check: verify that 'list' contains no 'item's as we've given it none
[ list ] = r . children
self . assertSequenceEqual ( [ item . data for item in list . children ] , ( ) )
@unittest.skipIf ( True , " Flattening list isn ' t implemented (and may never be) " )
def test_single_item_flatten_list ( self ) :
g = _Lark ( r """ start: list
list : | item " , " list
item : A
A : " a "
""" )
r = g . parse ( " a, " )
# Because 'list' is a flatten rule it's top-level element should *never* be expanded
self . assertSequenceEqual ( [ subtree . data for subtree in r . children ] , ( ' list ' , ) )
# Sanity check: verify that 'list' contains exactly the one 'item' we've given it
[ list ] = r . children
self . assertSequenceEqual ( [ item . data for item in list . children ] , ( ' item ' , ) )
@unittest.skipIf ( True , " Flattening list isn ' t implemented (and may never be) " )
def test_multiple_item_flatten_list ( self ) :
g = _Lark ( r """ start: list
#list: | item "," list
item : A
A : " a "
""" )
r = g . parse ( " a,a, " )
# Because 'list' is a flatten rule it's top-level element should *never* be expanded
self . assertSequenceEqual ( [ subtree . data for subtree in r . children ] , ( ' list ' , ) )
# Sanity check: verify that 'list' contains exactly the two 'item's we've given it
[ list ] = r . children
self . assertSequenceEqual ( [ item . data for item in list . children ] , ( ' item ' , ' item ' ) )
@unittest.skipIf ( True , " Flattening list isn ' t implemented (and may never be) " )
def test_recurse_flatten ( self ) :
""" Verify that stack depth doesn ' t get exceeded on recursive rules marked for flattening. """
g = _Lark ( r """ start: a | start a
a : A
A : " a " """ )
# Force PLY to write to the debug log, but prevent writing it to the terminal (uses repr() on the half-built
# STree data structures, which uses recursion).
g . parse ( " a " * ( sys . getrecursionlimit ( ) / / 4 ) )
def test_token_collision ( self ) :
2017-10-30 09:14:42 +00:00
g = _Lark ( r """ start: " Hello " NAME
2017-02-26 19:57:25 +00:00
NAME : / \w / +
2017-02-23 11:00:16 +00:00
% ignore " "
2017-02-07 23:19:33 +00:00
""" )
x = g . parse ( ' Hello World ' )
self . assertSequenceEqual ( x . children , [ ' World ' ] )
x = g . parse ( ' Hello HelloWorld ' )
self . assertSequenceEqual ( x . children , [ ' HelloWorld ' ] )
2017-08-15 15:48:03 +00:00
def test_token_collision_WS ( self ) :
2017-10-30 09:14:42 +00:00
g = _Lark ( r """ start: " Hello " NAME
2017-08-15 15:48:03 +00:00
NAME : / \w / +
% import common . WS
% ignore WS
""" )
x = g . parse ( ' Hello World ' )
self . assertSequenceEqual ( x . children , [ ' World ' ] )
x = g . parse ( ' Hello HelloWorld ' )
self . assertSequenceEqual ( x . children , [ ' HelloWorld ' ] )
2017-07-26 07:17:27 +00:00
def test_token_collision2 ( self ) :
g = _Lark ( """
! start : " starts "
% import common . LCASE_LETTER
""" )
x = g . parse ( " starts " )
self . assertSequenceEqual ( x . children , [ ' starts ' ] )
2017-02-23 22:45:34 +00:00
# def test_string_priority(self):
# g = _Lark("""start: (A | /a?bb/)+
# A: "a" """)
# x = g.parse('abb')
# self.assertEqual(len(x.children), 2)
# # This parse raises an exception because the lexer will always try to consume
# # "a" first and will never match the regular expression
# # This behavior is subject to change!!
# # Thie won't happen with ambiguity handling.
# g = _Lark("""start: (A | /a?ab/)+
# A: "a" """)
# self.assertRaises(LexError, g.parse, 'aab')
2017-02-07 23:19:33 +00:00
def test_undefined_rule ( self ) :
self . assertRaises ( GrammarError , _Lark , """ start: a """ )
def test_undefined_token ( self ) :
self . assertRaises ( GrammarError , _Lark , """ start: A """ )
def test_rule_collision ( self ) :
g = _Lark ( """ start: " a " + " b "
| " a " + """ )
x = g . parse ( ' aaaa ' )
x = g . parse ( ' aaaab ' )
def test_rule_collision2 ( self ) :
g = _Lark ( """ start: " a " * " b "
| " a " + """ )
x = g . parse ( ' aaaa ' )
x = g . parse ( ' aaaab ' )
x = g . parse ( ' b ' )
def test_token_not_anon ( self ) :
2018-05-08 09:05:11 +00:00
""" Tests that " a " is matched as an anonymous token, and not A.
2017-02-07 23:19:33 +00:00
"""
g = _Lark ( """ start: " a "
A : " a " """ )
x = g . parse ( ' a ' )
2018-05-08 09:05:11 +00:00
self . assertEqual ( len ( x . children ) , 0 , ' " a " should be considered anonymous ' )
2017-02-23 21:50:52 +00:00
2018-05-08 09:05:11 +00:00
g = _Lark ( """ start: " a " A
A : " a " """ )
x = g . parse ( ' aa ' )
self . assertEqual ( len ( x . children ) , 1 , ' only " a " should be considered anonymous ' )
2017-02-07 23:19:33 +00:00
self . assertEqual ( x . children [ 0 ] . type , " A " )
2017-02-23 21:50:52 +00:00
g = _Lark ( """ start: /a/
A : / a / """ )
x = g . parse ( ' a ' )
2018-05-08 09:05:11 +00:00
self . assertEqual ( len ( x . children ) , 1 )
self . assertEqual ( x . children [ 0 ] . type , " A " , " A isn ' t associated with /a/ " )
2017-02-23 21:50:52 +00:00
2018-01-23 08:19:21 +00:00
@unittest.skipIf ( PARSER == ' cyk ' , " No empty rules " )
2017-02-07 23:19:33 +00:00
def test_maybe ( self ) :
g = _Lark ( """ start: [ " a " ] """ )
x = g . parse ( ' a ' )
x = g . parse ( ' ' )
def test_start ( self ) :
g = _Lark ( """ a: " a " a? """ , start = ' a ' )
x = g . parse ( ' a ' )
x = g . parse ( ' aa ' )
x = g . parse ( ' aaa ' )
def test_alias ( self ) :
g = _Lark ( """ start: " a " -> b """ )
x = g . parse ( ' a ' )
self . assertEqual ( x . data , " b " )
2017-02-23 11:00:16 +00:00
def test_token_ebnf ( self ) :
g = _Lark ( """ start: A
A : " a " * ( " b " ? " c " . . " e " ) +
""" )
x = g . parse ( ' abcde ' )
x = g . parse ( ' dd ' )
2017-02-26 15:30:40 +00:00
def test_backslash ( self ) :
g = _Lark ( r """ start: " \ \ " " a "
""" )
x = g . parse ( r ' \ a ' )
2017-11-16 10:40:15 +00:00
g = _Lark ( r """ start: / \ \ / /a/
2017-02-26 15:30:40 +00:00
""" )
x = g . parse ( r ' \ a ' )
def test_backslash2 ( self ) :
g = _Lark ( r """ start: " \ " " " - "
""" )
x = g . parse ( ' " - ' )
g = _Lark ( r """ start: / \ // /-/
""" )
x = g . parse ( ' /- ' )
2019-01-31 22:03:45 +00:00
def test_special_chars ( self ) :
g = _Lark ( r """ start: " \ n "
""" )
x = g . parse ( ' \n ' )
g = _Lark ( r """ start: / \ n/
""" )
x = g . parse ( ' \n ' )
2017-02-23 11:00:16 +00:00
# def test_token_recurse(self):
# g = _Lark("""start: A
# A: B
# B: A
# """)
2018-01-23 08:19:21 +00:00
@unittest.skipIf ( PARSER == ' cyk ' , " No empty rules " )
2017-02-28 07:09:35 +00:00
def test_empty ( self ) :
# Fails an Earley implementation without special handling for empty rules,
# or re-processing of already completed rules.
g = _Lark ( r """ start: _empty a " B "
a : _empty " A "
2017-02-26 19:57:25 +00:00
_empty :
2017-02-28 07:09:35 +00:00
""" )
x = g . parse ( ' AB ' )
2017-12-14 15:20:03 +00:00
def test_regex_quote ( self ) :
g = r """
start : SINGLE_QUOTED_STRING | DOUBLE_QUOTED_STRING
SINGLE_QUOTED_STRING : / ' [^ ' ] * ' /
DOUBLE_QUOTED_STRING : / " [^ " ] * " /
"""
g = _Lark ( g )
self . assertEqual ( g . parse ( ' " hello " ' ) . children , [ ' " hello " ' ] )
self . assertEqual ( g . parse ( " ' hello ' " ) . children , [ " ' hello ' " ] )
2017-02-10 09:50:50 +00:00
def test_lexer_token_limit ( self ) :
" Python has a stupid limit of 100 groups in a regular expression. Test that we handle this limitation "
tokens = { ' A %d ' % i : ' " %d " ' % i for i in range ( 300 ) }
g = _Lark ( """ start: %s
% s """ % ( ' ' .join(tokens), ' \n ' .join( " %s : %s " %x for x in tokens.items())))
2017-02-11 22:35:38 +00:00
def test_float_without_lexer ( self ) :
2019-03-02 12:55:48 +00:00
expected_error = UnexpectedCharacters if LEXER . startswith ( ' dynamic ' ) else UnexpectedToken
2018-01-23 08:19:21 +00:00
if PARSER == ' cyk ' :
expected_error = ParseError
2017-11-17 08:43:41 +00:00
2017-02-11 22:35:38 +00:00
g = _Lark ( """ start: [ " + " | " - " ] float
float : digit * " . " digit + exp ?
| digit + exp
exp : ( " e " | " E " ) [ " + " | " - " ] digit +
digit : " 0 " | " 1 " | " 2 " | " 3 " | " 4 " | " 5 " | " 6 " | " 7 " | " 8 " | " 9 "
""" )
g . parse ( " 1.2 " )
g . parse ( " -.2e9 " )
g . parse ( " +2e-9 " )
2017-11-17 08:43:41 +00:00
self . assertRaises ( expected_error , g . parse , " +2e-9e " )
2017-02-10 09:50:50 +00:00
2017-05-28 21:05:54 +00:00
def test_keep_all_tokens ( self ) :
l = _Lark ( """ start: " a " + """ , keep_all_tokens = True )
tree = l . parse ( ' aaa ' )
self . assertEqual ( tree . children , [ ' a ' , ' a ' , ' a ' ] )
2017-03-09 16:15:55 +00:00
        def test_token_flags(self):
            """The `i` (case-insensitive) flag on strings and regexes must match
            both cases while preserving the original input text in the tree."""
            l = _Lark("""!start: "a"i+
                      """
                      )
            tree = l.parse('aA')
            self.assertEqual(tree.children, ['a', 'A'])

            l = _Lark("""!start: /a/i+
                      """
                      )
            tree = l.parse('aA')
            self.assertEqual(tree.children, ['a', 'A'])

            # g = """!start: "a"i "a"
            #     """
            # self.assertRaises(GrammarError, _Lark, g)

            # g = """!start: /a/i /a/
            #     """
            # self.assertRaises(GrammarError, _Lark, g)

            g = """start: NAME "," "a"
                   NAME: /[a-z_]/i /[a-z0-9_]/i*
                """
            l = _Lark(g)
            tree = l.parse('ab,a')
            self.assertEqual(tree.children, ['ab'])
            tree = l.parse('AB,a')
            self.assertEqual(tree.children, ['AB'])
2017-05-06 14:04:19 +00:00
        def test_token_flags3(self):
            """Case-insensitive flag on a multi-character string terminal."""
            l = _Lark("""!start: ABC+
                      ABC: "abc"i
                      """
                      )
            tree = l.parse('aBcAbC')
            self.assertEqual(tree.children, ['aBc', 'AbC'])
2017-03-09 17:10:01 +00:00
        def test_token_flags2(self):
            """A flagged terminal alternated with plain regexes inside one rule."""
            g = """!start: ("a"i | /a/ /b/?)+
                """
            l = _Lark(g)
            tree = l.parse('aA')
            self.assertEqual(tree.children, ['a', 'A'])
2018-01-23 08:19:21 +00:00
        @unittest.skipIf(PARSER == 'cyk', "No empty rules")
        def test_twice_empty(self):
            """Nested optionals [["A"]] must accept both "A" and the empty string."""
            g = """!start: [["A"]]
                """
            l = _Lark(g)
            tree = l.parse('A')
            self.assertEqual(tree.children, ['A'])

            tree = l.parse('')
            self.assertEqual(tree.children, [])
2018-01-07 15:20:07 +00:00
def test_undefined_ignore ( self ) :
g = """ !start: " A "
% ignore B
"""
self . assertRaises ( GrammarError , _Lark , g )
2018-03-31 21:06:31 +00:00
def test_alias_in_terminal ( self ) :
g = """ start: TERM
TERM : " a " - > alias
"""
self . assertRaises ( GrammarError , _Lark , g )
2018-01-07 15:20:07 +00:00
        def test_line_and_column(self):
            """Tokens must carry correct 1-based line/column positions, including
            across a newline embedded inside a single token ("B\\nC")."""
            g = r"""!start: "A" bc "D"
                !bc: "B\nC"
                """
            l = _Lark(g)
            a, bc, d = l.parse("AB\nCD").children
            self.assertEqual(a.line, 1)
            self.assertEqual(a.column, 1)

            bc ,= bc.children
            self.assertEqual(bc.line, 1)
            self.assertEqual(bc.column, 2)

            self.assertEqual(d.line, 2)
            self.assertEqual(d.column, 2)

            # The dynamic (scanless) lexer does not track end positions.
            if LEXER != 'dynamic':
                self.assertEqual(a.end_line, 1)
                self.assertEqual(a.end_column, 2)
                self.assertEqual(bc.end_line, 2)
                self.assertEqual(bc.end_column, 2)
                self.assertEqual(d.end_line, 2)
                self.assertEqual(d.end_column, 3)
2018-01-07 15:20:07 +00:00
2017-05-22 09:26:24 +00:00
        def test_reduce_cycle(self):
            """Tests an edge-condition in the LALR parser, in which a transition state looks exactly like the end state.
            It seems that the correct solution is to explicitly distinguish finalization in the reduce() function.
            """
            l = _Lark("""
                term: A
                    | term term

                A: "a"

            """, start='term')
            tree = l.parse("aa")
            self.assertEqual(len(tree.children), 2)
2017-03-09 16:15:55 +00:00
2017-08-22 14:47:22 +00:00
        @unittest.skipIf(LEXER != 'standard', "Only standard lexers care about token priority")
        def test_lexer_prioritization(self):
            "Tests effect of priority on result"
            # Raising A's priority should make the lexer prefer A+B over AB.
            grammar = """
            start: A B | AB
            A.2: "a"
            B: "b"
            AB: "ab"
            """
            l = _Lark(grammar)
            res = l.parse("ab")

            self.assertEqual(res.children, ['a', 'b'])
            self.assertNotEqual(res.children, ['ab'])

            # Conversely, raising AB's priority should make it win.
            grammar = """
            start: A B | AB
            A: "a"
            B: "b"
            AB.3: "ab"
            """
            l = _Lark(grammar)
            res = l.parse("ab")

            self.assertNotEqual(res.children, ['a', 'b'])
            self.assertEqual(res.children, ['ab'])

            # Negative priorities are allowed and compared among themselves.
            grammar = """
            start: A B | AB
            A: "a"
            B.-20: "b"
            AB.-10: "ab"
            """
            l = _Lark(grammar)
            res = l.parse("ab")
            self.assertEqual(res.children, ['a', 'b'])

            # Priorities are arbitrary-precision integers (no overflow).
            grammar = """
            start: A B | AB
            A.-99999999999999999999999: "a"
            B: "b"
            AB: "ab"
            """
            l = _Lark(grammar)
            res = l.parse("ab")
            self.assertEqual(res.children, ['ab'])
2017-03-09 16:15:55 +00:00
2017-08-22 19:28:47 +00:00
        def test_import(self):
            """%import of terminals from the bundled `common` grammar library."""
            grammar = """
            start: NUMBER WORD

            %import common.NUMBER
            %import common.WORD
            %import common.WS
            %ignore WS

            """
            l = _Lark(grammar)
            x = l.parse('12 elephants')
            self.assertEqual(x.children, ['12', 'elephants'])
2018-07-07 21:00:02 +00:00
2019-02-20 08:45:25 +00:00
        def test_import_rename(self):
            """%import with `-> NEWNAME` renames the imported terminal locally."""
            grammar = """
            start: N W

            %import common.NUMBER -> N
            %import common.WORD -> W
            %import common.WS
            %ignore WS

            """
            l = _Lark(grammar)
            x = l.parse('12 elephants')
            self.assertEqual(x.children, ['12', 'elephants'])
2018-07-22 17:39:35 +00:00
        def test_relative_import(self):
            """Grammar loaded from a file that uses a relative %import."""
            l = _Lark_open('test_relative_import.lark', rel_to=__file__)
            x = l.parse('12 lions')
            self.assertEqual(x.children, ['12', 'lions'])
2019-02-20 08:45:25 +00:00
        def test_relative_import_rename(self):
            """Relative %import combined with `->` renaming."""
            l = _Lark_open('test_relative_import_rename.lark', rel_to=__file__)
            x = l.parse('12 lions')
            self.assertEqual(x.children, ['12', 'lions'])
2019-02-20 09:25:15 +00:00
        def test_relative_rule_import(self):
            """Rules (not just terminals) can be imported from another grammar file."""
            l = _Lark_open('test_relative_rule_import.lark', rel_to=__file__)
            x = l.parse('xaabby')
            self.assertEqual(x.children, [
                'x',
                Tree('expr', ['a', Tree('expr', ['a', 'b']), 'b']),
                'y'])
        def test_relative_rule_import_drop_ignore(self):
            # %ignore rules are dropped on import, so whitespace that the
            # imported grammar would ignore must now cause a parse failure.
            l = _Lark_open('test_relative_rule_import_drop_ignore.lark',
                           rel_to=__file__)
            self.assertRaises((ParseError, UnexpectedInput),
                              l.parse, 'xa abby')
        def test_relative_rule_import_subrule(self):
            """Sub-rules pulled in by an imported rule are namespaced with the
            `grammars__ab__` prefix derived from their source grammar."""
            l = _Lark_open('test_relative_rule_import_subrule.lark',
                           rel_to=__file__)
            x = l.parse('xaabby')
            self.assertEqual(x.children, [
                'x',
                Tree('startab', [
                    Tree('grammars__ab__expr', [
                        'a', Tree('grammars__ab__expr', ['a', 'b']), 'b',
                    ]),
                ]),
                'y'])
2019-02-20 10:52:04 +00:00
        def test_relative_rule_import_subrule_no_conflict(self):
            """A local rule named like an imported sub-rule must not collide —
            namespacing keeps the two `expr` rules distinct."""
            l = _Lark_open(
                'test_relative_rule_import_subrule_no_conflict.lark',
                rel_to=__file__)
            x = l.parse('xaby')
            self.assertEqual(x.children, [Tree('expr', [
                'x',
                Tree('startab', [
                    Tree('grammars__ab__expr', ['a', 'b']),
                ]),
                'y'])])
            self.assertRaises((ParseError, UnexpectedInput),
                              l.parse, 'xaxabyby')
        def test_relative_rule_import_rename(self):
            """Imported rules can be renamed with `->`, and the new name is used
            for the tree data."""
            l = _Lark_open('test_relative_rule_import_rename.lark',
                           rel_to=__file__)
            x = l.parse('xaabby')
            self.assertEqual(x.children, [
                'x',
                Tree('ab', ['a', Tree('ab', ['a', 'b']), 'b']),
                'y'])
2018-07-22 17:39:35 +00:00
def test_multi_import ( self ) :
2018-07-07 21:00:02 +00:00
grammar = """
start : NUMBER WORD
2018-07-22 17:39:35 +00:00
% import common ( NUMBER , WORD , WS )
2018-07-07 21:00:02 +00:00
% ignore WS
"""
l = _Lark ( grammar )
x = l . parse ( ' 12 toucans ' )
self . assertEqual ( x . children , [ ' 12 ' , ' toucans ' ] )
2018-07-22 17:39:35 +00:00
        def test_relative_multi_import(self):
            """Tuple-form %import from a relative grammar file."""
            l = _Lark_open("test_relative_multi_import.lark", rel_to=__file__)
            x = l.parse('12 capybaras')
            self.assertEqual(x.children, ['12', 'capybaras'])
2019-04-10 00:33:55 +00:00
        def test_relative_import_preserves_leading_underscore(self):
            """A leading underscore (inlined-rule marker) must survive import."""
            l = _Lark_open("test_relative_import_preserves_leading_underscore.lark", rel_to=__file__)
            x = l.parse('Ax')
            self.assertEqual(next(x.find_data('c')).children, ['A'])
2019-04-10 08:36:25 +00:00
        def test_relative_import_of_nested_grammar(self):
            """Imports are resolved relative to the importing grammar's own
            directory, not the top-level file's."""
            l = _Lark_open("grammars/test_relative_import_of_nested_grammar.lark", rel_to=__file__)
            x = l.parse('N')
            self.assertEqual(next(x.find_data('rule_to_import')).children, ['N'])
2019-04-12 08:22:25 +00:00
        def test_relative_import_rules_dependencies_imported_only_once(self):
            """Shared dependencies of imported rules must be deduplicated, not
            imported once per importer."""
            l = _Lark_open("test_relative_import_rules_dependencies_imported_only_once.lark", rel_to=__file__)
            x = l.parse('AAA')
            self.assertEqual(next(x.find_data('a')).children, ['A'])
            self.assertEqual(next(x.find_data('b')).children, ['A'])
            self.assertEqual(next(x.find_data('d')).children, ['A'])
2018-07-22 19:47:12 +00:00
def test_import_errors ( self ) :
grammar = """
start : NUMBER WORD
% import . grammars . bad_test . NUMBER
"""
self . assertRaises ( IOError , _Lark , grammar )
grammar = """
start : NUMBER WORD
% import bad_test . NUMBER
"""
self . assertRaises ( IOError , _Lark , grammar )
2018-07-07 21:00:02 +00:00
2017-08-23 07:52:46 +00:00
        @unittest.skipIf(PARSER != 'earley', "Currently only Earley supports priority in rules")
        def test_earley_prioritization(self):
            "Tests effect of priority on result"
            # The higher-priority rule must win the ambiguity in both directions.
            grammar = """
            start: a | b
            a.1: "a"
            b.2: "a"
            """

            # l = Lark(grammar, parser='earley', lexer='standard')
            l = _Lark(grammar)
            res = l.parse("a")
            self.assertEqual(res.children[0].data, 'b')

            grammar = """
            start: a | b
            a.2: "a"
            b.1: "a"
            """

            l = _Lark(grammar)
            # l = Lark(grammar, parser='earley', lexer='standard')
            res = l.parse("a")
            self.assertEqual(res.children[0].data, 'a')
2017-10-31 10:28:11 +00:00
2017-08-15 20:06:06 +00:00
        @unittest.skipIf(PARSER != 'earley', "Currently only Earley supports priority in rules")
        def test_earley_prioritization_sum(self):
            "Tests effect of priority on result"
            # NOTE: uses Lark directly (not _Lark) because priority="invert"
            # is the option under test; with "invert", the derivation with the
            # LOWER summed rule priority is preferred.
            grammar = """
            start: ab_ b_ a_ | indirection
            indirection: a_ bb_ a_
            a_: "a"
            b_: "b"
            ab_: "ab"
            bb_.1: "bb"
            """
            l = Lark(grammar, priority="invert")
            res = l.parse('abba')
            self.assertEqual(''.join(child.data for child in res.children), 'ab_b_a_')

            grammar = """
            start: ab_ b_ a_ | indirection
            indirection: a_ bb_ a_
            a_: "a"
            b_: "b"
            ab_.1: "ab"
            bb_: "bb"
            """
            l = Lark(grammar, priority="invert")
            res = l.parse('abba')
            self.assertEqual(''.join(child.data for child in res.children), 'indirection')

            grammar = """
            start: ab_ b_ a_ | indirection
            indirection: a_ bb_ a_
            a_.2: "a"
            b_.1: "b"
            ab_.3: "ab"
            bb_.3: "bb"
            """
            l = Lark(grammar, priority="invert")
            res = l.parse('abba')
            self.assertEqual(''.join(child.data for child in res.children), 'ab_b_a_')

            grammar = """
            start: ab_ b_ a_ | indirection
            indirection: a_ bb_ a_
            a_.1: "a"
            b_.1: "b"
            ab_.4: "ab"
            bb_.3: "bb"
            """
            l = Lark(grammar, priority="invert")
            res = l.parse('abba')
            self.assertEqual(''.join(child.data for child in res.children), 'indirection')
2017-03-09 16:15:55 +00:00
2017-10-16 07:28:53 +00:00
        def test_utf8(self):
            """Non-ASCII characters must work in both anonymous strings and
            named terminals (± is U+00B1)."""
            g = u"""start: a
                   a: "±a"
                """
            l = _Lark(g)
            self.assertEqual(l.parse(u'±a'), Tree('start', [Tree('a', [])]))

            g = u"""start: A
                   A: "±a"
                """
            l = _Lark(g)
            self.assertEqual(l.parse(u'±a'), Tree('start', [u'\xb1a']))
2018-01-23 08:19:21 +00:00
        @unittest.skipIf(PARSER == 'cyk', "No empty rules")
        def test_ignore(self):
            """%ignore must work for declared terminals and anonymous strings,
            including at end-of-input."""
            grammar = r"""
            COMMENT: /(!|(\/\/))[^\n]*/
            %ignore COMMENT
            %import common.WS -> _WS
            %import common.INT
            start: "INT"i _WS+ INT _WS*
            """
            parser = _Lark(grammar)

            tree = parser.parse("int 1 ! This is a comment\n")
            self.assertEqual(tree.children, ['1'])

            tree = parser.parse("int 1 ! This is a comment")    # A trailing ignore token can be tricky!
            self.assertEqual(tree.children, ['1'])

            parser = _Lark(r"""
                start : "a"*
                %ignore "b"
            """)
            tree = parser.parse("bb")
            self.assertEqual(tree.children, [])
2017-10-31 11:36:54 +00:00
2017-11-16 10:40:15 +00:00
        def test_regex_escaping(self):
            """Backslash escapes inside /regex/ and "string" terminals:
            a single backslash escapes for the regex engine, a doubled
            backslash matches a literal backslash character."""
            g = _Lark("start: /[ab]/")
            g.parse('a')
            g.parse('b')

            self.assertRaises(UnexpectedInput, g.parse, 'c')

            _Lark(r'start: /\w/').parse('a')

            g = _Lark(r'start: /\\w/')
            self.assertRaises(UnexpectedInput, g.parse, 'a')

            g.parse(r'\w')

            _Lark(r'start: /\[/').parse('[')

            _Lark(r'start: /\//').parse('/')

            _Lark(r'start: /\\/').parse('\\')

            _Lark(r'start: /\[ab]/').parse('[ab]')

            _Lark(r'start: /\\[ab]/').parse('\\a')

            _Lark(r'start: /\t/').parse('\t')

            _Lark(r'start: /\\t/').parse('\\t')

            _Lark(r'start: /\\\t/').parse('\\\t')

            _Lark(r'start: "\t"').parse('\t')

            _Lark(r'start: "\\t"').parse('\\t')

            _Lark(r'start: "\\\t"').parse('\\\t')
2018-02-10 17:43:19 +00:00
        def test_ranged_repeat_rules(self):
            """The ~n and ~n..m repeat operators on rule references: exact
            counts, open-ended lower bound of 0, invalid reversed ranges,
            and mixed ranges in one rule."""
            g = u"""!start: "A"~3
                """
            l = _Lark(g)
            self.assertEqual(l.parse(u'AAA'), Tree('start', ["A", "A", "A"]))
            self.assertRaises(ParseError, l.parse, u'AA')
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAAA')

            g = u"""!start: "A"~0..2
                """
            if PARSER != 'cyk': # XXX CYK currently doesn't support empty grammars
                l = _Lark(g)
                self.assertEqual(l.parse(u''), Tree('start', []))
                self.assertEqual(l.parse(u'A'), Tree('start', ['A']))
                self.assertEqual(l.parse(u'AA'), Tree('start', ['A', 'A']))
                self.assertRaises((UnexpectedToken, UnexpectedInput), l.parse, u'AAA')

            # A reversed range (3..2) can never match; reject at grammar load.
            g = u"""!start: "A"~3..2
                """
            self.assertRaises(GrammarError, _Lark, g)

            g = u"""!start: "A"~2..3 "B"~2
                """
            l = _Lark(g)
            self.assertEqual(l.parse(u'AABB'), Tree('start', ['A', 'A', 'B', 'B']))
            self.assertEqual(l.parse(u'AAABB'), Tree('start', ['A', 'A', 'A', 'B', 'B']))
            self.assertRaises(ParseError, l.parse, u'AAAB')
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAABBB')
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'ABB')
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAAABB')
        def test_ranged_repeat_terms(self):
            """The ~n and ~n..m repeat operators inside terminal definitions."""
            g = u"""!start: AAA
                    AAA: "A"~3
                """
            l = _Lark(g)
            self.assertEqual(l.parse(u'AAA'), Tree('start', ["AAA"]))
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AA')
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAAA')

            g = u"""!start: AABB CC
                    AABB: "A"~0..2 "B"~2
                    CC: "C"~1..2
                """
            l = _Lark(g)
            self.assertEqual(l.parse(u'AABBCC'), Tree('start', ['AABB', 'CC']))
            self.assertEqual(l.parse(u'BBC'), Tree('start', ['BB', 'C']))
            self.assertEqual(l.parse(u'ABBCC'), Tree('start', ['ABB', 'CC']))
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAAB')
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAABBB')
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'ABB')
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAAABB')
2017-11-16 10:40:15 +00:00
2018-04-24 12:36:53 +00:00
        @unittest.skipIf(PARSER == 'earley', "Priority not handled correctly right now") # TODO XXX
        def test_priority_vs_embedded(self):
            """A prioritized one-char terminal must beat a longer terminal that
            would otherwise swallow it."""
            g = """
            A.2: "a"
            WORD: ("a".."z")+

            start: (A | WORD)+
            """
            l = _Lark(g)
            t = l.parse('abc')
            self.assertEqual(t.children, ['a', 'bc'])
            self.assertEqual(t.children[0].type, 'A')
2017-11-16 10:40:15 +00:00
2018-08-14 14:35:47 +00:00
def test_line_counting ( self ) :
p = _Lark ( " start: /[^x]+/ " )
text = ' hello \n world '
t = p . parse ( text )
tok = t . children [ 0 ]
self . assertEqual ( tok , text )
self . assertEqual ( tok . line , 1 )
self . assertEqual ( tok . column , 1 )
if _LEXER != ' dynamic ' :
self . assertEqual ( tok . end_line , 2 )
self . assertEqual ( tok . end_column , 6 )
2018-10-15 18:52:27 +00:00
        @unittest.skipIf(PARSER == 'cyk', "Empty rules")
        def test_empty_end(self):
            """Rules that match the empty string at the end of input must still
            contribute (empty) children to the tree."""
            p = _Lark("""
                start: b c d
                b: "B"
                c: | "C"
                d: | "D"
            """)
            res = p.parse('B')
            self.assertEqual(len(res.children), 3)
2018-12-19 23:24:23 +00:00
        @unittest.skipIf(PARSER == 'cyk', "Empty rules")
        def test_maybe_placeholders(self):
            """With maybe_placeholders=True, an unmatched [optional] that would
            contribute a visible child yields None in its place; purely
            invisible constructs (anonymous strings, _underscored rules)
            contribute nothing."""
            # Anonymous tokens shouldn't count
            p = _Lark("""start: ["a"] ["b"] ["c"] """, maybe_placeholders=True)
            self.assertEqual(p.parse("").children, [])

            # All invisible constructs shouldn't count
            p = _Lark("""start: [A] ["b"] [_c] ["e" "f" _c]
                        A: "a"
                        _c: "c" """, maybe_placeholders=True)
            self.assertEqual(p.parse("").children, [None])
            self.assertEqual(p.parse("c").children, [None])
            self.assertEqual(p.parse("aefc").children, ['a'])

            # ? shouldn't apply
            p = _Lark("""!start: ["a"] "b"? ["c"] """, maybe_placeholders=True)
            self.assertEqual(p.parse("").children, [None, None])
            self.assertEqual(p.parse("b").children, [None, 'b', None])

            p = _Lark("""!start: ["a"] ["b"] ["c"] """, maybe_placeholders=True)
            self.assertEqual(p.parse("").children, [None, None, None])
            self.assertEqual(p.parse("a").children, ['a', None, None])
            self.assertEqual(p.parse("b").children, [None, 'b', None])
            self.assertEqual(p.parse("c").children, [None, None, 'c'])
            self.assertEqual(p.parse("ab").children, ['a', 'b', None])
            self.assertEqual(p.parse("ac").children, ['a', None, 'c'])
            self.assertEqual(p.parse("bc").children, [None, 'b', 'c'])
            self.assertEqual(p.parse("abc").children, ['a', 'b', 'c'])

            # Placeholders also appear once per iteration of a repeated group.
            p = _Lark("""!start: (["a"] "b" ["c"])+ """, maybe_placeholders=True)
            self.assertEqual(p.parse("b").children, [None, 'b', None])
            self.assertEqual(p.parse("bb").children, [None, 'b', None, None, 'b', None])
            self.assertEqual(p.parse("abbc").children, ['a', 'b', None, None, 'b', 'c'])
            self.assertEqual(p.parse("babbcabcb").children,
                [None, 'b', None,
                 'a', 'b', None,
                 None, 'b', 'c',
                 'a', 'b', 'c',
                 None, 'b', None])

            p = _Lark("""!start: ["a"] ["c"] "b"+ ["a"] ["d"] """, maybe_placeholders=True)
            self.assertEqual(p.parse("bb").children, [None, None, 'b', 'b', None, None])
            self.assertEqual(p.parse("bd").children, [None, None, 'b', None, 'd'])
            self.assertEqual(p.parse("abba").children, ['a', None, 'b', 'b', 'a', None])
            self.assertEqual(p.parse("cbbbb").children, [None, 'c', 'b', 'b', 'b', 'b', None, None])
2017-11-16 10:40:15 +00:00
2019-01-31 22:03:45 +00:00
        def test_escaped_string(self):
            "Tests common.ESCAPED_STRING"
            grammar = r"""
            start: ESCAPED_STRING+

            %import common (WS_INLINE, ESCAPED_STRING)
            %ignore WS_INLINE
            """

            parser = _Lark(grammar)

            # A backslash-escaped backslash, then an escaped quote inside a string.
            parser.parse(r'"\\" "b" "c"')

            parser.parse(r'"That" "And a \"b"')
2019-05-19 10:30:25 +00:00
def test_meddling_unused ( self ) :
" Unless ' unused ' is removed, LALR analysis will fail on reduce-reduce collision "
grammar = """
start : EKS * x
x : EKS
unused : x *
EKS : " x "
"""
parser = _Lark ( grammar )
2019-08-21 10:14:28 +00:00
        @unittest.skipIf(PARSER != 'lalr' or LEXER == 'custom', "Serialize currently only works for LALR parsers without custom lexers (though it should be easy to extend)")
        def test_serialize(self):
            """Round-trip a parser through serialize()/deserialize(), both with
            plain dicts and with a memoized namespace of Rule/TerminalDef."""
            grammar = """
            start: _ANY b "C"
            _ANY: /./
            b: "B"
            """
            parser = _Lark(grammar)

            d = parser.serialize()
            parser2 = Lark.deserialize(d, {}, {})
            self.assertEqual(parser2.parse('ABC'), Tree('start', [Tree('b', [])]) )

            namespace = {'Rule': Rule, 'TerminalDef': TerminalDef}
            d, m = parser.memo_serialize(namespace.values())
            parser3 = Lark.deserialize(d, namespace, m)
            self.assertEqual(parser3.parse('ABC'), Tree('start', [Tree('b', [])]) )
2019-07-01 14:07:23 +00:00
def test_multi_start ( self ) :
parser = _Lark ( '''
2019-07-01 15:23:44 +00:00
a : " x " " a " ?
2019-07-01 14:07:23 +00:00
b : " x " " b " ?
''' , start=[ ' a ' , ' b ' ])
2019-07-01 15:23:44 +00:00
self . assertEqual ( parser . parse ( ' xa ' , ' a ' ) , Tree ( ' a ' , [ ] ) )
self . assertEqual ( parser . parse ( ' xb ' , ' b ' ) , Tree ( ' b ' , [ ] ) )
2019-07-01 14:07:23 +00:00
2019-08-09 21:53:02 +00:00
        def test_lexer_detect_newline_tokens(self):
            """Line counting must account for newlines matched INSIDE a token,
            whether via a literal \\n, dot-with-/s flag, a negated class, or \\s."""
            # Detect newlines in regular tokens
            g = _Lark(r"""start: "go" tail*
            !tail : SA "@" | SB "@" | SC "@" | SD "@"
            SA : "a" /\n/
            SB : /b./s
            SC : "c" /[^a-z]/
            SD : "d" /\s/
            """)
            a, b, c, d = [x.children[1] for x in g.parse('goa\n@b\n@c\n@d\n@').children]
            self.assertEqual(a.line, 2)
            self.assertEqual(b.line, 3)
            self.assertEqual(c.line, 4)
            self.assertEqual(d.line, 5)

            # Detect newlines in ignored tokens
            # NOTE(review): the loop variable `re` shadows the stdlib `re`
            # module name within this method — harmless here, but worth renaming.
            for re in ['/\\n/', '/[^a-z]/', '/\\s/']:
                g = _Lark('''!start: "a" "a"
                             %ignore {}'''.format(re))
                a, b = g.parse('a\na').children
                self.assertEqual(a.line, 1)
                self.assertEqual(b.line, 2)
2019-04-09 10:48:43 +00:00
2017-10-16 07:28:53 +00:00
2018-05-05 13:52:39 +00:00
_NAME = " Test " + PARSER . capitalize ( ) + LEXER . capitalize ( )
2017-02-07 23:19:33 +00:00
_TestParser . __name__ = _NAME
globals ( ) [ _NAME ] = _TestParser
2017-05-01 14:08:10 +00:00
# Note: You still have to import them in __main__ for the tests to run
# Each (lexer, parser) combination below generates one TestCase subclass
# (named e.g. TestLalrStandard) via _make_parser_test, injected into
# globals() so unittest discovery picks it up.
_TO_TEST = [
        ('standard', 'earley'),
        ('standard', 'cyk'),
        ('dynamic', 'earley'),
        ('dynamic_complete', 'earley'),
        ('standard', 'lalr'),
        ('contextual', 'lalr'),
        ('custom', 'lalr'),
        # (None, 'earley'),
]

for _LEXER, _PARSER in _TO_TEST:
    _make_parser_test(_LEXER, _PARSER)

# The full-Earley (ambiguity-aware) suite only applies to dynamic lexers.
# NOTE(review): `_LEXER` stays bound at module level after these loops.
for _LEXER in ('dynamic', 'dynamic_complete'):
    _make_full_earley_test(_LEXER)

if __name__ == '__main__':
    unittest.main()