From 06bccc489acf5523ae8c4e7a2867cc7e54cfaf70 Mon Sep 17 00:00:00 2001 From: asciimoo Date: Sun, 23 Sep 2012 22:50:44 +0200 Subject: [PATCH] [enh] random/getone feature added ++ moar tests --- exrex.py | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++-- tests.py | 20 +++++++++++++++++--- 2 files changed, 68 insertions(+), 5 deletions(-) diff --git a/exrex.py b/exrex.py index 4dd9e5c..893b3d8 100644 --- a/exrex.py +++ b/exrex.py @@ -23,8 +23,9 @@ except: pass from re import sre_parse from itertools import product, chain, tee +from random import choice,randint -__all__ = ('generate', 'CATEGORIES', 'count', 'parse') +__all__ = ('generate', 'CATEGORIES', 'count', 'parse', 'getone') CATEGORIES = {'category_space' : sorted(sre_parse.WHITESPACE) ,'category_digit' : sorted(sre_parse.DIGITS) @@ -61,7 +62,7 @@ def prods(orig, ran, items): yield o+''.join(s) def _gen(d, limit=20, count=False): - """docstring for _p""" + """docstring for _gen""" ret = [''] strings = 0 for i in d: @@ -103,6 +104,9 @@ def _gen(d, limit=20, count=False): if count: strings = (strings or 1) * len(subs) ret = comb(ret, subs) + # ignore ^ and $ + elif i[0] == 'at': + continue else: print('[!] cannot handle expression "%r"' % i) @@ -111,6 +115,38 @@ def _gen(d, limit=20, count=False): return ret +def _randone(d, limit=20): + """docstring for _randone""" + ret = '' + for i in d: + if i[0] == 'in': + ret += choice(_in(i[1])) + elif i[0] == 'literal': + ret += chr(i[1]) + elif i[0] == 'category': + ret += choice(CATEGORIES.get(i[1], [''])) + elif i[0] == 'any': + ret += choice(CATEGORIES['category_any']) + elif i[0] == 'max_repeat': + chars = filter(None, _gen(list(i[1][2]), limit)) + if i[1][1]+1 - i[1][0] > limit: + min,max = i[1][0], i[1][0]+limit + else: + min,max = i[1][0], i[1][1] + for _ in range(randint(min, max)): + ret += choice(chars) + elif i[0] == 'branch': + ret += choice(chain.from_iterable(_gen(list(x), limit) for x in i[1][1])) + elif i[0] == 'subpattern': + l = i[1:] + ret += choice(list(chain.from_iterable(_gen(list(x[1]), limit) for x in l))) + elif i[0] == 'at': + continue + else: + print('[!] cannot handle expression "%s"' % str(i)) + + return ret + def parse(s): """Regular expression parser @@ -145,6 +181,11 @@ def count(s, limit=20): """ return _gen(parse(s), limit, count=True) +def getone(regex_string, limit=20): + """Returns a random matching string to a given regular expression + """ + return _randone(parse(regex_string), limit) + def argparser(): import argparse from sys import stdout @@ -167,6 +208,11 @@ def argparser(): ,default = False ,action = 'store_true' ) + argp.add_argument('-r', '--random' + ,help = 'Returns a random string that matches to the regex' + ,default = False + ,action = 'store_true' + ) argp.add_argument('-d', '--delimiter' ,help = 'Delimiter - default is \\n' ,default = '\n' @@ -197,6 +243,9 @@ def __main__(): if args['count']: args['output'].write('%d%s' % (count(args['regex']), args['delimiter'])) exit(0) + if args['random']: + args['output'].write('%s%s' % (getone(args['regex']), args['delimiter'])) + exit(0) try: g = generate(args['regex'], args['limit']) except Exception, e: diff --git a/tests.py b/tests.py index 38d03ea..a7b102d 100644 --- a/tests.py +++ b/tests.py @@ -1,6 +1,7 @@ -from exrex import generate, count +from exrex import generate, count, getone +from re import match RS = {'[ab][cd]': ['ac', 'ad', 'bc', 'bd'] @@ -13,6 +14,11 @@ RS = {'[ab][cd]': ['ac', 'ad', 'bc', 'bd'] ,'(a(b(c(d(e(f){1,2}))))){1,2}': ['abcdef', 'abcdeff', 'abcdefabcdef', 'abcdefabcdeff', 'abcdeffabcdef', 'abcdeffabcdeff'] } +BIGS = ['^a*$' + ,'^[a-zA-Z]+$' + ,'^(foo){3,}$' + ] + def gen_test(): for regex, result in RS.items(): assert list(generate(regex)) == result @@ -21,9 +27,17 @@ def count_test(): for regex, result in RS.items(): assert count(regex) == len(result) +def getone_test(): + for regex,_ in RS.items(): + assert match(regex, getone(regex)) + for regex in BIGS: + assert match(regex, getone(regex)) + if __name__ == '__main__': gen_test() - print('[!] generation test passed') + print('[+] generation test passed') count_test() - print('[!] length test passed') + print('[+] length test passed') + getone_test() + print('[+] random generation test passed')