#!/usr/bin/env python

# This file is part of exrex.
#
# exrex is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# exrex is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with exrex. If not, see < http://www.gnu.org/licenses/ >.
#
# (C) 2012- by Adam Tauber,

"""exrex - generate every string matched by a (finite) regular expression."""

import sys
from itertools import product, repeat

# The regex parser has lived in several places over the years; try them in
# order so the module imports on Python 2 as well as on current Python 3.
try:
    from re import sre_parse                  # Python 2, Python 3 < 3.11
except ImportError:
    try:
        from re import _parser as sre_parse   # Python 3.11+
    except ImportError:
        import sre_parse                      # last resort: legacy top-level module

# Characters generated for each supported category.  The parser exposes
# WHITESPACE and DIGITS as sets, so they are sorted to make the output order
# reproducible.  'category_any' is the alphabet used for '.' and for negated
# classes (code points 32..122).
CATEGORIES = {
    'category_space': sorted(sre_parse.WHITESPACE),
    'category_digit': sorted(sre_parse.DIGITS),
    'category_any': [chr(x) for x in range(32, 123)],
}


def _opname(op):
    """Return the lower-case name of a parser opcode or category code.

    Older parsers use plain strings such as 'literal'; newer ones use named
    integer constants that expose the name through a ``name`` attribute.
    """
    return str(getattr(op, 'name', op)).lower()


def _charset(items):
    """Return the characters matched by the items of a character class.

    Literals, ranges and known categories are combined as a union, in order
    of first appearance and without duplicates.  A leading negation yields
    every character of CATEGORIES['category_any'] that is not listed.
    Categories missing from CATEGORIES contribute nothing.
    """
    chars = []
    negated = False
    for op, av in items:
        name = _opname(op)
        if name == 'negate':
            negated = True
        elif name == 'literal':
            chars.append(chr(av))
        elif name == 'range':
            chars.extend(chr(code) for code in range(av[0], av[1] + 1))
        elif name == 'category':
            chars.extend(CATEGORIES.get(_opname(av), []))
    if negated:
        excluded = set(chars)
        return [c for c in CATEGORIES['category_any'] if c not in excluded]
    seen = set()
    unique = []
    for c in chars:
        if c not in seen:
            seen.add(c)
            unique.append(c)
    return unique


def _sub(pattern):
    """Expand a nested sub-pattern; an empty one matches only ''."""
    items = list(pattern)
    return _p(items) if items else ['']


def _p(d, append=False):
    """Expand a list of parsed regex items into the strings they match.

    d      -- list of (opcode, argument) pairs as produced by the parser.
    append -- when true, `d` holds the members of a character class and the
              result is the list of single characters in that class.

    Returns a list of strings.  A non-list or empty `d` is reported on
    stderr and yields [].  Opcodes that are not handled (anchors, look-around,
    back-references, ...) are skipped.
    """
    if not isinstance(d, list):
        # Wrap in a tuple so a tuple argument cannot break the formatting.
        sys.stderr.write('[!] not a list: %r\n' % (d,))
        return []
    if not d:
        sys.stderr.write('[!] empty list\n')
        return []
    if append:
        return _charset(d)

    ret = ['']
    for op, av in d:
        name = _opname(op)
        if name == 'literal':
            ret = [r + chr(av) for r in ret]
        elif name == 'not_literal':
            # Single-character negated class, e.g. [^a].
            skipped = chr(av)
            ret = [r + c
                   for c in CATEGORIES['category_any'] if c != skipped
                   for r in ret]
        elif name == 'subpattern':
            # The argument tuple grew extra flag fields in newer parsers;
            # the nested pattern is always its last element.
            pieces = _sub(av[-1])
            if pieces:
                ret = [r + piece for piece in pieces for r in ret]
        elif name == 'in':
            chars = _charset(list(av))
            # A class made only of unsupported categories is skipped, the
            # same way unsupported categories are skipped outside a class.
            if chars:
                ret = [r + c for c in chars for r in ret]
        elif name in ('max_repeat', 'min_repeat'):
            # Greedy and lazy repeats match the same set of strings.
            # NOTE: unbounded repeats ('*', '+') use the parser's maximum
            # repeat count as upper bound and are not practical to expand.
            low, high, body = av
            # Empty pieces are dropped before repeating (unchanged from the
            # previous implementation), so optional bodies such as (a?){2}
            # are only expanded through their non-empty alternatives.
            chars = [x for x in _sub(body) if x != '']
            ret = [r + ''.join(combo)
                   for count in range(low, high + 1)
                   for combo in product(*repeat(chars, count))
                   for r in ret]
        elif name == 'category':
            # Unknown categories append nothing.
            cat = CATEGORIES.get(_opname(av), [''])
            ret = [r + c for r in ret for c in cat]
        elif name == 'branch':
            subs = []
            for alternative in av[1]:
                # An empty alternative (as in "b|") matches the empty string.
                subs.extend(_sub(alternative))
            ret = [r + s for r in ret for s in subs]
        elif name == 'any':
            ret = [r + c for c in CATEGORIES['category_any'] for r in ret]
    return ret


def parse(s):
    """Return the list of strings generated from the regular expression `s`."""
    return _p(list(sre_parse.parse(s)))


def argparser():
    """Parse the command line and return the options as a dict.

    Keys: 'output' (writable file object, stdout by default), 'delimiter',
    'verbose' and 'regex'.
    """
    import argparse
    from sys import stdout

    argp = argparse.ArgumentParser(
        description='exrex - regular expression string generator')
    argp.add_argument('-o', '--output',
                      help='Output file - default is STDOUT',
                      metavar='FILE',
                      default=stdout,
                      type=argparse.FileType('w'))
    argp.add_argument('-d', '--delimiter',
                      help='Delimiter - default is \\n',
                      default='\n')
    # Accepted for command-line compatibility; not used by the generator.
    argp.add_argument('-v', '--verbose',
                      action='count',
                      help='Verbosity level - default is 3',
                      default=3)
    argp.add_argument('regex',
                      metavar='REGEX',
                      help='REGEX string')
    return vars(argp.parse_args())


def __main__():
    """Command-line entry point: write every generated string to the output.

    Example patterns:
        'as(d|f)qw(e|r|s)[a-zA-Z]{2,3}'
        'as(QWE|Z([XC]|Y|U)V){2,3}asdf'
        '.?'
    """
    args = argparser()
    out = args['output']
    try:
        for s in parse(args['regex']):
            out.write(s + args['delimiter'])
    finally:
        # Close files opened for us by argparse, but leave stdout alone.
        if out is not sys.stdout:
            out.close()


if __name__ == '__main__':
    __main__()