mirror of https://github.com/asciimoo/exrex.git
[enh] Initial commit
This commit is contained in:
commit
f3ee9d7e2e
|
@ -0,0 +1,2 @@
|
|||
.*swp
|
||||
*pyc
|
|
@ -0,0 +1,82 @@
|
|||
EXREX
|
||||
=====
|
||||
|
||||
### Description
|
||||
|
||||
Exrex is a tool that generates all matching string to a given regular expression.
|
||||
|
||||
### Command line usage
|
||||
|
||||
```
|
||||
> python exrex.py --help
|
||||
usage: exrex.py [-h] [-o FILE] [-d DELIMITER] [-v] REGEX
|
||||
|
||||
exrex - regular expression string generator
|
||||
|
||||
positional arguments:
|
||||
REGEX REGEX string
|
||||
|
||||
optional arguments:
|
||||
-h, --help show this help message and exit
|
||||
-o FILE, --output FILE
|
||||
Output file - default is STDOUT
|
||||
-d DELIMITER, --delimiter DELIMITER
|
||||
Delimiter - default is \n
|
||||
-v, --verbose Verbosity level - default is 3
|
||||
```
|
||||
|
||||
### Using as python module
|
||||
|
||||
Example:
|
||||
|
||||
```python
|
||||
import exrex
|
||||
|
||||
print exrex.parse('[ab]{1, 3}')
|
||||
```
|
||||
|
||||
Example output:
|
||||
|
||||
```
|
||||
a
|
||||
b
|
||||
aa
|
||||
ab
|
||||
ba
|
||||
bb
|
||||
aaa
|
||||
aab
|
||||
aba
|
||||
abb
|
||||
baa
|
||||
bab
|
||||
bba
|
||||
bbb
|
||||
```
|
||||
|
||||
### TODO
|
||||
|
||||
* Memory usage reduction (!generators!)
|
||||
* Count the number of matching strings
|
||||
* Command line switches to change default character sets/ranges (eg. for '.','\s'..)
|
||||
|
||||
|
||||
### License
|
||||
|
||||
```
|
||||
exrex is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
exrex is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with exrex. If not, see < http://www.gnu.org/licenses/ >.
|
||||
|
||||
(C) 2012- by Adam Tauber, <asciimoo@gmail.com>
|
||||
```
|
||||
|
|
@ -0,0 +1,152 @@
|
|||
#!/usr/bin/env python
|
||||
|
||||
# This file is part of exrex.
|
||||
#
|
||||
# exrex is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# exrex is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with exrex. If not, see < http://www.gnu.org/licenses/ >.
|
||||
#
|
||||
# (C) 2012- by Adam Tauber, <asciimoo@gmail.com>
|
||||
|
||||
from re import sre_parse
|
||||
from itertools import product, repeat
|
||||
|
||||
CATEGORIES = {'category_space' : sre_parse.WHITESPACE
|
||||
,'category_digit' : sre_parse.DIGITS
|
||||
,'category_any' : [chr(x) for x in range(32, 123)]
|
||||
}
|
||||
|
||||
def _p(d, append=False):
|
||||
"""docstring for _p"""
|
||||
#print d
|
||||
ret =[]
|
||||
ranges = ['']
|
||||
if not isinstance(d, list):
|
||||
print '[!] not a list: %r' % d
|
||||
return []
|
||||
if not len(d):
|
||||
print '[!] empty list'
|
||||
return []
|
||||
l = ''
|
||||
for i in d:
|
||||
if len(ranges) and i[0] != 'range':
|
||||
if len(ret):
|
||||
tmp_ret = []
|
||||
for char in ranges:
|
||||
for k,_ in enumerate(ret):
|
||||
tmp_ret.append(ret[k]+char)
|
||||
ret = tmp_ret
|
||||
else:
|
||||
ret = ranges
|
||||
ranges = []
|
||||
|
||||
if i[0] == 'literal':
|
||||
if append:
|
||||
if ret[0] == '':
|
||||
ret[0] = chr(i[1])
|
||||
else:
|
||||
ret.append(chr(i[1]))
|
||||
else:
|
||||
for k,_ in enumerate(ret):
|
||||
ret[k] += chr(i[1])
|
||||
elif i[0] == 'subpattern':
|
||||
for sub in i[1:]:
|
||||
tmp_ret = []
|
||||
for piece in _p(list(sub[1])):
|
||||
for k,_ in enumerate(ret):
|
||||
tmp_ret.append(ret[k]+piece)
|
||||
if len(tmp_ret):
|
||||
ret = tmp_ret
|
||||
elif i[0] == 'in':
|
||||
tmp_ret = []
|
||||
for piece in _p(list(i[1]), True):
|
||||
for k,_ in enumerate(ret):
|
||||
tmp_ret.append(ret[k]+piece)
|
||||
ret = tmp_ret
|
||||
elif i[0] == 'range':
|
||||
ranges.extend(map(chr, range(i[1][0], i[1][1]+1)))
|
||||
elif i[0] == 'max_repeat':
|
||||
tmp_ret = []
|
||||
chars = [x for x in _p(list(i[1][2])) if x != '']
|
||||
ret = [r+''.join(piece) for rep in range(i[1][0], i[1][1]+1) for piece in product(*repeat(chars, rep)) for r in ret]
|
||||
# tmp_ret = []
|
||||
# for piece in _p(list(i[1][2])):
|
||||
# for rep in range(i[1][0], i[1][1]+1):
|
||||
# for r in ret:
|
||||
# tmp_ret.append(r+piece*rep)
|
||||
# ret = tmp_ret
|
||||
elif i[0] == 'category':
|
||||
cat = CATEGORIES.get(i[1], [''])
|
||||
ret = [r+c for r in ret for c in cat]
|
||||
elif i[0] == 'branch':
|
||||
subs = []
|
||||
for piece in [_p(list(x)) for x in i[1][1]]:
|
||||
subs.extend(piece)
|
||||
ret = [r+s for r in ret for s in subs]
|
||||
elif i[0] == 'any':
|
||||
ret = [r+c for c in CATEGORIES['category_any'] for r in ret]
|
||||
|
||||
if len(ranges):
|
||||
if len(ret) and ret[0] != '':
|
||||
tmp_ret = []
|
||||
for char in ranges:
|
||||
for k,_ in enumerate(ret):
|
||||
tmp_ret.append(ret[k]+char)
|
||||
ret = tmp_ret
|
||||
else:
|
||||
ret = ranges
|
||||
#print ret
|
||||
return ret
|
||||
|
||||
|
||||
def parse(s):
|
||||
"""docstring for parse"""
|
||||
r = sre_parse.parse(s)
|
||||
# print r
|
||||
return _p(list(r))
|
||||
|
||||
|
||||
def argparser():
|
||||
import argparse
|
||||
from sys import stdout
|
||||
argp = argparse.ArgumentParser(description='exrex - regular expression string generator')
|
||||
argp.add_argument('-o', '--output'
|
||||
,help = 'Output file - default is STDOUT'
|
||||
,metavar = 'FILE'
|
||||
,default = stdout
|
||||
,type = argparse.FileType('w')
|
||||
)
|
||||
argp.add_argument('-d', '--delimiter'
|
||||
,help = 'Delimiter - default is \\n'
|
||||
,default = '\n'
|
||||
)
|
||||
argp.add_argument('-v', '--verbose'
|
||||
,action = 'count'
|
||||
,help = 'Verbosity level - default is 3'
|
||||
,default = 3
|
||||
)
|
||||
argp.add_argument('regex'
|
||||
,metavar = 'REGEX'
|
||||
,help = 'REGEX string'
|
||||
)
|
||||
return vars(argp.parse_args())
|
||||
|
||||
def __main__():
|
||||
# 'as(d|f)qw(e|r|s)[a-zA-Z]{2,3}'
|
||||
# 'as(QWE|Z([XC]|Y|U)V){2,3}asdf'
|
||||
# '.?'
|
||||
args = argparser()
|
||||
for s in parse(args['regex']):
|
||||
args['output'].write(s+args['delimiter'])
|
||||
|
||||
if __name__ == '__main__':
|
||||
__main__()
|
Loading…
Reference in New Issue