Improve arguments & add shortcut function

- Escape strings using re.escape unless regex flag
  is set
- Added shortcut function multi_replace
- New unittests
This commit is contained in:
Brant Watson 2018-06-27 16:51:35 -05:00 committed by Mahmoud Hashemi
parent 387e59b54c
commit f7e1cb8111
2 changed files with 57 additions and 20 deletions

View File

@ -974,19 +974,26 @@ def format_int_list(int_list, delim=',', range_delim='-', delim_space=False):
return output_str return output_str
class MultiSub(object): class MultiReplace(object):
""" """
Multisub is a tool for doing multiple find/replace actions in one pass. MultiReplace is a tool for doing multiple find/replace actions in one pass.
Given a mapping of values to be replaced it allows for all of the matching Given a mapping of values to be replaced it allows for all of the matching
values to be replaced in a single pass which can save a lot of performance values to be replaced in a single pass which can save a lot of performance
on very large strings. In addition to simple replace, it also allows for on very large strings. In addition to simple replace, it also allows for
replacing based on regular expressions. replacing based on regular expressions.
Keyword Arguments:
:type regex: bool
:param regex: Treat search keys as regular expressions [Default: False]
:type flags: int
:param flags: flags to pass to the regex engine during compile [Default: 0]
Dictionary Usage:: Dictionary Usage::
from lrmslib import stringutils from lrmslib import stringutils
s = stringutils.MultiSub({ s = stringutils.MultiReplace({
'foo': 'zoo', 'foo': 'zoo',
'cat': 'hat', 'cat': 'hat',
'bat': 'kraken' 'bat': 'kraken'
@ -997,7 +1004,7 @@ class MultiSub(object):
Iterable Usage:: Iterable Usage::
from lrmslib import stringutils from lrmslib import stringutils
s = stringutils.MultiSub([ s = stringutils.MultiReplace([
('foo', 'zoo'), ('foo', 'zoo'),
('cat', 'hat'), ('cat', 'hat'),
('bat', 'kraken') ('bat', 'kraken')
@ -1014,7 +1021,7 @@ class MultiSub(object):
>>> 'foo bar baz'.replace('foo', 'baz').replace('baz', 'bar') >>> 'foo bar baz'.replace('foo', 'baz').replace('baz', 'bar')
'bar bar bar' 'bar bar bar'
>>> m = MultiSub({'foo': 'baz', 'baz': 'bar'}) >>> m = MultiReplace({'foo': 'baz', 'baz': 'bar'})
>>> m.sub('foo bar baz') >>> m.sub('foo bar baz')
'baz bar bar' 'baz bar bar'
@ -1024,8 +1031,13 @@ class MultiSub(object):
of a dictionary. of a dictionary.
""" """
def __init__(self, sub_map): def __init__(self, sub_map, **kwargs):
"""Compile any regular expressions that have been passed.""" """Compile any regular expressions that have been passed."""
options = {
'regex': False,
'flags': 0,
}
options.update(kwargs)
self.sub_data = [] self.sub_data = []
if isinstance(sub_map, collections.Mapping): if isinstance(sub_map, collections.Mapping):
@ -1033,26 +1045,36 @@ class MultiSub(object):
for exp, replacement in sub_map: for exp, replacement in sub_map:
if isinstance(exp, basestring): if isinstance(exp, basestring):
exp = re.compile(exp) # If we're not treating input strings like a regex, escape it
if not options['regex']:
exp = re.escape(exp)
exp = re.compile(exp, flags=options['flags'])
self.sub_data.append((exp, replacement)) self.sub_data.append((exp, replacement))
self.combined_pattern = re.compile('|'.join([ self.combined_pattern = re.compile('|'.join([
'(?:{0})'.format(x.pattern) for x, _ '(?:{0})'.format(x.pattern) for x, _
in self.sub_data in self.sub_data
])) ]), flags=options['flags'])
def __call__(self, match): def _get_value(self, match):
"""Given a match object find replacement value."""
value = match.string[match.start():match.end()] value = match.string[match.start():match.end()]
for exp, replacement in self.sub_data: for exp, replacement in self.sub_data:
if exp.match(value): if exp.match(value):
return replacement return replacement
return value return value
def sub(self, input_string): def sub(self, text):
""" """
Run substitutions. Run substitutions on the input text.
Given an input string, run all substitutions given in the Given an input string, run all substitutions given in the
constructor. constructor.
""" """
return self.combined_pattern.sub(self, input_string) return self.combined_pattern.sub(self._get_value, text)
def multi_replace(text, sub_map, **kwargs):
"""Shortcut function to invoke multi-replace in a single command."""
m = MultiReplace(sub_map, **kwargs)
return m.sub(text)

View File

@ -47,25 +47,35 @@ def test_format_int_list():
assert strutils.format_int_list([5, 6, 7, 8], delim_space=True) == '5-8' assert strutils.format_int_list([5, 6, 7, 8], delim_space=True) == '5-8'
class TestMultiSub(TestCase): class TestMultiReplace(TestCase):
def test_simple_substitutions(self): def test_simple_substitutions(self):
"""Test replacing multiple values.""" """Test replacing multiple values."""
m = strutils.MultiSub({r'cat': 'kedi', r'purple': 'mor', }) m = strutils.MultiReplace({r'cat': 'kedi', r'purple': 'mor', })
self.assertEqual(m.sub('The cat is purple'), 'The kedi is mor') self.assertEqual(m.sub('The cat is purple'), 'The kedi is mor')
def test_shortcut_function(self):
"""Test the multi_replace shortcut function."""
self.assertEqual(
strutils.multi_replace(
'The cat is purple',
{r'cat': 'kedi', r'purple': 'mor', }
),
'The kedi is mor'
)
def test_substitutions_in_word(self): def test_substitutions_in_word(self):
"""Test replacing multiple values that are substrings of a word.""" """Test replacing multiple values that are substrings of a word."""
m = strutils.MultiSub({r'cat': 'kedi', r'purple': 'mor', }) m = strutils.MultiReplace({r'cat': 'kedi', r'purple': 'mor', })
self.assertEqual(m.sub('Thecatispurple'), 'Thekediismor') self.assertEqual(m.sub('Thecatispurple'), 'Thekediismor')
def test_sub_with_regex(self): def test_sub_with_regex(self):
"""Test substitutions with a regular expression.""" """Test substitutions with a regular expression."""
m = strutils.MultiSub({ m = strutils.MultiReplace({
r'cat': 'kedi', r'cat': 'kedi',
r'purple': 'mor', r'purple': 'mor',
r'q\w+?t': 'dinglehopper' r'q\w+?t': 'dinglehopper'
}) }, regex=True)
self.assertEqual( self.assertEqual(
m.sub('The purple cat ate a quart of jelly'), m.sub('The purple cat ate a quart of jelly'),
'The mor kedi ate a dinglehopper of jelly' 'The mor kedi ate a dinglehopper of jelly'
@ -73,11 +83,11 @@ class TestMultiSub(TestCase):
def test_sub_with_list(self): def test_sub_with_list(self):
"""Test substitutions from an iterable instead of a dictionary.""" """Test substitutions from an iterable instead of a dictionary."""
m = strutils.MultiSub([ m = strutils.MultiReplace([
(r'cat', 'kedi'), (r'cat', 'kedi'),
(r'purple', 'mor'), (r'purple', 'mor'),
(r'q\w+?t', 'dinglehopper'), (r'q\w+?t', 'dinglehopper'),
]) ], regex=True)
self.assertEqual( self.assertEqual(
m.sub('The purple cat ate a quart of jelly'), m.sub('The purple cat ate a quart of jelly'),
'The mor kedi ate a dinglehopper of jelly' 'The mor kedi ate a dinglehopper of jelly'
@ -86,7 +96,7 @@ class TestMultiSub(TestCase):
def test_sub_with_compiled_regex(self): def test_sub_with_compiled_regex(self):
"""Test substitutions where some regular expressions are compiled.""" """Test substitutions where some regular expressions are compiled."""
exp = re.compile(r'q\w+?t') exp = re.compile(r'q\w+?t')
m = strutils.MultiSub([ m = strutils.MultiReplace([
(r'cat', 'kedi'), (r'cat', 'kedi'),
(r'purple', 'mor'), (r'purple', 'mor'),
(exp, 'dinglehopper'), (exp, 'dinglehopper'),
@ -95,3 +105,8 @@ class TestMultiSub(TestCase):
m.sub('The purple cat ate a quart of jelly'), m.sub('The purple cat ate a quart of jelly'),
'The mor kedi ate a dinglehopper of jelly' 'The mor kedi ate a dinglehopper of jelly'
) )
def test_substitutions_with_regex_chars(self):
"""Test replacing values that have special regex characters."""
m = strutils.MultiReplace({'cat.+': 'kedi', r'purple': 'mor', })
self.assertEqual(m.sub('The cat.+ is purple'), 'The kedi is mor')