Improve arguments & add shortcut function

- Escape strings using re.escape unless regex flag
  is set
- Added shortcut function multi_replace
- New unittests
This commit is contained in:
Brant Watson 2018-06-27 16:51:35 -05:00 committed by Mahmoud Hashemi
parent 387e59b54c
commit f7e1cb8111
2 changed files with 57 additions and 20 deletions

View File

@ -974,19 +974,26 @@ def format_int_list(int_list, delim=',', range_delim='-', delim_space=False):
return output_str
class MultiSub(object):
class MultiReplace(object):
"""
Multisub is a tool for doing multiple find/replace actions in one pass.
MultiReplace is a tool for doing multiple find/replace actions in one pass.
Given a mapping of values to be replaced, it replaces all of the matching
values in a single pass, which can save a lot of time on very large
strings. In addition to simple replacement, it also allows for replacing
based on regular expressions.
Keyword Arguments:
:type regex: bool
:param regex: Treat search keys as regular expressions [Default: False]
:type flags: int
:param flags: flags to pass to the regex engine during compile
Dictionary Usage::
from lrmslib import stringutils
s = stringutils.MultiSub({
s = stringutils.MultiReplace({
'foo': 'zoo',
'cat': 'hat',
'bat': 'kraken'
@ -997,7 +1004,7 @@ class MultiSub(object):
Iterable Usage::
from lrmslib import stringutils
s = stringutils.MultiSub([
s = stringutils.MultiReplace([
('foo', 'zoo'),
('cat', 'hat'),
('bat', 'kraken')
@ -1014,7 +1021,7 @@ class MultiSub(object):
>>> 'foo bar baz'.replace('foo', 'baz').replace('baz', 'bar')
'bar bar bar'
>>> m = MultiSub({'foo': 'baz', 'baz': 'bar'})
>>> m = MultiReplace({'foo': 'baz', 'baz': 'bar'})
>>> m.sub('foo bar baz')
'baz bar bar'
@ -1024,8 +1031,13 @@ class MultiSub(object):
of a dictionary.
"""
def __init__(self, sub_map):
def __init__(self, sub_map, **kwargs):
"""Compile any regular expressions that have been passed."""
options = {
'regex': False,
'flags': 0,
}
options.update(kwargs)
self.sub_data = []
if isinstance(sub_map, collections.Mapping):
@ -1033,26 +1045,36 @@ class MultiSub(object):
for exp, replacement in sub_map:
if isinstance(exp, basestring):
exp = re.compile(exp)
# If we're not treating input strings like a regex, escape it
if not options['regex']:
exp = re.escape(exp)
exp = re.compile(exp, flags=options['flags'])
self.sub_data.append((exp, replacement))
self.combined_pattern = re.compile('|'.join([
'(?:{0})'.format(x.pattern) for x, _
in self.sub_data
]))
]), flags=options['flags'])
def __call__(self, match):
def _get_value(self, match):
"""Given a match object find replacement value."""
value = match.string[match.start():match.end()]
for exp, replacement in self.sub_data:
if exp.match(value):
return replacement
return value
def sub(self, input_string):
def sub(self, text):
"""
Run substitutions.
Run substitutions on the input text.
Given an input string, run all substitutions given in the
constructor.
"""
return self.combined_pattern.sub(self, input_string)
return self.combined_pattern.sub(self._get_value, text)
def multi_replace(text, sub_map, **kwargs):
    """Run a multi-replace over ``text`` in a single call.

    Convenience wrapper: builds a MultiReplace from ``sub_map``, forwarding
    any keyword arguments to its constructor, and returns the result of
    calling its ``sub`` method on ``text``.
    """
    return MultiReplace(sub_map, **kwargs).sub(text)

View File

@ -47,25 +47,35 @@ def test_format_int_list():
assert strutils.format_int_list([5, 6, 7, 8], delim_space=True) == '5-8'
class TestMultiSub(TestCase):
class TestMultiReplace(TestCase):
def test_simple_substitutions(self):
"""Test replacing multiple values."""
m = strutils.MultiSub({r'cat': 'kedi', r'purple': 'mor', })
m = strutils.MultiReplace({r'cat': 'kedi', r'purple': 'mor', })
self.assertEqual(m.sub('The cat is purple'), 'The kedi is mor')
def test_shortcut_function(self):
    """Test the multi_replace shortcut function on a simple mapping."""
    replacements = {r'cat': 'kedi', r'purple': 'mor'}
    result = strutils.multi_replace('The cat is purple', replacements)
    self.assertEqual(result, 'The kedi is mor')
def test_substitutions_in_word(self):
"""Test replacing multiple values that are substrings of a word."""
m = strutils.MultiSub({r'cat': 'kedi', r'purple': 'mor', })
m = strutils.MultiReplace({r'cat': 'kedi', r'purple': 'mor', })
self.assertEqual(m.sub('Thecatispurple'), 'Thekediismor')
def test_sub_with_regex(self):
"""Test substitutions with a regular expression."""
m = strutils.MultiSub({
m = strutils.MultiReplace({
r'cat': 'kedi',
r'purple': 'mor',
r'q\w+?t': 'dinglehopper'
})
}, regex=True)
self.assertEqual(
m.sub('The purple cat ate a quart of jelly'),
'The mor kedi ate a dinglehopper of jelly'
@ -73,11 +83,11 @@ class TestMultiSub(TestCase):
def test_sub_with_list(self):
"""Test substitutions from an iterable instead of a dictionary."""
m = strutils.MultiSub([
m = strutils.MultiReplace([
(r'cat', 'kedi'),
(r'purple', 'mor'),
(r'q\w+?t', 'dinglehopper'),
])
], regex=True)
self.assertEqual(
m.sub('The purple cat ate a quart of jelly'),
'The mor kedi ate a dinglehopper of jelly'
@ -86,7 +96,7 @@ class TestMultiSub(TestCase):
def test_sub_with_compiled_regex(self):
"""Test substitutions where some regular expressions are compiled."""
exp = re.compile(r'q\w+?t')
m = strutils.MultiSub([
m = strutils.MultiReplace([
(r'cat', 'kedi'),
(r'purple', 'mor'),
(exp, 'dinglehopper'),
@ -95,3 +105,8 @@ class TestMultiSub(TestCase):
m.sub('The purple cat ate a quart of jelly'),
'The mor kedi ate a dinglehopper of jelly'
)
def test_substitutions_with_regex_chars(self):
    """Test that keys with special regex characters are replaced literally."""
    replacements = {'cat.+': 'kedi', r'purple': 'mor'}
    replacer = strutils.MultiReplace(replacements)
    result = replacer.sub('The cat.+ is purple')
    self.assertEqual(result, 'The kedi is mor')