mirror of https://github.com/mahmoud/boltons.git
Improve arguments & add shortcut function
- Escape strings using re.escape unless regex flag is set - Added shortcut function multi_replace - New unittests
This commit is contained in:
parent
387e59b54c
commit
f7e1cb8111
|
@ -974,19 +974,26 @@ def format_int_list(int_list, delim=',', range_delim='-', delim_space=False):
|
|||
return output_str
|
||||
|
||||
|
||||
class MultiSub(object):
|
||||
class MultiReplace(object):
|
||||
"""
|
||||
Multisub is a tool for doing multiple find/replace actions in one pass.
|
||||
MultiReplace is a tool for doing multiple find/replace actions in one pass.
|
||||
|
||||
Given a mapping of values to be replaced it allows for all of the matching
|
||||
values to be replaced in a single pass which can save a lot of performance
|
||||
on very large strings. In addition to simple replace, it also allows for
|
||||
replacing based on regular expressions.
|
||||
|
||||
Keyword Arguments:
|
||||
|
||||
:type regex: bool
|
||||
:param regex: Treat search keys as regular expressions [Default: False]
|
||||
:type flags: int
|
||||
:param flags: flags to pass to the regex engine during compile
|
||||
|
||||
Dictionary Usage::
|
||||
|
||||
from lrmslib import stringutils
|
||||
s = stringutils.MultiSub({
|
||||
s = stringutils.MultiReplace({
|
||||
'foo': 'zoo',
|
||||
'cat': 'hat',
|
||||
'bat': 'kraken'
|
||||
|
@ -997,7 +1004,7 @@ class MultiSub(object):
|
|||
Iterable Usage::
|
||||
|
||||
from lrmslib import stringutils
|
||||
s = stringutils.MultiSub([
|
||||
s = stringutils.MultiReplace([
|
||||
('foo', 'zoo'),
|
||||
('cat', 'hat'),
|
||||
('bat', 'kraken')
|
||||
|
@ -1014,7 +1021,7 @@ class MultiSub(object):
|
|||
|
||||
>>> 'foo bar baz'.replace('foo', 'baz').replace('baz', 'bar')
|
||||
'bar bar bar'
|
||||
>>> m = MultiSub({'foo': 'baz', 'baz': 'bar'})
|
||||
>>> m = MultiReplace({'foo': 'baz', 'baz': 'bar'})
|
||||
>>> m.sub('foo bar baz')
|
||||
'baz bar bar'
|
||||
|
||||
|
@ -1024,8 +1031,13 @@ class MultiSub(object):
|
|||
of a dictionary.
|
||||
"""
|
||||
|
||||
def __init__(self, sub_map):
|
||||
def __init__(self, sub_map, **kwargs):
|
||||
"""Compile any regular expressions that have been passed."""
|
||||
options = {
|
||||
'regex': False,
|
||||
'flags': 0,
|
||||
}
|
||||
options.update(kwargs)
|
||||
self.sub_data = []
|
||||
|
||||
if isinstance(sub_map, collections.Mapping):
|
||||
|
@ -1033,26 +1045,36 @@ class MultiSub(object):
|
|||
|
||||
for exp, replacement in sub_map:
|
||||
if isinstance(exp, basestring):
|
||||
exp = re.compile(exp)
|
||||
# If we're not treating input strings like a regex, escape it
|
||||
if not options['regex']:
|
||||
exp = re.escape(exp)
|
||||
exp = re.compile(exp, flags=options['flags'])
|
||||
self.sub_data.append((exp, replacement))
|
||||
|
||||
self.combined_pattern = re.compile('|'.join([
|
||||
'(?:{0})'.format(x.pattern) for x, _
|
||||
in self.sub_data
|
||||
]))
|
||||
]), flags=options['flags'])
|
||||
|
||||
def __call__(self, match):
|
||||
def _get_value(self, match):
|
||||
"""Given a match object find replacement value."""
|
||||
value = match.string[match.start():match.end()]
|
||||
for exp, replacement in self.sub_data:
|
||||
if exp.match(value):
|
||||
return replacement
|
||||
return value
|
||||
|
||||
def sub(self, input_string):
|
||||
def sub(self, text):
|
||||
"""
|
||||
Run substitutions.
|
||||
Run substitutions on the input text.
|
||||
|
||||
Given an input string, run all substitutions given in the
|
||||
constructor.
|
||||
"""
|
||||
return self.combined_pattern.sub(self, input_string)
|
||||
return self.combined_pattern.sub(self._get_value, text)
|
||||
|
||||
|
||||
def multi_replace(text, sub_map, **kwargs):
|
||||
"""Shortcut function to invoke multi-replace in a single command."""
|
||||
m = MultiReplace(sub_map, **kwargs)
|
||||
return m.sub(text)
|
||||
|
|
|
@ -47,25 +47,35 @@ def test_format_int_list():
|
|||
assert strutils.format_int_list([5, 6, 7, 8], delim_space=True) == '5-8'
|
||||
|
||||
|
||||
class TestMultiSub(TestCase):
|
||||
class TestMultiReplace(TestCase):
|
||||
|
||||
def test_simple_substitutions(self):
|
||||
"""Test replacing multiple values."""
|
||||
m = strutils.MultiSub({r'cat': 'kedi', r'purple': 'mor', })
|
||||
m = strutils.MultiReplace({r'cat': 'kedi', r'purple': 'mor', })
|
||||
self.assertEqual(m.sub('The cat is purple'), 'The kedi is mor')
|
||||
|
||||
def test_shortcut_function(self):
|
||||
"""Test replacing multiple values."""
|
||||
self.assertEqual(
|
||||
strutils.multi_replace(
|
||||
'The cat is purple',
|
||||
{r'cat': 'kedi', r'purple': 'mor', }
|
||||
),
|
||||
'The kedi is mor'
|
||||
)
|
||||
|
||||
def test_substitutions_in_word(self):
|
||||
"""Test replacing multiple values that are substrings of a word."""
|
||||
m = strutils.MultiSub({r'cat': 'kedi', r'purple': 'mor', })
|
||||
m = strutils.MultiReplace({r'cat': 'kedi', r'purple': 'mor', })
|
||||
self.assertEqual(m.sub('Thecatispurple'), 'Thekediismor')
|
||||
|
||||
def test_sub_with_regex(self):
|
||||
"""Test substitutions with a regular expression."""
|
||||
m = strutils.MultiSub({
|
||||
m = strutils.MultiReplace({
|
||||
r'cat': 'kedi',
|
||||
r'purple': 'mor',
|
||||
r'q\w+?t': 'dinglehopper'
|
||||
})
|
||||
}, regex=True)
|
||||
self.assertEqual(
|
||||
m.sub('The purple cat ate a quart of jelly'),
|
||||
'The mor kedi ate a dinglehopper of jelly'
|
||||
|
@ -73,11 +83,11 @@ class TestMultiSub(TestCase):
|
|||
|
||||
def test_sub_with_list(self):
|
||||
"""Test substitutions from an iterable instead of a dictionary."""
|
||||
m = strutils.MultiSub([
|
||||
m = strutils.MultiReplace([
|
||||
(r'cat', 'kedi'),
|
||||
(r'purple', 'mor'),
|
||||
(r'q\w+?t', 'dinglehopper'),
|
||||
])
|
||||
], regex=True)
|
||||
self.assertEqual(
|
||||
m.sub('The purple cat ate a quart of jelly'),
|
||||
'The mor kedi ate a dinglehopper of jelly'
|
||||
|
@ -86,7 +96,7 @@ class TestMultiSub(TestCase):
|
|||
def test_sub_with_compiled_regex(self):
|
||||
"""Test substitutions where some regular expressiosn are compiled."""
|
||||
exp = re.compile(r'q\w+?t')
|
||||
m = strutils.MultiSub([
|
||||
m = strutils.MultiReplace([
|
||||
(r'cat', 'kedi'),
|
||||
(r'purple', 'mor'),
|
||||
(exp, 'dinglehopper'),
|
||||
|
@ -95,3 +105,8 @@ class TestMultiSub(TestCase):
|
|||
m.sub('The purple cat ate a quart of jelly'),
|
||||
'The mor kedi ate a dinglehopper of jelly'
|
||||
)
|
||||
|
||||
def test_substitutions_with_regex_chars(self):
|
||||
"""Test replacing values that have special regex characters."""
|
||||
m = strutils.MultiReplace({'cat.+': 'kedi', r'purple': 'mor', })
|
||||
self.assertEqual(m.sub('The cat.+ is purple'), 'The kedi is mor')
|
||||
|
|
Loading…
Reference in New Issue