mirror of https://github.com/mahmoud/boltons.git
Improve arguments & add shortcut function
- Escape strings using re.escape unless the regex flag is set - Add shortcut function multi_replace - Add new unit tests
This commit is contained in:
parent
387e59b54c
commit
f7e1cb8111
|
@ -974,19 +974,26 @@ def format_int_list(int_list, delim=',', range_delim='-', delim_space=False):
|
||||||
return output_str
|
return output_str
|
||||||
|
|
||||||
|
|
||||||
class MultiSub(object):
|
class MultiReplace(object):
|
||||||
"""
|
"""
|
||||||
Multisub is a tool for doing multiple find/replace actions in one pass.
|
MultiReplace is a tool for doing multiple find/replace actions in one pass.
|
||||||
|
|
||||||
Given a mapping of values to be replaced it allows for all of the matching
|
Given a mapping of values to be replaced it allows for all of the matching
|
||||||
values to be replaced in a single pass which can save a lot of performance
|
values to be replaced in a single pass which can save a lot of performance
|
||||||
on very large strings. In addition to simple replace, it also allows for
|
on very large strings. In addition to simple replace, it also allows for
|
||||||
replacing based on regular expressions.
|
replacing based on regular expressions.
|
||||||
|
|
||||||
|
Keyword Arguments:
|
||||||
|
|
||||||
|
:type regex: bool
|
||||||
|
:param regex: Treat search keys as regular expressions [Default: False]
|
||||||
|
:type flags: int
|
||||||
|
:param flags: flags to pass to the regex engine during compile
|
||||||
|
|
||||||
Dictionary Usage::
|
Dictionary Usage::
|
||||||
|
|
||||||
from lrmslib import stringutils
|
from lrmslib import stringutils
|
||||||
s = stringutils.MultiSub({
|
s = stringutils.MultiReplace({
|
||||||
'foo': 'zoo',
|
'foo': 'zoo',
|
||||||
'cat': 'hat',
|
'cat': 'hat',
|
||||||
'bat': 'kraken'
|
'bat': 'kraken'
|
||||||
|
@ -997,7 +1004,7 @@ class MultiSub(object):
|
||||||
Iterable Usage::
|
Iterable Usage::
|
||||||
|
|
||||||
from lrmslib import stringutils
|
from lrmslib import stringutils
|
||||||
s = stringutils.MultiSub([
|
s = stringutils.MultiReplace([
|
||||||
('foo', 'zoo'),
|
('foo', 'zoo'),
|
||||||
('cat', 'hat'),
|
('cat', 'hat'),
|
||||||
('bat', 'kraken')
|
('bat', 'kraken')
|
||||||
|
@ -1014,7 +1021,7 @@ class MultiSub(object):
|
||||||
|
|
||||||
>>> 'foo bar baz'.replace('foo', 'baz').replace('baz', 'bar')
|
>>> 'foo bar baz'.replace('foo', 'baz').replace('baz', 'bar')
|
||||||
'bar bar bar'
|
'bar bar bar'
|
||||||
>>> m = MultiSub({'foo': 'baz', 'baz': 'bar'})
|
>>> m = MultiReplace({'foo': 'baz', 'baz': 'bar'})
|
||||||
>>> m.sub('foo bar baz')
|
>>> m.sub('foo bar baz')
|
||||||
'baz bar bar'
|
'baz bar bar'
|
||||||
|
|
||||||
|
@ -1024,8 +1031,13 @@ class MultiSub(object):
|
||||||
of a dictionary.
|
of a dictionary.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, sub_map):
|
def __init__(self, sub_map, **kwargs):
|
||||||
"""Compile any regular expressions that have been passed."""
|
"""Compile any regular expressions that have been passed."""
|
||||||
|
options = {
|
||||||
|
'regex': False,
|
||||||
|
'flags': 0,
|
||||||
|
}
|
||||||
|
options.update(kwargs)
|
||||||
self.sub_data = []
|
self.sub_data = []
|
||||||
|
|
||||||
if isinstance(sub_map, collections.Mapping):
|
if isinstance(sub_map, collections.Mapping):
|
||||||
|
@ -1033,26 +1045,36 @@ class MultiSub(object):
|
||||||
|
|
||||||
for exp, replacement in sub_map:
|
for exp, replacement in sub_map:
|
||||||
if isinstance(exp, basestring):
|
if isinstance(exp, basestring):
|
||||||
exp = re.compile(exp)
|
# If we're not treating input strings like a regex, escape it
|
||||||
|
if not options['regex']:
|
||||||
|
exp = re.escape(exp)
|
||||||
|
exp = re.compile(exp, flags=options['flags'])
|
||||||
self.sub_data.append((exp, replacement))
|
self.sub_data.append((exp, replacement))
|
||||||
|
|
||||||
self.combined_pattern = re.compile('|'.join([
|
self.combined_pattern = re.compile('|'.join([
|
||||||
'(?:{0})'.format(x.pattern) for x, _
|
'(?:{0})'.format(x.pattern) for x, _
|
||||||
in self.sub_data
|
in self.sub_data
|
||||||
]))
|
]), flags=options['flags'])
|
||||||
|
|
||||||
def __call__(self, match):
|
def _get_value(self, match):
|
||||||
|
"""Given a match object find replacement value."""
|
||||||
value = match.string[match.start():match.end()]
|
value = match.string[match.start():match.end()]
|
||||||
for exp, replacement in self.sub_data:
|
for exp, replacement in self.sub_data:
|
||||||
if exp.match(value):
|
if exp.match(value):
|
||||||
return replacement
|
return replacement
|
||||||
return value
|
return value
|
||||||
|
|
||||||
def sub(self, input_string):
|
def sub(self, text):
|
||||||
"""
|
"""
|
||||||
Run substitutions.
|
Run substitutions on the input text.
|
||||||
|
|
||||||
Given an input string, run all substitutions given in the
|
Given an input string, run all substitutions given in the
|
||||||
constructor.
|
constructor.
|
||||||
"""
|
"""
|
||||||
return self.combined_pattern.sub(self, input_string)
|
return self.combined_pattern.sub(self._get_value, text)
|
||||||
|
|
||||||
|
|
||||||
|
def multi_replace(text, sub_map, **kwargs):
|
||||||
|
"""Shortcut function to invoke multi-replace in a single command."""
|
||||||
|
m = MultiReplace(sub_map, **kwargs)
|
||||||
|
return m.sub(text)
|
||||||
|
|
|
@ -47,25 +47,35 @@ def test_format_int_list():
|
||||||
assert strutils.format_int_list([5, 6, 7, 8], delim_space=True) == '5-8'
|
assert strutils.format_int_list([5, 6, 7, 8], delim_space=True) == '5-8'
|
||||||
|
|
||||||
|
|
||||||
class TestMultiSub(TestCase):
|
class TestMultiReplace(TestCase):
|
||||||
|
|
||||||
def test_simple_substitutions(self):
|
def test_simple_substitutions(self):
|
||||||
"""Test replacing multiple values."""
|
"""Test replacing multiple values."""
|
||||||
m = strutils.MultiSub({r'cat': 'kedi', r'purple': 'mor', })
|
m = strutils.MultiReplace({r'cat': 'kedi', r'purple': 'mor', })
|
||||||
self.assertEqual(m.sub('The cat is purple'), 'The kedi is mor')
|
self.assertEqual(m.sub('The cat is purple'), 'The kedi is mor')
|
||||||
|
|
||||||
|
def test_shortcut_function(self):
|
||||||
|
"""Test replacing multiple values."""
|
||||||
|
self.assertEqual(
|
||||||
|
strutils.multi_replace(
|
||||||
|
'The cat is purple',
|
||||||
|
{r'cat': 'kedi', r'purple': 'mor', }
|
||||||
|
),
|
||||||
|
'The kedi is mor'
|
||||||
|
)
|
||||||
|
|
||||||
def test_substitutions_in_word(self):
|
def test_substitutions_in_word(self):
|
||||||
"""Test replacing multiple values that are substrings of a word."""
|
"""Test replacing multiple values that are substrings of a word."""
|
||||||
m = strutils.MultiSub({r'cat': 'kedi', r'purple': 'mor', })
|
m = strutils.MultiReplace({r'cat': 'kedi', r'purple': 'mor', })
|
||||||
self.assertEqual(m.sub('Thecatispurple'), 'Thekediismor')
|
self.assertEqual(m.sub('Thecatispurple'), 'Thekediismor')
|
||||||
|
|
||||||
def test_sub_with_regex(self):
|
def test_sub_with_regex(self):
|
||||||
"""Test substitutions with a regular expression."""
|
"""Test substitutions with a regular expression."""
|
||||||
m = strutils.MultiSub({
|
m = strutils.MultiReplace({
|
||||||
r'cat': 'kedi',
|
r'cat': 'kedi',
|
||||||
r'purple': 'mor',
|
r'purple': 'mor',
|
||||||
r'q\w+?t': 'dinglehopper'
|
r'q\w+?t': 'dinglehopper'
|
||||||
})
|
}, regex=True)
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
m.sub('The purple cat ate a quart of jelly'),
|
m.sub('The purple cat ate a quart of jelly'),
|
||||||
'The mor kedi ate a dinglehopper of jelly'
|
'The mor kedi ate a dinglehopper of jelly'
|
||||||
|
@ -73,11 +83,11 @@ class TestMultiSub(TestCase):
|
||||||
|
|
||||||
def test_sub_with_list(self):
|
def test_sub_with_list(self):
|
||||||
"""Test substitutions from an iterable instead of a dictionary."""
|
"""Test substitutions from an iterable instead of a dictionary."""
|
||||||
m = strutils.MultiSub([
|
m = strutils.MultiReplace([
|
||||||
(r'cat', 'kedi'),
|
(r'cat', 'kedi'),
|
||||||
(r'purple', 'mor'),
|
(r'purple', 'mor'),
|
||||||
(r'q\w+?t', 'dinglehopper'),
|
(r'q\w+?t', 'dinglehopper'),
|
||||||
])
|
], regex=True)
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
m.sub('The purple cat ate a quart of jelly'),
|
m.sub('The purple cat ate a quart of jelly'),
|
||||||
'The mor kedi ate a dinglehopper of jelly'
|
'The mor kedi ate a dinglehopper of jelly'
|
||||||
|
@ -86,7 +96,7 @@ class TestMultiSub(TestCase):
|
||||||
def test_sub_with_compiled_regex(self):
|
def test_sub_with_compiled_regex(self):
|
||||||
"""Test substitutions where some regular expressions are compiled."""
|
"""Test substitutions where some regular expressions are compiled."""
|
||||||
exp = re.compile(r'q\w+?t')
|
exp = re.compile(r'q\w+?t')
|
||||||
m = strutils.MultiSub([
|
m = strutils.MultiReplace([
|
||||||
(r'cat', 'kedi'),
|
(r'cat', 'kedi'),
|
||||||
(r'purple', 'mor'),
|
(r'purple', 'mor'),
|
||||||
(exp, 'dinglehopper'),
|
(exp, 'dinglehopper'),
|
||||||
|
@ -95,3 +105,8 @@ class TestMultiSub(TestCase):
|
||||||
m.sub('The purple cat ate a quart of jelly'),
|
m.sub('The purple cat ate a quart of jelly'),
|
||||||
'The mor kedi ate a dinglehopper of jelly'
|
'The mor kedi ate a dinglehopper of jelly'
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def test_substitutions_with_regex_chars(self):
|
||||||
|
"""Test replacing values that have special regex characters."""
|
||||||
|
m = strutils.MultiReplace({'cat.+': 'kedi', r'purple': 'mor', })
|
||||||
|
self.assertEqual(m.sub('The cat.+ is purple'), 'The kedi is mor')
|
||||||
|
|
Loading…
Reference in New Issue