Issue 11747: Fix output format for context diffs.

This commit is contained in:
Raymond Hettinger 2011-04-12 15:14:12 -07:00
parent 308f14aeae
commit 3780542039
2 changed files with 136 additions and 37 deletions

View File

@ -1140,6 +1140,21 @@ def IS_CHARACTER_JUNK(ch, ws=" \t"):
return ch in ws
########################################################################
### Unified Diff
########################################################################
def _format_range_unified(start, stop):
'Convert range to the "ed" format'
# Per the diff spec at http://www.unix.org/single_unix_specification/
beginning = start + 1 # lines start numbering with one
length = stop - start
if length == 1:
return '{}'.format(beginning)
if not length:
beginning -= 1 # empty ranges begin at line just before the range
return '{},{}'.format(beginning, length)
def unified_diff(a, b, fromfile='', tofile='', fromfiledate='',
tofiledate='', n=3, lineterm='\n'):
r"""
@ -1160,18 +1175,18 @@ def unified_diff(a, b, fromfile='', tofile='', fromfiledate='',
The unidiff format normally has a header for filenames and modification
times. Any or all of these may be specified using strings for
'fromfile', 'tofile', 'fromfiledate', and 'tofiledate'. The modification
times are normally expressed in the format returned by time.ctime().
'fromfile', 'tofile', 'fromfiledate', and 'tofiledate'.
The modification times are normally expressed in the ISO 8601 format.
Example:
>>> for line in unified_diff('one two three four'.split(),
... 'zero one tree four'.split(), 'Original', 'Current',
... 'Sat Jan 26 23:30:50 1991', 'Fri Jun 06 10:20:52 2003',
... '2005-01-26 23:30:50', '2010-04-02 10:20:52',
... lineterm=''):
... print(line)
--- Original Sat Jan 26 23:30:50 1991
+++ Current Fri Jun 06 10:20:52 2003
... print(line) # doctest: +NORMALIZE_WHITESPACE
--- Original 2005-01-26 23:30:50
+++ Current 2010-04-02 10:20:52
@@ -1,4 +1,4 @@
+zero
one
@ -1184,23 +1199,45 @@ def unified_diff(a, b, fromfile='', tofile='', fromfiledate='',
started = False
for group in SequenceMatcher(None,a,b).get_grouped_opcodes(n):
if not started:
yield '--- %s %s%s' % (fromfile, fromfiledate, lineterm)
yield '+++ %s %s%s' % (tofile, tofiledate, lineterm)
started = True
i1, i2, j1, j2 = group[0][1], group[-1][2], group[0][3], group[-1][4]
yield "@@ -%d,%d +%d,%d @@%s" % (i1+1, i2-i1, j1+1, j2-j1, lineterm)
fromdate = '\t{}'.format(fromfiledate) if fromfiledate else ''
todate = '\t{}'.format(tofiledate) if tofiledate else ''
yield '--- {}{}{}'.format(fromfile, fromdate, lineterm)
yield '+++ {}{}{}'.format(tofile, todate, lineterm)
first, last = group[0], group[-1]
file1_range = _format_range_unified(first[1], last[2])
file2_range = _format_range_unified(first[3], last[4])
yield '@@ -{} +{} @@{}'.format(file1_range, file2_range, lineterm)
for tag, i1, i2, j1, j2 in group:
if tag == 'equal':
for line in a[i1:i2]:
yield ' ' + line
continue
if tag == 'replace' or tag == 'delete':
if tag in {'replace', 'delete'}:
for line in a[i1:i2]:
yield '-' + line
if tag == 'replace' or tag == 'insert':
if tag in {'replace', 'insert'}:
for line in b[j1:j2]:
yield '+' + line
########################################################################
### Context Diff
########################################################################
def _format_range_context(start, stop):
'Convert range to the "ed" format'
# Per the diff spec at http://www.unix.org/single_unix_specification/
beginning = start + 1 # lines start numbering with one
length = stop - start
if not length:
beginning -= 1 # empty ranges begin at line just before the range
if length <= 1:
return '{}'.format(beginning)
return '{},{}'.format(beginning, beginning + length - 1)
# See http://www.unix.org/single_unix_specification/
def context_diff(a, b, fromfile='', tofile='',
fromfiledate='', tofiledate='', n=3, lineterm='\n'):
@ -1223,17 +1260,16 @@ def context_diff(a, b, fromfile='', tofile='',
The context diff format normally has a header for filenames and
modification times. Any or all of these may be specified using
strings for 'fromfile', 'tofile', 'fromfiledate', and 'tofiledate'.
The modification times are normally expressed in the format returned
by time.ctime(). If not specified, the strings default to blanks.
The modification times are normally expressed in the ISO 8601 format.
If not specified, the strings default to blanks.
Example:
>>> print(''.join(context_diff('one\ntwo\nthree\nfour\n'.splitlines(1),
... 'zero\none\ntree\nfour\n'.splitlines(1), 'Original', 'Current',
... 'Sat Jan 26 23:30:50 1991', 'Fri Jun 06 10:22:46 2003')),
... 'zero\none\ntree\nfour\n'.splitlines(1), 'Original', 'Current')),
... end="")
*** Original Sat Jan 26 23:30:50 1991
--- Current Fri Jun 06 10:22:46 2003
*** Original
--- Current
***************
*** 1,4 ****
one
@ -1247,36 +1283,36 @@ def context_diff(a, b, fromfile='', tofile='',
four
"""
prefix = dict(insert='+ ', delete='- ', replace='! ', equal=' ')
started = False
prefixmap = {'insert':'+ ', 'delete':'- ', 'replace':'! ', 'equal':' '}
for group in SequenceMatcher(None,a,b).get_grouped_opcodes(n):
if not started:
yield '*** %s %s%s' % (fromfile, fromfiledate, lineterm)
yield '--- %s %s%s' % (tofile, tofiledate, lineterm)
started = True
fromdate = '\t{}'.format(fromfiledate) if fromfiledate else ''
todate = '\t{}'.format(tofiledate) if tofiledate else ''
yield '*** {}{}{}'.format(fromfile, fromdate, lineterm)
yield '--- {}{}{}'.format(tofile, todate, lineterm)
yield '***************%s' % (lineterm,)
if group[-1][2] - group[0][1] >= 2:
yield '*** %d,%d ****%s' % (group[0][1]+1, group[-1][2], lineterm)
else:
yield '*** %d ****%s' % (group[-1][2], lineterm)
visiblechanges = [e for e in group if e[0] in ('replace', 'delete')]
if visiblechanges:
first, last = group[0], group[-1]
yield '***************' + lineterm
file1_range = _format_range_context(first[1], last[2])
yield '*** {} ****{}'.format(file1_range, lineterm)
if any(tag in {'replace', 'delete'} for tag, _, _, _, _ in group):
for tag, i1, i2, _, _ in group:
if tag != 'insert':
for line in a[i1:i2]:
yield prefixmap[tag] + line
yield prefix[tag] + line
if group[-1][4] - group[0][3] >= 2:
yield '--- %d,%d ----%s' % (group[0][3]+1, group[-1][4], lineterm)
else:
yield '--- %d ----%s' % (group[-1][4], lineterm)
visiblechanges = [e for e in group if e[0] in ('replace', 'insert')]
if visiblechanges:
file2_range = _format_range_context(first[3], last[4])
yield '--- {} ----{}'.format(file2_range, lineterm)
if any(tag in {'replace', 'insert'} for tag, _, _, _, _ in group):
for tag, _, _, j1, j2 in group:
if tag != 'delete':
for line in b[j1:j2]:
yield prefixmap[tag] + line
yield prefix[tag] + line
def ndiff(a, b, linejunk=None, charjunk=IS_CHARACTER_JUNK):
r"""

View File

@ -159,10 +159,73 @@ def test_recursion_limit(self):
difflib.SequenceMatcher(None, old, new).get_opcodes()
class TestOutputFormat(unittest.TestCase):
def test_tab_delimiter(self):
args = ['one', 'two', 'Original', 'Current',
'2005-01-26 23:30:50', '2010-04-02 10:20:52']
ud = difflib.unified_diff(*args, lineterm='')
self.assertEqual(list(ud)[0:2], [
"--- Original\t2005-01-26 23:30:50",
"+++ Current\t2010-04-02 10:20:52"])
cd = difflib.context_diff(*args, lineterm='')
self.assertEqual(list(cd)[0:2], [
"*** Original\t2005-01-26 23:30:50",
"--- Current\t2010-04-02 10:20:52"])
def test_no_trailing_tab_on_empty_filedate(self):
args = ['one', 'two', 'Original', 'Current']
ud = difflib.unified_diff(*args, lineterm='')
self.assertEqual(list(ud)[0:2], ["--- Original", "+++ Current"])
cd = difflib.context_diff(*args, lineterm='')
self.assertEqual(list(cd)[0:2], ["*** Original", "--- Current"])
def test_range_format_unified(self):
# Per the diff spec at http://www.unix.org/single_unix_specification/
spec = '''\
Each <range> field shall be of the form:
%1d", <beginning line number> if the range contains exactly one line,
and:
"%1d,%1d", <beginning line number>, <number of lines> otherwise.
If a range is empty, its beginning line number shall be the number of
the line just before the range, or 0 if the empty range starts the file.
'''
fmt = difflib._format_range_unified
self.assertEqual(fmt(3,3), '3,0')
self.assertEqual(fmt(3,4), '4')
self.assertEqual(fmt(3,5), '4,2')
self.assertEqual(fmt(3,6), '4,3')
self.assertEqual(fmt(0,0), '0,0')
def test_range_format_context(self):
# Per the diff spec at http://www.unix.org/single_unix_specification/
spec = '''\
The range of lines in file1 shall be written in the following format
if the range contains two or more lines:
"*** %d,%d ****\n", <beginning line number>, <ending line number>
and the following format otherwise:
"*** %d ****\n", <ending line number>
The ending line number of an empty range shall be the number of the preceding line,
or 0 if the range is at the start of the file.
Next, the range of lines in file2 shall be written in the following format
if the range contains two or more lines:
"--- %d,%d ----\n", <beginning line number>, <ending line number>
and the following format otherwise:
"--- %d ----\n", <ending line number>
'''
fmt = difflib._format_range_context
self.assertEqual(fmt(3,3), '3')
self.assertEqual(fmt(3,4), '4')
self.assertEqual(fmt(3,5), '4,5')
self.assertEqual(fmt(3,6), '4,6')
self.assertEqual(fmt(0,0), '0')
def test_main():
difflib.HtmlDiff._default_prefix = 0
Doctests = doctest.DocTestSuite(difflib)
run_unittest(TestSFpatches, TestSFbugs, Doctests)
run_unittest(TestSFpatches, TestSFbugs, Doctests, TestOutputFormat)
if __name__ == '__main__':
test_main()