From e5034378cc2e46f9a7233269a5687bfec8c8c303 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lemburg?= Date: Tue, 8 Aug 2000 08:04:29 +0000 Subject: [PATCH] Removing UTF-16 aware Unicode comparison code. This kind of compare function (together with other locale aware ones) should go into a new collation support module. See python-dev for a discussion of this removal. Note: This patch should also be applied to the 1.6 branch. --- Lib/test/output/test_unicode | 1 - Lib/test/test_unicode.py | 97 +++++++++++++++++++----------------- Objects/unicodeobject.c | 33 ++++++++++++ 3 files changed, 83 insertions(+), 48 deletions(-) diff --git a/Lib/test/output/test_unicode b/Lib/test/output/test_unicode index 87379cdbb92..783a4860ab4 100644 --- a/Lib/test/output/test_unicode +++ b/Lib/test/output/test_unicode @@ -1,6 +1,5 @@ test_unicode Testing Unicode comparisons... done. -Testing UTF-16 code point order comparisons... done. Testing Unicode contains method... done. Testing Unicode formatting strings... done. Testing builtin codecs... done. diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py index 76a2591920e..8479c209475 100644 --- a/Lib/test/test_unicode.py +++ b/Lib/test/test_unicode.py @@ -168,56 +168,59 @@ def __init__(self): self.seq = [7, u'hello', 123L] assert u'abc' < u'abcd' print 'done.' -print 'Testing UTF-16 code point order comparisons...', -#No surrogates, no fixup required. -assert u'\u0061' < u'\u20ac' -# Non surrogate below surrogate value, no fixup required -assert u'\u0061' < u'\ud800\udc02' +if 0: + # Move these tests to a Unicode collation module test...
-# Non surrogate above surrogate value, fixup required -def test_lecmp(s, s2): - assert s < s2 , "comparison failed on %s < %s" % (s, s2) - -def test_fixup(s): - s2 = u'\ud800\udc01' - test_lecmp(s, s2) - s2 = u'\ud900\udc01' - test_lecmp(s, s2) - s2 = u'\uda00\udc01' - test_lecmp(s, s2) - s2 = u'\udb00\udc01' - test_lecmp(s, s2) - s2 = u'\ud800\udd01' - test_lecmp(s, s2) - s2 = u'\ud900\udd01' - test_lecmp(s, s2) - s2 = u'\uda00\udd01' - test_lecmp(s, s2) - s2 = u'\udb00\udd01' - test_lecmp(s, s2) - s2 = u'\ud800\ude01' - test_lecmp(s, s2) - s2 = u'\ud900\ude01' - test_lecmp(s, s2) - s2 = u'\uda00\ude01' - test_lecmp(s, s2) - s2 = u'\udb00\ude01' - test_lecmp(s, s2) - s2 = u'\ud800\udfff' - test_lecmp(s, s2) - s2 = u'\ud900\udfff' - test_lecmp(s, s2) - s2 = u'\uda00\udfff' - test_lecmp(s, s2) - s2 = u'\udb00\udfff' - test_lecmp(s, s2) + print 'Testing UTF-16 code point order comparisons...', + #No surrogates, no fixup required. + assert u'\u0061' < u'\u20ac' + # Non surrogate below surrogate value, no fixup required + assert u'\u0061' < u'\ud800\udc02' -test_fixup(u'\ue000') -test_fixup(u'\uff61') + # Non surrogate above surrogate value, fixup required + def test_lecmp(s, s2): + assert s < s2 , "comparison failed on %s < %s" % (s, s2) -# Surrogates on both sides, no fixup required -assert u'\ud800\udc02' < u'\ud84d\udc56' -print 'done.' 
+ def test_fixup(s): + s2 = u'\ud800\udc01' + test_lecmp(s, s2) + s2 = u'\ud900\udc01' + test_lecmp(s, s2) + s2 = u'\uda00\udc01' + test_lecmp(s, s2) + s2 = u'\udb00\udc01' + test_lecmp(s, s2) + s2 = u'\ud800\udd01' + test_lecmp(s, s2) + s2 = u'\ud900\udd01' + test_lecmp(s, s2) + s2 = u'\uda00\udd01' + test_lecmp(s, s2) + s2 = u'\udb00\udd01' + test_lecmp(s, s2) + s2 = u'\ud800\ude01' + test_lecmp(s, s2) + s2 = u'\ud900\ude01' + test_lecmp(s, s2) + s2 = u'\uda00\ude01' + test_lecmp(s, s2) + s2 = u'\udb00\ude01' + test_lecmp(s, s2) + s2 = u'\ud800\udfff' + test_lecmp(s, s2) + s2 = u'\ud900\udfff' + test_lecmp(s, s2) + s2 = u'\uda00\udfff' + test_lecmp(s, s2) + s2 = u'\udb00\udfff' + test_lecmp(s, s2) + + test_fixup(u'\ue000') + test_fixup(u'\uff61') + + # Surrogates on both sides, no fixup required + assert u'\ud800\udc02' < u'\ud84d\udc56' + print 'done.' test('ljust', u'abc', u'abc ', 10) test('rjust', u'abc', u' abc', 10) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 83efa8167a8..95f47616eed 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -3169,6 +3169,12 @@ unicode_center(PyUnicodeObject *self, PyObject *args) return (PyObject*) pad(self, left, marg - left, ' '); } +#if 0 + +/* This code should go into some future Unicode collation support + module. The basic comparison should compare ordinals on a naive + basis (this is what Java does and thus JPython too). */ + /* speedy UTF-16 code point order comparison */ /* gleaned from: */ /* http://www-4.ibm.com/software/developer/library/utf16.html?dwzone=unicode */ @@ -3213,6 +3219,33 @@ unicode_compare(PyUnicodeObject *str1, PyUnicodeObject *str2) return (len1 < len2) ? 
-1 : (len1 != len2); } +#else /* plain element-wise comparison; the UTF-16 code point order version is disabled */ + +static int +unicode_compare(PyUnicodeObject *str1, PyUnicodeObject *str2) /* returns -1, 0 or 1 */ +{ + register int len1, len2; + + Py_UNICODE *s1 = str1->str; + Py_UNICODE *s2 = str2->str; + + len1 = str1->length; + len2 = str2->length; + + while (len1 > 0 && len2 > 0) { /* stop as soon as either string is exhausted */ + register long diff; + + diff = (long)*s1++ - (long)*s2++; + if (diff) + return (diff < 0) ? -1 : (diff != 0); /* diff is nonzero here, so this is -1 or 1 */ + len1--; len2--; + } + + return (len1 < len2) ? -1 : (len1 != len2); /* no difference found: compare remaining lengths */ +} + +#endif /* end of #if 0 ... #else */ + int PyUnicode_Compare(PyObject *left, PyObject *right) {