diff --git a/Doc/library/csv.rst b/Doc/library/csv.rst index 7a72c26d5ba..cb03f8da202 100644 --- a/Doc/library/csv.rst +++ b/Doc/library/csv.rst @@ -269,6 +269,20 @@ The :mod:`csv` module defines the following classes: Analyze the sample text (presumed to be in CSV format) and return :const:`True` if the first row appears to be a series of column headers. + Inspecting each column, one of two key criteria will be considered to + estimate if the sample contains a header: + + - the second through n-th rows contain numeric values + - the second through n-th rows contain strings where at least one value's + length differs from that of the putative header of that column. + + Twenty rows after the first row are sampled; if more than half of columns and + rows meet the criteria, :const:`True` is returned. + + .. note:: + + This method is a rough heuristic and may produce both false positives and + negatives. An example for :class:`Sniffer` use:: diff --git a/Lib/csv.py b/Lib/csv.py index dc85077f3ec..bb3ee269ae7 100644 --- a/Lib/csv.py +++ b/Lib/csv.py @@ -409,14 +409,10 @@ def has_header(self, sample): continue # skip rows that have irregular number of columns for col in list(columnTypes.keys()): - - for thisType in [int, float, complex]: - try: - thisType(row[col]) - break - except (ValueError, OverflowError): - pass - else: + thisType = complex + try: + thisType(row[col]) + except (ValueError, OverflowError): # fallback to length of string thisType = len(row[col]) diff --git a/Lib/test/test_csv.py b/Lib/test/test_csv.py index 18b86aa71a5..09e72a71f1d 100644 --- a/Lib/test/test_csv.py +++ b/Lib/test/test_csv.py @@ -1020,6 +1020,42 @@ class TestSniffer(unittest.TestCase): 'Stonecutters ''Seafood'' and Chop House'+ 'Lemont'+ 'IL'+ '12/19/02'+ 'Week Back' """ + sample10 = dedent(""" + abc,def + ghijkl,mno + ghi,jkl + """) + + sample11 = dedent(""" + abc,def + ghijkl,mnop + ghi,jkl + """) + + sample12 = dedent(""""time","forces" + 1,1.5 + 0.5,5+0j + 0,0 + 1+1j,6 + """) + + 
sample13 = dedent(""""time","forces" + 0,0 + 1,2 + a,b + """) + + def test_issue43625(self): + sniffer = csv.Sniffer() + self.assertTrue(sniffer.has_header(self.sample12)) + self.assertFalse(sniffer.has_header(self.sample13)) + + def test_has_header_strings(self): + "More to document existing (unexpected?) behavior than anything else." + sniffer = csv.Sniffer() + self.assertFalse(sniffer.has_header(self.sample10)) + self.assertFalse(sniffer.has_header(self.sample11)) + def test_has_header(self): sniffer = csv.Sniffer() self.assertIs(sniffer.has_header(self.sample1), False) diff --git a/Misc/NEWS.d/next/Library/2021-06-29-07-27-08.bpo-43625.ZlAxhp.rst b/Misc/NEWS.d/next/Library/2021-06-29-07-27-08.bpo-43625.ZlAxhp.rst new file mode 100644 index 00000000000..a21975b948e --- /dev/null +++ b/Misc/NEWS.d/next/Library/2021-06-29-07-27-08.bpo-43625.ZlAxhp.rst @@ -0,0 +1,2 @@ +Fix a bug in the detection of CSV file headers by +:meth:`csv.Sniffer.has_header` and improve documentation of same.