diff --git a/Lib/sgmllib.py b/Lib/sgmllib.py
index 19580e25999..2ebe6981f5d 100644
--- a/Lib/sgmllib.py
+++ b/Lib/sgmllib.py
@@ -39,6 +39,14 @@
     r'\s*([a-zA-Z_][-.a-zA-Z_0-9]*)(\s*=\s*'
     r'(\'[^\']*\'|"[^"]*"|[-a-zA-Z0-9./:;+*%?!&$\(\)_#=~]*))?')
 
+declname = re.compile(r'[a-zA-Z][-_.a-zA-Z0-9]*\s*')
+declstringlit = re.compile(r'(\'[^\']*\'|"[^"]*")\s*')
+
+
+class SGMLParseError(RuntimeError):
+    """Exception raised for all parse errors."""
+    pass
+
 
 # SGML parser base class -- find tags and call handler functions.
 # Usage: p = SGMLParser(); p.feed(data); ...; p.close().
@@ -144,7 +152,12 @@ def goahead(self, end):
                         self.handle_data(rawdata[i])
                         i = i+1
                         continue
-                    i = match.end(0)
+                    # This is some sort of declaration; in "HTML as
+                    # deployed," this should only be the document type
+                    # declaration ("<!DOCTYPE html...>").
+                    k = self.parse_declaration(i)
+                    if k < 0: break
+                    i = k
                     continue
             elif rawdata[i] == '&':
                 match = charref.match(rawdata, i)
@@ -162,7 +175,7 @@ def goahead(self, end):
                     if rawdata[i-1] != ';': i = i-1
                     continue
             else:
-                raise RuntimeError, 'neither < nor & ??'
+                raise SGMLParseError('neither < nor & ??')
             # We get here only if incomplete matches but
             # nothing else
             match = incomplete.match(rawdata, i)
@@ -186,7 +199,7 @@ def goahead(self, end):
     def parse_comment(self, i):
         rawdata = self.rawdata
         if rawdata[i:i+4] != '