diff --git a/Lib/HTMLParser.py b/Lib/HTMLParser.py
new file mode 100644
index 00000000000..363a6723a8b
--- /dev/null
+++ b/Lib/HTMLParser.py
@@ -0,0 +1,432 @@
+"""A parser for HTML."""
+
+# This file is based on sgmllib.py, but the API is slightly different.
+
+# XXX There should be a way to distinguish between PCDATA (parsed
+# character data -- the normal case), RCDATA (replaceable character
+# data -- only char and entity references and end tags are special)
+# and CDATA (character data -- only end tags are special).
+
+
+import re
+import string
+
+# Regular expressions used for parsing
+
+interesting_normal = re.compile('[&<]')  # a reference or markup start
+interesting_cdata = re.compile(r'<(/|\Z)')  # '</', or '<' at end of data
+incomplete = re.compile('&([a-zA-Z][-.a-zA-Z0-9]*|#[0-9]*)?')
+
+entityref = re.compile('&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]')  # group 1: name
+charref = re.compile('&#([0-9]+)[^0-9]')  # group 1: decimal digits
+
+starttagopen = re.compile('<[a-zA-Z]')
+piopen = re.compile(r'<\?')
+piclose = re.compile('>')
+endtagopen = re.compile('</')
+declopen = re.compile('<!')
+special = re.compile('<![^<>]*>')
+commentopen = re.compile('