2007-08-30 01:15:14 +00:00
|
|
|
|
# Copyright (C) 2001-2007 Python Software Foundation
|
|
|
|
|
# Author: Barry Warsaw, Thomas Wouters, Anthony Baxter
|
|
|
|
|
# Contact: email-sig@python.org
|
|
|
|
|
|
|
|
|
|
"""A parser of RFC 2822 and MIME email messages."""
|
|
|
|
|
|
2013-03-16 01:00:48 +00:00
|
|
|
|
# Public names exported by ``from <this module> import *``.
__all__ = ['Parser', 'HeaderParser', 'BytesParser', 'BytesHeaderParser',
           'FeedParser', 'BytesFeedParser']
|
2007-08-30 01:15:14 +00:00
|
|
|
|
|
2010-10-08 15:55:28 +00:00
|
|
|
|
from io import StringIO, TextIOWrapper
|
2007-08-30 01:15:14 +00:00
|
|
|
|
|
2013-03-16 00:38:15 +00:00
|
|
|
|
from email.feedparser import FeedParser, BytesFeedParser
|
2012-05-25 19:01:48 +00:00
|
|
|
|
from email._policybase import compat32
|
2007-08-30 01:15:14 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class Parser:
    def __init__(self, _class=None, *, policy=compat32):
        """Parser of RFC 2822 and MIME email messages.

        Creates an in-memory object tree representing the email message, which
        can then be manipulated and turned over to a Generator to return the
        textual representation of the message.

        The string must be formatted as a block of RFC 2822 headers and header
        continuation lines, optionally preceded by a `Unix-from' header.  The
        header block is terminated either by the end of the string or by a
        blank line.

        _class is the class to instantiate for new message objects when they
        must be created.  This class must have a constructor that can take
        zero arguments.  It is handed unchanged to FeedParser; when it is None
        (the default) the choice of message class is left to FeedParser.

        The policy keyword specifies a policy object that controls a number of
        aspects of the parser's operation.  The default policy maintains
        backward compatibility.
        """
        self._class = _class
        self.policy = policy

    def parse(self, fp, headersonly=False):
        """Create a message structure from the data in a file.

        Reads all the data from the file and returns the root of the message
        structure.  Optional headersonly is a flag specifying whether to stop
        parsing after reading the headers or not.  The default is False,
        meaning it parses the entire contents of the file.

        fp only needs a read(size) method; it is read until read() returns
        an empty (falsy) result.  fp is not closed by this method.
        """
        # A fresh FeedParser per call, so one Parser can be reused for
        # several messages.
        feedparser = FeedParser(self._class, policy=self.policy)
        if headersonly:
            feedparser._set_headersonly()
        # Feed the input incrementally instead of slurping the whole file.
        while True:
            data = fp.read(8192)
            if not data:
                break
            feedparser.feed(data)
        return feedparser.close()

    def parsestr(self, text, headersonly=False):
        """Create a message structure from a string.

        Returns the root of the message structure.  Optional headersonly is a
        flag specifying whether to stop parsing after reading the headers or
        not.  The default is False, meaning it parses the entire contents of
        the string.
        """
        # Goes through self.parse() so that subclasses overriding parse()
        # also affect parsestr().
        return self.parse(StringIO(text), headersonly=headersonly)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class HeaderParser(Parser):
    """Parser variant that always parses in headers-only mode.

    Both methods accept a headersonly argument so their signatures stay
    compatible with Parser, but its value is ignored: True is always passed
    on to the Parser implementation.
    """

    def parse(self, fp, headersonly=True):
        """Parse the file fp in headers-only mode and return the root message."""
        return Parser.parse(self, fp, True)

    def parsestr(self, text, headersonly=True):
        """Parse the string text in headers-only mode and return the root message."""
        return Parser.parsestr(self, text, True)
|
2010-10-08 15:55:28 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class BytesParser:

    def __init__(self, *args, **kw):
        """Parser of binary RFC 2822 and MIME email messages.

        Creates an in-memory object tree representing the email message, which
        can then be manipulated and turned over to a Generator to return the
        textual representation of the message.

        The input must be formatted as a block of RFC 2822 headers and header
        continuation lines, optionally preceded by a `Unix-from' header.  The
        header block is terminated either by the end of the input or by a
        blank line.

        All positional and keyword arguments are forwarded unchanged to
        Parser, which does the actual parsing; see Parser for their meaning
        (e.g. _class, the class to instantiate for new message objects).
        """
        self.parser = Parser(*args, **kw)

    def parse(self, fp, headersonly=False):
        """Create a message structure from the data in a binary file.

        Reads all the data from the file and returns the root of the message
        structure.  Optional headersonly is a flag specifying whether to stop
        parsing after reading the headers or not.  The default is False,
        meaning it parses the entire contents of the file.
        """
        # Present the binary file as text; surrogateescape lets non-ASCII
        # bytes pass through the decoder instead of raising.
        fp = TextIOWrapper(fp, encoding='ascii', errors='surrogateescape')
        try:
            return self.parser.parse(fp, headersonly)
        finally:
            # Separate the wrapper from the caller's file (even on error) so
            # the underlying binary stream is left to the caller.
            fp.detach()

    def parsebytes(self, text, headersonly=False):
        """Create a message structure from a byte string.

        Returns the root of the message structure.  Optional headersonly is a
        flag specifying whether to stop parsing after reading the headers or
        not.  The default is False, meaning it parses the entire contents of
        the byte string.
        """
        # Same decoding as parse(): ASCII with surrogateescape.
        text = text.decode('ASCII', errors='surrogateescape')
        return self.parser.parsestr(text, headersonly)
|
2011-04-13 20:46:05 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class BytesHeaderParser(BytesParser):
    """BytesParser variant that always parses in headers-only mode.

    Both methods accept a headersonly argument so their signatures stay
    compatible with BytesParser, but its value is ignored: headersonly=True
    is always passed on to the BytesParser implementation.
    """

    def parse(self, fp, headersonly=True):
        """Parse the binary file fp in headers-only mode and return the root message."""
        return BytesParser.parse(self, fp, headersonly=True)

    def parsebytes(self, text, headersonly=True):
        """Parse the byte string text in headers-only mode and return the root message."""
        return BytesParser.parsebytes(self, text, headersonly=True)
|