cpython/Lib/dos-8x3/mimetype.py

"""Guess the MIME type of a file.

This module defines two useful functions:

guess_type(url) -- guess the MIME type and encoding of a URL.

guess_extension(type) -- guess the extension for a given MIME type.

It also contains the following, for tuning the behavior:

Data:

knownfiles -- list of files to parse
inited -- flag set when init() has been called
suffix_map -- dictionary mapping suffixes to suffixes
encodings_map -- dictionary mapping suffixes to encodings
types_map -- dictionary mapping suffixes to types

Functions:

init([files]) -- parse a list of files, default knownfiles
read_mime_types(file) -- parse one file, return a dictionary or None

"""

import string
import posixpath
import urllib

# Candidate mime.types files.  init() parses them in this order, and an
# entry read from a later file replaces the same suffix read from an
# earlier one.  Each path appears once: the httpd path used to be listed
# both before and after the netscape one, which only made init() parse the
# same file twice -- the later occurrence (kept here) already overrode
# everything the earlier one contributed.
knownfiles = [
    "/usr/local/lib/netscape/mime.types",
    "/usr/local/etc/httpd/conf/mime.types",     # Apache 1.2
    "/usr/local/etc/mime.types",                # Apache 1.3
    ]

# Flag set to 1 by init(); guess_type() and guess_extension() call init()
# themselves while it is still 0.
inited = 0

def guess_type(url):
    """Guess the type of a file based on its URL.

    Return value is a tuple (type, encoding) where type is None if the
    type can't be guessed (no or unknown suffix) or a string of the
    form type/subtype, usable for a MIME Content-type header; and
    encoding is None for no encoding or the name of the program used
    to encode (e.g. compress or gzip).  The mappings are table
    driven.  Encoding suffixes are case sensitive; type suffixes are
    first tried case sensitive, then case insensitive.

    The suffixes .tgz, .taz and .tz (case sensitive!) are all mapped
    to ".tar.gz".  (This is table-driven too, using the dictionary
    suffix_map).

    URLs using the "data" scheme are handled separately: the type is
    the media type embedded in the URL (text/plain when it is missing
    or malformed), the encoding is always None, and a data URL that
    contains no comma yields (None, None).

    init() is called first if the module has not been initialised yet.
    """
    if not inited:
        init()
    scheme, url = urllib.splittype(url)
    if scheme == 'data':
        # syntax of data URLs:
        # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
        # mediatype := [ type "/" subtype ] *( ";" parameter )
        # data      := *urlchar
        # parameter := attribute "=" value
        # type/subtype defaults to "text/plain"
        comma = string.find(url, ',')
        if comma < 0:
            # bad data URL
            return None, None
        # Only look for ';' in the part before the comma; anything after
        # the comma is payload.
        semi = string.find(url, ';', 0, comma)
        if semi >= 0:
            mediatype = url[:semi]
        else:
            mediatype = url[:comma]
        # A leading segment containing '=' is a parameter rather than a
        # type, and one without '/' is not of the form type/subtype; fall
        # back to the default in both cases.
        if '=' in mediatype or '/' not in mediatype:
            mediatype = 'text/plain'
        return mediatype, None          # never compressed, so encoding is None
    base, ext = posixpath.splitext(url)
    # Expand shorthand suffixes (e.g. ".tgz" -> ".tar.gz") until the
    # resulting suffix is no longer a key of suffix_map.
    while suffix_map.has_key(ext):
        base, ext = posixpath.splitext(base + suffix_map[ext])
    if encodings_map.has_key(ext):
        # Strip the encoding suffix and look at the one underneath it.
        encoding = encodings_map[ext]
        base, ext = posixpath.splitext(base)
    else:
        encoding = None
    if types_map.has_key(ext):
        return types_map[ext], encoding
    lowered = string.lower(ext)
    if types_map.has_key(lowered):
        return types_map[lowered], encoding
    return None, encoding

def guess_extension(type):
    """Guess a filename extension for the MIME type `type'.

    The argument is lowercased and compared against the values of
    types_map.  The result is a key of that table -- an extension
    including its leading dot ('.') -- that guess_type() would map
    back to the given type, or None when no entry carries that type.
    When several extensions share the type, the one returned is
    whichever the dictionary yields first.

    init() is called first if the module has not been initialised yet.
    """
    if not inited:
        init()
    wanted = string.lower(type)
    for suffix in types_map.keys():
        if types_map[suffix] == wanted:
            return suffix
    return None

def init(files=None):
    """Load MIME type tables from files into types_map.

    `files' is a sequence of file names; when it is omitted or empty the
    module-level list knownfiles is used instead.  Every file is passed
    to read_mime_types() and whatever it returns is merged into
    types_map, so later files override earlier ones.  Files that cannot
    be read, or that contribute no entries, are skipped.  The inited
    flag is set to 1 afterwards.
    """
    global inited
    if files:
        sources = files
    else:
        sources = knownfiles
    for path in sources:
        parsed = read_mime_types(path)
        if not parsed:
            # None (unreadable file) or an empty table: nothing to merge.
            continue
        types_map.update(parsed)
    inited = 1

def read_mime_types(file):
    """Parse one mime.types style file.

    `file' is the name of the file to read.  Each line is split on
    whitespace; the first word is a MIME type and every following word
    is a suffix (written without the dot) that maps to it.  A word that
    starts with '#' begins a comment running to the end of the line.
    Blank and comment-only lines are ignored.

    Return a dictionary mapping '.' + suffix to the MIME type, or None
    if the file cannot be opened (IOError).  The file is closed even
    when reading or parsing raises an exception.
    """
    try:
        f = open(file)
    except IOError:
        return None
    mapping = {}
    try:
        while 1:
            line = f.readline()
            if not line:
                break
            words = string.split(line)
            # Drop the first word starting with '#' and everything after it.
            for i in range(len(words)):
                if words[i][0] == '#':
                    del words[i:]
                    break
            if not words:
                continue
            mime_type, suffixes = words[0], words[1:]
            for suff in suffixes:
                mapping['.' + suff] = mime_type
    finally:
        # Release the handle on every exit path, not just the normal one.
        f.close()
    return mapping

# Shorthand suffixes and the longer suffix each one stands for.
# guess_type() substitutes these (case sensitively, and repeatedly) before
# it looks at encodings_map and types_map.
suffix_map = {
    '.tgz': '.tar.gz',
    '.taz': '.tar.gz',
    '.tz': '.tar.gz',
}

# Suffixes that denote an encoding, mapped to the name of the encoding
# program.  guess_type() looks these up case sensitively (note the upper
# case '.Z') and then strips the suffix to find the underlying type.
encodings_map = {
    '.gz': 'gzip',
    '.Z': 'compress',
    }

# Built-in table mapping a filename suffix (leading dot included) to a
# MIME type.  init() adds to and overrides these entries with whatever it
# reads from the mime.types files.  All keys here are lower case;
# guess_type() tries the suffix as given first and then lowercased.
types_map = {
    '.a': 'application/octet-stream',
    '.ai': 'application/postscript',
    '.aif': 'audio/x-aiff',
    '.aifc': 'audio/x-aiff',
    '.aiff': 'audio/x-aiff',
    '.au': 'audio/basic',
    '.avi': 'video/x-msvideo',
    '.bcpio': 'application/x-bcpio',
    '.bin': 'application/octet-stream',
    '.cdf': 'application/x-netcdf',
    '.cpio': 'application/x-cpio',
    '.csh': 'application/x-csh',
    '.dll': 'application/octet-stream',
    '.dvi': 'application/x-dvi',
    '.exe': 'application/octet-stream',
    '.eps': 'application/postscript',
    '.etx': 'text/x-setext',
    '.gif': 'image/gif',
    '.gtar': 'application/x-gtar',
    '.hdf': 'application/x-hdf',
    '.htm': 'text/html',
    '.html': 'text/html',
    '.ief': 'image/ief',
    '.jpe': 'image/jpeg',
    '.jpeg': 'image/jpeg',
    '.jpg': 'image/jpeg',
    '.latex': 'application/x-latex',
    '.man': 'application/x-troff-man',
    '.me': 'application/x-troff-me',
    '.mif': 'application/x-mif',
    '.mov': 'video/quicktime',
    '.movie': 'video/x-sgi-movie',
    '.mpe': 'video/mpeg',
    '.mpeg': 'video/mpeg',
    '.mpg': 'video/mpeg',
    '.ms': 'application/x-troff-ms',
    '.nc': 'application/x-netcdf',
    '.o': 'application/octet-stream',
    '.obj': 'application/octet-stream',
    '.oda': 'application/oda',
    '.pbm': 'image/x-portable-bitmap',
    '.pdf': 'application/pdf',
    '.pgm': 'image/x-portable-graymap',
    '.pnm': 'image/x-portable-anymap',
    '.png': 'image/png',
    '.ppm': 'image/x-portable-pixmap',
    '.py': 'text/x-python',
    '.pyc': 'application/x-python-code',
    '.ps': 'application/postscript',
    '.qt': 'video/quicktime',
    '.ras': 'image/x-cmu-raster',
    '.rgb': 'image/x-rgb',
    '.roff': 'application/x-troff',
    '.rtf': 'application/rtf',
    '.rtx': 'text/richtext',
    '.sgm': 'text/x-sgml',
    '.sgml': 'text/x-sgml',
    '.sh': 'application/x-sh',
    '.shar': 'application/x-shar',
    '.snd': 'audio/basic',
    '.so': 'application/octet-stream',
    '.src': 'application/x-wais-source',
    '.sv4cpio': 'application/x-sv4cpio',
    '.sv4crc': 'application/x-sv4crc',
    '.t': 'application/x-troff',
    '.tar': 'application/x-tar',
    '.tcl': 'application/x-tcl',
    '.tex': 'application/x-tex',
    '.texi': 'application/x-texinfo',
    '.texinfo': 'application/x-texinfo',
    '.tif': 'image/tiff',
    '.tiff': 'image/tiff',
    '.tr': 'application/x-troff',
    '.tsv': 'text/tab-separated-values',
    '.txt': 'text/plain',
    '.ustar': 'application/x-ustar',
    '.wav': 'audio/x-wav',
    '.xbm': 'image/x-xbitmap',
    '.xml': 'text/xml',
    '.xpm': 'image/x-xpixmap',
    '.xwd': 'image/x-xwindowdump',
    '.zip': 'application/zip',
    }
The usual (and some new modules). 1997-10-06 20:19:59 +00:00			`"""Guess the MIME type of a file.`

The usual. 1998-08-12 02:38:11 +00:00			`This module defines two useful functions:`
The usual (and some new modules). 1997-10-06 20:19:59 +00:00
			`guess_type(url) -- guess the MIME type and encoding of a URL.`

The usual. 1998-08-12 02:38:11 +00:00			`guess_extension(type) -- guess the extension for a given MIME type.`

The usual (and some new modules). 1997-10-06 20:19:59 +00:00			`It also contains the following, for tuning the behavior:`

			`Data:`

			`knownfiles -- list of files to parse`
			`inited -- flag set when init() has been called`
			`suffixes_map -- dictionary mapping suffixes to suffixes`
			`encodings_map -- dictionary mapping suffixes to encodings`
			`types_map -- dictionary mapping suffixes to types`

			`Functions:`

			`init([files]) -- parse a list of files, default knownfiles`
			`read_mime_types(file) -- parse one file, return a dictionary or None`

			`"""`

			`import string`
			`import posixpath`
The usual 1998-10-17 18:09:27 +00:00			`import urllib`
The usual (and some new modules). 1997-10-06 20:19:59 +00:00
			`knownfiles = [`
			`"/usr/local/etc/httpd/conf/mime.types",`
			`"/usr/local/lib/netscape/mime.types",`
The usual. 1998-08-12 02:38:11 +00:00			`"/usr/local/etc/httpd/conf/mime.types", # Apache 1.2`
			`"/usr/local/etc/mime.types", # Apache 1.3`
The usual (and some new modules). 1997-10-06 20:19:59 +00:00			`]`

			`inited = 0`

			`def guess_type(url):`
			`"""Guess the type of a file based on its URL.`

			`Return value is a tuple (type, encoding) where type is None if the`
			`type can't be guessed (no or unknown suffix) or a string of the`
			`form type/subtype, usable for a MIME Content-type header; and`
			`encoding is None for no encoding or the name of the program used`
			`to encode (e.g. compress or gzip). The mappings are table`
			`driven. Encoding suffixes are case sensitive; type suffixes are`
			`first tried case sensitive, then case insensitive.`

			`The suffixes .tgz, .taz and .tz (case sensitive!) are all mapped`
			`to ".tar.gz". (This is table-driven too, using the dictionary`
The usual. 1998-08-12 02:38:11 +00:00			`suffix_map).`
The usual (and some new modules). 1997-10-06 20:19:59 +00:00
			`"""`
			`if not inited:`
The usual. 1998-03-26 22:14:20 +00:00			`init()`
The usual 1998-10-17 18:09:27 +00:00			`scheme, url = urllib.splittype(url)`
			`if scheme == 'data':`
			`# syntax of data URLs:`
			`# dataurl := "data:" [ mediatype ] [ ";base64" ] "," data`
			`# mediatype := [ type "/" subtype ] *( ";" parameter )`
			`# data := *urlchar`
			`# parameter := attribute "=" value`
			`# type/subtype defaults to "text/plain"`
			`comma = string.find(url, ',')`
			`if comma < 0:`
			`# bad data URL`
			`return None, None`
			`semi = string.find(url, ';', 0, comma)`
			`if semi >= 0:`
			`type = url[:semi]`
			`else:`
			`type = url[:comma]`
			`if '=' in type or '/' not in type:`
			`type = 'text/plain'`
			`return type, None # never compressed, so encoding is None`
The usual (and some new modules). 1997-10-06 20:19:59 +00:00			`base, ext = posixpath.splitext(url)`
			`while suffix_map.has_key(ext):`
The usual. 1998-03-26 22:14:20 +00:00			`base, ext = posixpath.splitext(base + suffix_map[ext])`
The usual (and some new modules). 1997-10-06 20:19:59 +00:00			`if encodings_map.has_key(ext):`
The usual. 1998-03-26 22:14:20 +00:00			`encoding = encodings_map[ext]`
			`base, ext = posixpath.splitext(base)`
The usual (and some new modules). 1997-10-06 20:19:59 +00:00			`else:`
The usual. 1998-03-26 22:14:20 +00:00			`encoding = None`
The usual (and some new modules). 1997-10-06 20:19:59 +00:00			`if types_map.has_key(ext):`
The usual. 1998-03-26 22:14:20 +00:00			`return types_map[ext], encoding`
The usual (and some new modules). 1997-10-06 20:19:59 +00:00			`elif types_map.has_key(string.lower(ext)):`
The usual. 1998-03-26 22:14:20 +00:00			`return types_map[string.lower(ext)], encoding`
The usual (and some new modules). 1997-10-06 20:19:59 +00:00			`else:`
The usual. 1998-03-26 22:14:20 +00:00			`return None, encoding`
The usual (and some new modules). 1997-10-06 20:19:59 +00:00
The usual. 1998-08-12 02:38:11 +00:00			`def guess_extension(type):`
			`"""Guess the extension for a file based on its MIME type.`

			`Return value is a string giving a filename extension, including the`
			`leading dot ('.'). The extension is not guaranteed to have been`
			`associated with any particular data stream, but would be mapped to the`
			MIME type `type' by guess_type(). If no extension can be guessed for
			`type', None is returned.
			`"""`
			`global inited`
			`if not inited:`
			`init()`
			`type = string.lower(type)`
			`for ext, stype in types_map.items():`
			`if type == stype:`
			`return ext`
			`return None`

The usual (and some new modules). 1997-10-06 20:19:59 +00:00			`def init(files=None):`
			`global inited`
			`for file in files or knownfiles:`
The usual. 1998-03-26 22:14:20 +00:00			`s = read_mime_types(file)`
			`if s:`
			`for key, value in s.items():`
			`types_map[key] = value`
The usual (and some new modules). 1997-10-06 20:19:59 +00:00			`inited = 1`

			`def read_mime_types(file):`
			`try:`
The usual. 1998-03-26 22:14:20 +00:00			`f = open(file)`
The usual (and some new modules). 1997-10-06 20:19:59 +00:00			`except IOError:`
The usual. 1998-03-26 22:14:20 +00:00			`return None`
The usual (and some new modules). 1997-10-06 20:19:59 +00:00			`map = {}`
			`while 1:`
The usual. 1998-03-26 22:14:20 +00:00			`line = f.readline()`
			`if not line: break`
			`words = string.split(line)`
			`for i in range(len(words)):`
			`if words[i][0] == '#':`
			`del words[i:]`
			`break`
			`if not words: continue`
			`type, suffixes = words[0], words[1:]`
			`for suff in suffixes:`
			`map['.'+suff] = type`
The usual (and some new modules). 1997-10-06 20:19:59 +00:00			`f.close()`
			`return map`

			`suffix_map = {`
			`'.tgz': '.tar.gz',`
			`'.taz': '.tar.gz',`
			`'.tz': '.tar.gz',`
			`}`

			`encodings_map = {`
			`'.gz': 'gzip',`
			`'.Z': 'compress',`
			`}`

			`types_map = {`
			`'.a': 'application/octet-stream',`
			`'.ai': 'application/postscript',`
			`'.aif': 'audio/x-aiff',`
			`'.aifc': 'audio/x-aiff',`
			`'.aiff': 'audio/x-aiff',`
			`'.au': 'audio/basic',`
			`'.avi': 'video/x-msvideo',`
			`'.bcpio': 'application/x-bcpio',`
			`'.bin': 'application/octet-stream',`
			`'.cdf': 'application/x-netcdf',`
			`'.cpio': 'application/x-cpio',`
			`'.csh': 'application/x-csh',`
			`'.dll': 'application/octet-stream',`
			`'.dvi': 'application/x-dvi',`
			`'.exe': 'application/octet-stream',`
			`'.eps': 'application/postscript',`
			`'.etx': 'text/x-setext',`
			`'.gif': 'image/gif',`
			`'.gtar': 'application/x-gtar',`
			`'.hdf': 'application/x-hdf',`
			`'.htm': 'text/html',`
			`'.html': 'text/html',`
			`'.ief': 'image/ief',`
			`'.jpe': 'image/jpeg',`
			`'.jpeg': 'image/jpeg',`
			`'.jpg': 'image/jpeg',`
			`'.latex': 'application/x-latex',`
			`'.man': 'application/x-troff-man',`
			`'.me': 'application/x-troff-me',`
			`'.mif': 'application/x-mif',`
			`'.mov': 'video/quicktime',`
			`'.movie': 'video/x-sgi-movie',`
			`'.mpe': 'video/mpeg',`
			`'.mpeg': 'video/mpeg',`
			`'.mpg': 'video/mpeg',`
			`'.ms': 'application/x-troff-ms',`
			`'.nc': 'application/x-netcdf',`
			`'.o': 'application/octet-stream',`
			`'.obj': 'application/octet-stream',`
			`'.oda': 'application/oda',`
			`'.pbm': 'image/x-portable-bitmap',`
			`'.pdf': 'application/pdf',`
			`'.pgm': 'image/x-portable-graymap',`
			`'.pnm': 'image/x-portable-anymap',`
			`'.png': 'image/png',`
			`'.ppm': 'image/x-portable-pixmap',`
			`'.py': 'text/x-python',`
			`'.pyc': 'application/x-python-code',`
			`'.ps': 'application/postscript',`
			`'.qt': 'video/quicktime',`
			`'.ras': 'image/x-cmu-raster',`
			`'.rgb': 'image/x-rgb',`
			`'.roff': 'application/x-troff',`
			`'.rtf': 'application/rtf',`
			`'.rtx': 'text/richtext',`
			`'.sgm': 'text/x-sgml',`
			`'.sgml': 'text/x-sgml',`
			`'.sh': 'application/x-sh',`
			`'.shar': 'application/x-shar',`
			`'.snd': 'audio/basic',`
			`'.so': 'application/octet-stream',`
			`'.src': 'application/x-wais-source',`
			`'.sv4cpio': 'application/x-sv4cpio',`
			`'.sv4crc': 'application/x-sv4crc',`
			`'.t': 'application/x-troff',`
			`'.tar': 'application/x-tar',`
			`'.tcl': 'application/x-tcl',`
			`'.tex': 'application/x-tex',`
			`'.texi': 'application/x-texinfo',`
			`'.texinfo': 'application/x-texinfo',`
			`'.tif': 'image/tiff',`
			`'.tiff': 'image/tiff',`
			`'.tr': 'application/x-troff',`
			`'.tsv': 'text/tab-separated-values',`
			`'.txt': 'text/plain',`
			`'.ustar': 'application/x-ustar',`
			`'.wav': 'audio/x-wav',`
			`'.xbm': 'image/x-xbitmap',`
The usual. 1998-08-12 02:38:11 +00:00			`'.xml': 'text/xml',`
The usual (and some new modules). 1997-10-06 20:19:59 +00:00			`'.xpm': 'image/x-xpixmap',`
			`'.xwd': 'image/x-xwindowdump',`
			`'.zip': 'application/zip',`
			`}`