mirror of https://github.com/perkeep/perkeep.git
171 lines
5.7 KiB
Go
171 lines
5.7 KiB
Go
/*
|
|
Copyright 2011 Google Inc.
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
you may not use this file except in compliance with the License.
|
|
You may obtain a copy of the License at
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
See the License for the specific language governing permissions and
|
|
limitations under the License.
|
|
*/
|
|
|
|
// Package magic implements MIME type sniffing of data based on the
|
|
// well-known "magic" number prefixes in the file.
|
|
package magic // import "camlistore.org/pkg/magic"
|
|
|
|
import (
|
|
"bytes"
|
|
"io"
|
|
"mime"
|
|
"net/http"
|
|
"path/filepath"
|
|
"strings"
|
|
)
|
|
|
|
// prefixEntry pairs a magic-number byte sequence with the MIME type
// reported for data that begins with that sequence.
type prefixEntry struct {
	prefix []byte // leading bytes that identify the format
	mtype  string // MIME type reported when prefix matches
}
|
|
|
|
// usable source: http://www.garykessler.net/library/file_sigs.html
|
|
// mime types: http://www.iana.org/assignments/media-types/media-types.xhtml
|
|
var prefixTable = []prefixEntry{
|
|
{[]byte("GIF87a"), "image/gif"},
|
|
{[]byte("GIF89a"), "image/gif"}, // TODO: Others?
|
|
{[]byte("\xff\xd8\xff\xe2"), "image/jpeg"},
|
|
{[]byte("\xff\xd8\xff\xe1"), "image/jpeg"},
|
|
{[]byte("\xff\xd8\xff\xe0"), "image/jpeg"},
|
|
{[]byte("\xff\xd8\xff\xdb"), "image/jpeg"},
|
|
{[]byte("\x49\x49\x2a\x00\x10\x00\x00\x00\x43\x52\x02"), "image/cr2"},
|
|
{[]byte{137, 'P', 'N', 'G', '\r', '\n', 26, 10}, "image/png"},
|
|
{[]byte{0x49, 0x20, 0x49}, "image/tiff"},
|
|
{[]byte{0x49, 0x49, 0x2A, 0}, "image/tiff"},
|
|
{[]byte{0x4D, 0x4D, 0, 0x2A}, "image/tiff"},
|
|
{[]byte{0x4D, 0x4D, 0, 0x2B}, "image/tiff"},
|
|
{[]byte("8BPS"), "image/vnd.adobe.photoshop"},
|
|
{[]byte("gimp xcf "), "image/xcf"},
|
|
{[]byte("-----BEGIN PGP PUBLIC KEY BLOCK---"), "text/x-openpgp-public-key"},
|
|
{[]byte("fLaC\x00\x00\x00"), "audio/flac"},
|
|
{[]byte{'I', 'D', '3'}, "audio/mpeg"},
|
|
{[]byte{0, 0, 1, 0xB7}, "video/mpeg"},
|
|
{[]byte{0, 0, 0, 0x14, 0x66, 0x74, 0x79, 0x70, 0x71, 0x74, 0x20, 0x20}, "video/quicktime"},
|
|
{[]byte{0, 0x6E, 0x1E, 0xF0}, "application/vnd.ms-powerpoint"},
|
|
{[]byte{0x1A, 0x45, 0xDF, 0xA3}, "video/webm"},
|
|
{[]byte("FLV\x01"), "application/vnd.adobe.flash.video"},
|
|
{[]byte{0x1F, 0x8B, 0x08}, "application/gzip"},
|
|
{[]byte{0x37, 0x7A, 0xBC, 0xAF, 0x27, 0x1C}, "application/x-7z-compressed"},
|
|
{[]byte("BZh"), "application/bzip2"},
|
|
{[]byte{0xFD, 0x37, 0x7A, 0x58, 0x5A, 0}, "application/x-xz"},
|
|
{[]byte{'P', 'K', 3, 4, 0x0A, 0, 2, 0}, "application/epub+zip"},
|
|
{[]byte{0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1}, "application/vnd.ms-word"},
|
|
{[]byte{'P', 'K', 3, 4, 0x0A, 0x14, 0, 6, 0}, "application/vnd.openxmlformats-officedocument.custom-properties+xml"},
|
|
{[]byte{'P', 'K', 3, 4}, "application/zip"},
|
|
{[]byte("%PDF"), "application/pdf"},
|
|
{[]byte("{rtf"), "text/rtf1"},
|
|
{[]byte("BEGIN:VCARD\x0D\x0A"), "text/vcard"},
|
|
{[]byte("Return-Path: "), "message/rfc822"},
|
|
|
|
// TODO(bradfitz): popular audio & video formats at least
|
|
}
|
|
|
|
// MIMEType returns the MIME type from the data in the provided header
|
|
// of the data.
|
|
// It returns the empty string if the MIME type can't be determined.
|
|
func MIMEType(hdr []byte) string {
|
|
hlen := len(hdr)
|
|
for _, pte := range prefixTable {
|
|
plen := len(pte.prefix)
|
|
if hlen > plen && bytes.Equal(hdr[:plen], pte.prefix) {
|
|
return pte.mtype
|
|
}
|
|
}
|
|
t := http.DetectContentType(hdr)
|
|
t = strings.Replace(t, "; charset=utf-8", "", 1)
|
|
if t != "application/octet-stream" && t != "text/plain" {
|
|
return t
|
|
}
|
|
return ""
|
|
}
|
|
|
|
// MIMETypeFromReader takes a reader, sniffs the beginning of it,
|
|
// and returns the mime (if sniffed, else "") and a new reader
|
|
// that's the concatenation of the bytes sniffed and the remaining
|
|
// reader.
|
|
func MIMETypeFromReader(r io.Reader) (mime string, reader io.Reader) {
|
|
var buf bytes.Buffer
|
|
_, err := io.Copy(&buf, io.LimitReader(r, 1024))
|
|
mime = MIMEType(buf.Bytes())
|
|
if err != nil {
|
|
return mime, io.MultiReader(&buf, errReader{err})
|
|
}
|
|
return mime, io.MultiReader(&buf, r)
|
|
}
|
|
|
|
// MIMETypeFromReader takes a ReaderAt, sniffs the beginning of it,
|
|
// and returns the MIME type if sniffed, else the empty string.
|
|
func MIMETypeFromReaderAt(ra io.ReaderAt) (mime string) {
|
|
var buf [1024]byte
|
|
n, _ := ra.ReadAt(buf[:], 0)
|
|
return MIMEType(buf[:n])
|
|
}
|
|
|
|
// errReader is an io.Reader whose every Read fails with the stored error.
type errReader struct {
	err error
}

// Read never writes to p; it reports zero bytes read together with the
// stored error.
func (r errReader) Read(p []byte) (int, error) {
	return 0, r.err
}
|
|
|
|
// TODO(mpl): unexport VideoExtensions

// VideoExtensions is the set of common video filename extensions that are
// not covered by mime.TypeByExtension. Keys are lowercase and carry no
// leading dot.
var VideoExtensions = map[string]bool{
	"m1v": true,
	"m2v": true,
	"m4v": true,
}
|
|
|
|
// HasExtension reports whether the extension of filename (the part after
// the final dot, as found by filepath.Ext) is a key of extensions mapped to
// true. The extension is lowercased before the lookup, so extensions is
// expected to hold lowercase keys without the leading dot. A filename with
// no extension never matches.
func HasExtension(filename string, extensions map[string]bool) bool {
	dotted := filepath.Ext(filename)
	if !strings.HasPrefix(dotted, ".") {
		return false
	}
	ext := dotted[1:]

	// Fast path: lowercase a short, pure-ASCII extension into a fixed-size
	// scratch array so the common case does not allocate.
	const firstNonASCII = 0x80 // same value as utf8.RuneSelf, without the import
	var scratch [10]byte
	folded := scratch[:0]
	for i := 0; i < len(ext); i++ {
		switch b := ext[i]; {
		case b >= firstNonASCII:
			// Non-ASCII byte: defer to the Unicode-aware lowercasing.
			return extensions[strings.ToLower(ext)]
		case 'A' <= b && b <= 'Z':
			folded = append(folded, b-'A'+'a')
		default:
			folded = append(folded, b)
		}
	}
	// A []byte-to-string conversion used directly as a map index does not
	// allocate.
	return extensions[string(folded)]
}
|
|
|
|
// MIMETypeByExtension looks up ext with mime.TypeByExtension and returns
// only the "type/subtype" part of the result: everything from the first
// ';' onward (the optional parameters) is dropped and surrounding
// whitespace is trimmed. The result is empty when the lookup yields
// nothing.
func MIMETypeByExtension(ext string) string {
	full := mime.TypeByExtension(ext)
	essence := strings.SplitN(full, ";", 2)[0]
	return strings.TrimSpace(essence)
}
|