perkeep/vendor/labix.org/v2/mgo/gridfs.go

718 lines
20 KiB
Go

// mgo - MongoDB driver for Go
//
// Copyright (c) 2010-2012 - Gustavo Niemeyer <gustavo@niemeyer.net>
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
// ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
package mgo
import (
"crypto/md5"
"encoding/hex"
"errors"
"hash"
"io"
"labix.org/v2/mgo/bson"
"os"
"sync"
"time"
)
type GridFS struct {
Files *Collection
Chunks *Collection
}
type gfsFileMode int
const (
gfsClosed gfsFileMode = 0
gfsReading gfsFileMode = 1
gfsWriting gfsFileMode = 2
)
type GridFile struct {
m sync.Mutex
c sync.Cond
gfs *GridFS
mode gfsFileMode
err error
chunk int
offset int64
wpending int
wbuf []byte
wsum hash.Hash
rbuf []byte
rcache *gfsCachedChunk
doc gfsFile
}
type gfsFile struct {
Id interface{} "_id"
ChunkSize int "chunkSize"
UploadDate time.Time "uploadDate"
Length int64 ",minsize"
MD5 string
Filename string ",omitempty"
ContentType string "contentType,omitempty"
Metadata *bson.Raw ",omitempty"
}
type gfsChunk struct {
Id interface{} "_id"
FilesId interface{} "files_id"
N int
Data []byte
}
type gfsCachedChunk struct {
wait sync.Mutex
n int
data []byte
err error
}
func newGridFS(db *Database, prefix string) *GridFS {
return &GridFS{db.C(prefix + ".files"), db.C(prefix + ".chunks")}
}
func (gfs *GridFS) newFile() *GridFile {
file := &GridFile{gfs: gfs}
file.c.L = &file.m
//runtime.SetFinalizer(file, finalizeFile)
return file
}
func finalizeFile(file *GridFile) {
file.Close()
}
// Create creates a new file with the provided name in the GridFS. If the file
// name already exists, a new version will be inserted with an up-to-date
// uploadDate that will cause it to be atomically visible to the Open and
// OpenId methods. If the file name is not important, an empty name may be
// provided and the file Id used instead.
//
// It's important to Close files whether they are being written to
// or read from, and to check the err result to ensure the operation
// completed successfully.
//
// A simple example inserting a new file:
//
// func check(err os.Error) {
// if err != nil {
// panic(err.String())
// }
// }
// file, err := db.GridFS("fs").Create("myfile.txt")
// check(err)
// n, err := file.Write([]byte("Hello world!")
// check(err)
// err = file.Close()
// check(err)
// fmt.Printf("%d bytes written\n", n)
//
// The io.Writer interface is implemented by *GridFile and may be used to
// help on the file creation. For example:
//
// file, err := db.GridFS("fs").Create("myfile.txt")
// check(err)
// messages, err := os.Open("/var/log/messages")
// check(err)
// defer messages.Close()
// err = io.Copy(file, messages)
// check(err)
// err = file.Close()
// check(err)
//
func (gfs *GridFS) Create(name string) (file *GridFile, err error) {
file = gfs.newFile()
file.mode = gfsWriting
file.wsum = md5.New()
file.doc = gfsFile{Id: bson.NewObjectId(), ChunkSize: 256 * 1024, Filename: name}
return
}
// OpenId returns the file with the provided id, for reading.
// If the file isn't found, err will be set to mgo.ErrNotFound.
//
// It's important to Close files whether they are being written to
// or read from, and to check the err result to ensure the operation
// completed successfully.
//
// The following example will print the first 8192 bytes from the file:
//
// func check(err os.Error) {
// if err != nil {
// panic(err.String())
// }
// }
// file, err := db.GridFS("fs").OpenId(objid)
// check(err)
// b := make([]byte, 8192)
// n, err := file.Read(b)
// check(err)
// fmt.Println(string(b))
// check(err)
// err = file.Close()
// check(err)
// fmt.Printf("%d bytes read\n", n)
//
// The io.Reader interface is implemented by *GridFile and may be used to
// deal with it. As an example, the following snippet will dump the whole
// file into the standard output:
//
// file, err := db.GridFS("fs").OpenId(objid)
// check(err)
// err = io.Copy(os.Stdout, file)
// check(err)
// err = file.Close()
// check(err)
//
func (gfs *GridFS) OpenId(id interface{}) (file *GridFile, err error) {
var doc gfsFile
err = gfs.Files.Find(bson.M{"_id": id}).One(&doc)
if err != nil {
return
}
file = gfs.newFile()
file.mode = gfsReading
file.doc = doc
return
}
// Open returns the most recently uploaded file with the provided
// name, for reading. If the file isn't found, err will be set
// to mgo.ErrNotFound.
//
// It's important to Close files whether they are being written to
// or read from, and to check the err result to ensure the operation
// completed successfully.
//
// The following example will print the first 8192 bytes from the file:
//
// file, err := db.GridFS("fs").Open("myfile.txt")
// check(err)
// b := make([]byte, 8192)
// n, err := file.Read(b)
// check(err)
// fmt.Println(string(b))
// check(err)
// err = file.Close()
// check(err)
// fmt.Printf("%d bytes read\n", n)
//
// The io.Reader interface is implemented by *GridFile and may be used to
// deal with it. As an example, the following snippet will dump the whole
// file into the standard output:
//
// file, err := db.GridFS("fs").Open("myfile.txt")
// check(err)
// err = io.Copy(os.Stdout, file)
// check(err)
// err = file.Close()
// check(err)
//
func (gfs *GridFS) Open(name string) (file *GridFile, err error) {
var doc gfsFile
err = gfs.Files.Find(bson.M{"filename": name}).Sort("-uploadDate").One(&doc)
if err != nil {
return
}
file = gfs.newFile()
file.mode = gfsReading
file.doc = doc
return
}
// OpenNext opens the next file from iter for reading, sets *file to it,
// and returns true on the success case. If no more documents are available
// on iter or an error occurred, *file is set to nil and the result is false.
// Errors will be available via iter.Err().
//
// The iter parameter must be an iterator on the GridFS files collection.
// Using the GridFS.Find method is an easy way to obtain such an iterator,
// but any iterator on the collection will work.
//
// If the provided *file is non-nil, OpenNext will close it before attempting
// to iterate to the next element. This means that in a loop one only
// has to worry about closing files when breaking out of the loop early
// (break, return, or panic).
//
// For example:
//
// gfs := db.GridFS("fs")
// query := gfs.Find(nil).Sort("filename")
// iter := query.Iter()
// var f *mgo.GridFile
// for gfs.OpenNext(iter, &f) {
// fmt.Printf("Filename: %s\n", f.Name())
// }
// if iter.Close() != nil {
// panic(iter.Close())
// }
//
func (gfs *GridFS) OpenNext(iter *Iter, file **GridFile) bool {
if *file != nil {
// Ignoring the error here shouldn't be a big deal
// as we're reading the file and the loop iteration
// for this file is finished.
_ = (*file).Close()
}
var doc gfsFile
if !iter.Next(&doc) {
*file = nil
return false
}
f := gfs.newFile()
f.mode = gfsReading
f.doc = doc
*file = f
return true
}
// Find runs query on GridFS's files collection and returns
// the resulting Query.
//
// This logic:
//
// gfs := db.GridFS("fs")
// iter := gfs.Find(nil).Iter()
//
// Is equivalent to:
//
// files := db.C("fs" + ".files")
// iter := files.Find(nil).Iter()
//
func (gfs *GridFS) Find(query interface{}) *Query {
return gfs.Files.Find(query)
}
// RemoveId deletes the file with the provided id from the GridFS.
func (gfs *GridFS) RemoveId(id interface{}) error {
err := gfs.Files.Remove(bson.M{"_id": id})
if err != nil {
return err
}
_, err = gfs.Chunks.RemoveAll(bson.M{"files_id": id})
return err
}
type gfsDocId struct {
Id interface{} "_id"
}
// Remove deletes all files with the provided name from the GridFS.
func (gfs *GridFS) Remove(name string) (err error) {
iter := gfs.Files.Find(bson.M{"filename": name}).Select(bson.M{"_id": 1}).Iter()
var doc gfsDocId
for iter.Next(&doc) {
if e := gfs.RemoveId(doc.Id); e != nil {
err = e
}
}
if err == nil {
err = iter.Close()
}
return err
}
func (file *GridFile) assertMode(mode gfsFileMode) {
switch file.mode {
case mode:
return
case gfsWriting:
panic("GridFile is open for writing")
case gfsReading:
panic("GridFile is open for reading")
case gfsClosed:
panic("GridFile is closed")
default:
panic("Internal error: missing GridFile mode")
}
}
// SetChunkSize sets size of saved chunks. Once the file is written to, it
// will be split in blocks of that size and each block saved into an
// independent chunk document. The default chunk size is 256kb.
//
// It is a runtime error to call this function once the file has started
// being written to.
func (file *GridFile) SetChunkSize(bytes int) {
file.assertMode(gfsWriting)
debugf("GridFile %p: setting chunk size to %d", file, bytes)
file.m.Lock()
file.doc.ChunkSize = bytes
file.m.Unlock()
}
// Id returns the current file Id.
func (file *GridFile) Id() interface{} {
return file.doc.Id
}
// SetId changes the current file Id. It is a runtime
//
// It is a runtime error to call this function once the file has started
// being written to, or when the file is not open for writing.
func (file *GridFile) SetId(id interface{}) {
file.assertMode(gfsWriting)
file.m.Lock()
file.doc.Id = id
file.m.Unlock()
}
// Name returns the optional file name. An empty string will be returned
// in case it is unset.
func (file *GridFile) Name() string {
return file.doc.Filename
}
// SetName changes the optional file name. An empty string may be used to
// unset it.
//
// It is a runtime error to call this function when the file is not open
// for writing.
func (file *GridFile) SetName(name string) {
file.assertMode(gfsWriting)
file.m.Lock()
file.doc.Filename = name
file.m.Unlock()
}
// ContentType returns the optional file content type. An empty string will be
// returned in case it is unset.
func (file *GridFile) ContentType() string {
return file.doc.ContentType
}
// ContentType changes the optional file content type. An empty string may be
// used to unset it.
//
// It is a runtime error to call this function when the file is not open
// for writing.
func (file *GridFile) SetContentType(ctype string) {
file.assertMode(gfsWriting)
file.m.Lock()
file.doc.ContentType = ctype
file.m.Unlock()
}
// GetMeta unmarshals the optional "metadata" field associated with the
// file into the result parameter. The meaning of keys under that field
// is user-defined. For example:
//
// result := struct{ INode int }{}
// err = file.GetMeta(&result)
// if err != nil {
// panic(err.String())
// }
// fmt.Printf("inode: %d\n", result.INode)
//
func (file *GridFile) GetMeta(result interface{}) (err error) {
file.m.Lock()
if file.doc.Metadata != nil {
err = bson.Unmarshal(file.doc.Metadata.Data, result)
}
file.m.Unlock()
return
}
// SetMeta changes the optional "metadata" field associated with the
// file. The meaning of keys under that field is user-defined.
// For example:
//
// file.SetMeta(bson.M{"inode": inode})
//
// It is a runtime error to call this function when the file is not open
// for writing.
func (file *GridFile) SetMeta(metadata interface{}) {
file.assertMode(gfsWriting)
data, err := bson.Marshal(metadata)
file.m.Lock()
if err != nil && file.err == nil {
file.err = err
} else {
file.doc.Metadata = &bson.Raw{Data: data}
}
file.m.Unlock()
}
// Size returns the file size in bytes.
func (file *GridFile) Size() (bytes int64) {
file.m.Lock()
bytes = file.doc.Length
file.m.Unlock()
return
}
// MD5 returns the file MD5 as a hex-encoded string.
func (file *GridFile) MD5() (md5 string) {
return file.doc.MD5
}
// UploadDate returns the file upload time.
func (file *GridFile) UploadDate() time.Time {
return file.doc.UploadDate
}
// Close flushes any pending changes in case the file is being written
// to, waits for any background operations to finish, and closes the file.
//
// It's important to Close files whether they are being written to
// or read from, and to check the err result to ensure the operation
// completed successfully.
func (file *GridFile) Close() (err error) {
file.m.Lock()
defer file.m.Unlock()
if file.mode == gfsWriting {
if len(file.wbuf) > 0 {
file.insertChunk(file.wbuf)
file.wbuf = file.wbuf[0:0]
}
file.insertFile()
} else if file.mode == gfsReading && file.rcache != nil {
file.rcache.wait.Lock()
file.rcache = nil
}
file.mode = gfsClosed
debugf("GridFile %p: closed", file)
return file.err
}
// Write writes the provided data to the file and returns the
// number of bytes written and an error in case something
// wrong happened.
//
// The file will internally cache the data so that all but the last
// chunk sent to the database have the size defined by SetChunkSize.
// This also means that errors may be deferred until a future call
// to Write or Close.
//
// The parameters and behavior of this function turn the file
// into an io.Writer.
func (file *GridFile) Write(data []byte) (n int, err error) {
file.assertMode(gfsWriting)
file.m.Lock()
debugf("GridFile %p: writing %d bytes", file, len(data))
defer file.m.Unlock()
if file.err != nil {
return 0, file.err
}
n = len(data)
file.doc.Length += int64(n)
chunkSize := file.doc.ChunkSize
if len(file.wbuf)+len(data) < chunkSize {
file.wbuf = append(file.wbuf, data...)
return
}
// First, flush file.wbuf complementing with data.
if len(file.wbuf) > 0 {
missing := chunkSize - len(file.wbuf)
if missing > len(data) {
missing = len(data)
}
file.wbuf = append(file.wbuf, data[:missing]...)
data = data[missing:]
file.insertChunk(file.wbuf)
file.wbuf = file.wbuf[0:0]
}
// Then, flush all chunks from data without copying.
for len(data) > chunkSize {
size := chunkSize
if size > len(data) {
size = len(data)
}
file.insertChunk(data[:size])
data = data[size:]
}
// And append the rest for a future call.
file.wbuf = append(file.wbuf, data...)
return n, file.err
}
func (file *GridFile) insertChunk(data []byte) {
n := file.chunk
file.chunk++
debugf("GridFile %p: adding to checksum: %q", file, string(data))
file.wsum.Write(data)
for file.doc.ChunkSize*file.wpending >= 1024*1024 {
// Hold on.. we got a MB pending.
file.c.Wait()
if file.err != nil {
return
}
}
file.wpending++
debugf("GridFile %p: inserting chunk %d with %d bytes", file, n, len(data))
// We may not own the memory of data, so rather than
// simply copying it, we'll marshal the document ahead of time.
data, err := bson.Marshal(gfsChunk{bson.NewObjectId(), file.doc.Id, n, data})
if err != nil {
file.err = err
return
}
go func() {
err := file.gfs.Chunks.Insert(bson.Raw{Data: data})
file.m.Lock()
file.wpending--
if err != nil && file.err == nil {
file.err = err
}
file.c.Broadcast()
file.m.Unlock()
}()
}
func (file *GridFile) insertFile() {
hexsum := hex.EncodeToString(file.wsum.Sum(nil))
for file.wpending > 0 {
debugf("GridFile %p: waiting for %d pending chunks to insert file", file, file.wpending)
file.c.Wait()
}
if file.err == nil {
file.doc.UploadDate = bson.Now()
file.doc.MD5 = hexsum
file.err = file.gfs.Files.Insert(file.doc)
file.gfs.Chunks.EnsureIndexKey("files_id", "n")
}
}
// Seek sets the offset for the next Read or Write on file to
// offset, interpreted according to whence: 0 means relative to
// the origin of the file, 1 means relative to the current offset,
// and 2 means relative to the end. It returns the new offset and
// an Error, if any.
func (file *GridFile) Seek(offset int64, whence int) (pos int64, err error) {
file.m.Lock()
debugf("GridFile %p: seeking for %s (whence=%d)", file, offset, whence)
defer file.m.Unlock()
switch whence {
case os.SEEK_SET:
case os.SEEK_CUR:
offset += file.offset
case os.SEEK_END:
offset += file.doc.Length
default:
panic("Unsupported whence value")
}
if offset > file.doc.Length {
return file.offset, errors.New("Seek past end of file")
}
chunk := int(offset / int64(file.doc.ChunkSize))
if chunk+1 == file.chunk && offset >= file.offset {
file.rbuf = file.rbuf[int(offset-file.offset):]
file.offset = offset
return file.offset, nil
}
file.offset = offset
file.chunk = chunk
file.rbuf = nil
file.rbuf, err = file.getChunk()
if err == nil {
file.rbuf = file.rbuf[int(file.offset-int64(chunk)*int64(file.doc.ChunkSize)):]
}
return file.offset, err
}
// Read reads into b the next available data from the file and
// returns the number of bytes written and an error in case
// something wrong happened. At the end of the file, n will
// be zero and err will be set to os.EOF.
//
// The parameters and behavior of this function turn the file
// into an io.Reader.
func (file *GridFile) Read(b []byte) (n int, err error) {
file.assertMode(gfsReading)
file.m.Lock()
debugf("GridFile %p: reading at offset %d into buffer of length %d", file, file.offset, len(b))
defer file.m.Unlock()
if file.offset == file.doc.Length {
return 0, io.EOF
}
for err == nil {
i := copy(b, file.rbuf)
n += i
file.offset += int64(i)
file.rbuf = file.rbuf[i:]
if i == len(b) || file.offset == file.doc.Length {
break
}
b = b[i:]
file.rbuf, err = file.getChunk()
}
return n, err
}
func (file *GridFile) getChunk() (data []byte, err error) {
cache := file.rcache
file.rcache = nil
if cache != nil && cache.n == file.chunk {
debugf("GridFile %p: Getting chunk %d from cache", file, file.chunk)
cache.wait.Lock()
data, err = cache.data, cache.err
} else {
debugf("GridFile %p: Fetching chunk %d", file, file.chunk)
var doc gfsChunk
err = file.gfs.Chunks.Find(bson.D{{"files_id", file.doc.Id}, {"n", file.chunk}}).One(&doc)
data = doc.Data
}
file.chunk++
if int64(file.chunk)*int64(file.doc.ChunkSize) < file.doc.Length {
// Read the next one in background.
cache = &gfsCachedChunk{n: file.chunk}
cache.wait.Lock()
debugf("GridFile %p: Scheduling chunk %d for background caching", file, file.chunk)
// Clone the session to avoid having it closed in between.
chunks := file.gfs.Chunks
session := chunks.Database.Session.Clone()
go func(id interface{}, n int) {
defer session.Close()
chunks = chunks.With(session)
var doc gfsChunk
cache.err = chunks.Find(bson.D{{"files_id", id}, {"n", n}}).One(&doc)
cache.data = doc.Data
cache.wait.Unlock()
}(file.doc.Id, file.chunk)
file.rcache = cache
}
debugf("Returning err: %#v", err)
return
}