importer/flickr: set DateCreated attribute

Also cleaned up a few vars.
Moved some attributes to nodeattr pkg.

Change-Id: Iea496d50b12e67132fe88f371a89534c2fe32761
This commit is contained in:
mpl 2014-07-24 23:55:42 +02:00
parent c2dd9a1fce
commit 7e86255cac
5 changed files with 58 additions and 26 deletions

View File

@ -56,13 +56,8 @@ const (
// AttrID is the generic identifier of an item when nothing suitable and more specific
// was found on http://schema.org. Usually a number.
AttrID = "ID"
// http://schema.org/title
AttrTitle = "title"
// http://schema.org/name
AttrName = "name"
// http://schema.org/description
// Value is plain text, no HTML, newlines are newlines.
AttrDescription = "description"
// Free-flowing text definition of a location or place, such
// as a city name, or a full postal address.
AttrLocationText = "locationText"

View File

@ -30,6 +30,7 @@ import (
"camlistore.org/pkg/httputil"
"camlistore.org/pkg/importer"
"camlistore.org/pkg/schema"
"camlistore.org/pkg/schema/nodeattr"
"camlistore.org/third_party/github.com/garyburd/go-oauth/oauth"
)
@ -38,6 +39,8 @@ const (
temporaryCredentialRequestURL = "https://www.flickr.com/services/oauth/request_token"
resourceOwnerAuthorizationURL = "https://www.flickr.com/services/oauth/authorize"
tokenRequestURL = "https://www.flickr.com/services/oauth/access_token"
attrFlickrId = "flickrId"
)
func init() {
@ -198,9 +201,9 @@ func (r *run) importPhotoset(parent *importer.Object, photoset *photosetsGetList
}
if err := photosetNode.SetAttrs(
importer.AttrID, photoset.ID,
importer.AttrTitle, photoset.Title.Content,
importer.AttrDescription, photoset.Description.Content,
attrFlickrId, photoset.ID,
nodeattr.Title, photoset.Title.Content,
nodeattr.Description, photoset.Description.Content,
importer.AttrPrimaryImageOfPage, photoset.PrimaryPhotoID); err != nil {
return 0, err
}
@ -254,18 +257,18 @@ type photosSearch struct {
type photosSearchItem struct {
ID string `json:"id"`
Title string
Ispublic int
Isfriend int
Isfamily int
IsPublic int
IsFriend int
IsFamily int
Description contentString
Dateupload string
Datetaken string
Originalformat string
Lastupdate string
DateUpload string // Unix timestamp, in GMT.
DateTaken string // formatted as "2006-01-02 15:04:05", so no timezone info.
OriginalFormat string
LastUpdate string
Latitude float32
Longitude float32
Tags string
Machinetags string `json:"machine_tags"`
MachineTags string `json:"machine_tags"`
Views string
Media string
URL string `json:"url_o"`
@ -321,24 +324,41 @@ func (r *run) importPhotosPage(page int) (int, error) {
// * Conflicts: For all metadata changes, prefer any non-imported claims
// * Test!
func (r *run) importPhoto(parent *importer.Object, photo *photosSearchItem) error {
filename := fmt.Sprintf("%s.%s", photo.ID, photo.Originalformat)
filename := fmt.Sprintf("%s.%s", photo.ID, photo.OriginalFormat)
photoNode, err := parent.ChildPathObject(filename)
if err != nil {
return err
}
// https://www.flickr.com/services/api/misc.dates.html
dateTaken, err := time.ParseInLocation("2006-01-02 15:04:05", photo.DateTaken, schema.UnknownLocation)
if err != nil {
// default to the published date otherwise
log.Printf("Flickr importer: problem with date taken of photo %v, defaulting to published date instead.", photo.ID)
seconds, err := strconv.ParseInt(photo.DateUpload, 10, 64)
if err != nil {
return fmt.Errorf("could not parse date upload time %q for image %v: %v", photo.DateUpload, photo.ID, err)
}
dateTaken = time.Unix(seconds, 0)
}
attrs := []string{
attrFlickrId, photo.ID,
nodeattr.DateCreated, schema.RFC3339FromTime(dateTaken),
nodeattr.Description, photo.Description.Content,
}
if schema.IsInterestingTitle(photo.Title) {
attrs = append(attrs, nodeattr.Title, photo.Title)
}
// Import all the metadata. SetAttrs() is a no-op if the value hasn't changed, so there's no cost to doing these on every run.
// And this way if we add more things to import, they will get picked up.
if err := photoNode.SetAttrs(
importer.AttrID, photo.ID,
importer.AttrTitle, photo.Title,
importer.AttrDescription, photo.Description.Content); err != nil {
if err := photoNode.SetAttrs(attrs...); err != nil {
return err
}
// Import the photo itself. Since it is expensive to fetch the image, we store its lastupdate and only refetch if it might have changed.
// lastupdate is a Unix timestamp according to https://www.flickr.com/services/api/flickr.photos.getInfo.html
seconds, err := strconv.ParseInt(photo.Lastupdate, 10, 64)
seconds, err := strconv.ParseInt(photo.LastUpdate, 10, 64)
if err != nil {
return fmt.Errorf("could not parse lastupdate time for image %v: %v", photo.ID, err)
}
@ -387,7 +407,7 @@ func (r *run) getTopLevelNode(path string, title string) (*importer.Object, erro
return nil, err
}
if err := photos.SetAttr(importer.AttrTitle, title); err != nil {
if err := photos.SetAttr(nodeattr.Title, title); err != nil {
return nil, err
}
return photos, nil

View File

@ -28,6 +28,7 @@ import (
"camlistore.org/pkg/context"
"camlistore.org/pkg/importer"
"camlistore.org/pkg/schema"
"camlistore.org/pkg/schema/nodeattr"
"camlistore.org/third_party/code.google.com/p/goauth2/oauth"
"camlistore.org/third_party/github.com/tgulacsi/picago"
@ -159,7 +160,7 @@ func (r *run) importAlbum(albumsNode *importer.Object, album picago.Album, clien
if err = albumNode.SetAttrs(
"picasaId", album.ID,
"camliNodeType", "picasaweb.google.com:album",
importer.AttrTitle, album.Title,
nodeattr.Title, album.Title,
importer.AttrLocationText, album.Location,
); err != nil {
return fmt.Errorf("error setting album attributes: %v", err)
@ -257,9 +258,9 @@ func (r *run) importPhoto(albumNode *importer.Object, photo picago.Photo, client
if err := photoNode.SetAttrs(
"camliContent", fileRef.String(),
"picasaId", photo.ID,
importer.AttrTitle, photo.Title,
nodeattr.Title, photo.Title,
"caption", photo.Summary,
importer.AttrDescription, photo.Description,
nodeattr.Description, photo.Description,
importer.AttrLocationText, photo.Location,
"latitude", fmt.Sprintf("%f", photo.Latitude),
"longitude", fmt.Sprintf("%f", photo.Longitude),

View File

@ -23,4 +23,10 @@ const (
// Camlistore's addition that zone -00:01 means localtime:
// unknown timezone).
DateCreated = "dateCreated"
// http://schema.org/title
Title = "title"
// http://schema.org/description
// Value is plain text, no HTML, newlines are newlines.
Description = "description"
)

View File

@ -33,6 +33,7 @@ import (
"log"
"os"
"reflect"
"regexp"
"strconv"
"strings"
"sync"
@ -1000,3 +1001,12 @@ func lookupLocation(zone string) *time.Location {
}
return loc
}
// boringTitlePattern matches titles that start with one of the prefixes
// IMG_, DSC, PANO_ or ESR_. The match is case-sensitive and anchored at the
// beginning of the title.
var boringTitlePattern = regexp.MustCompile(`^(?:IMG_|DSC|PANO_|ESR_).*$`)

// IsInterestingTitle reports whether title carries useful information as the
// title of a permanode. Names generated automatically by cameras, such as
// IMG_XXXX.JPG, are considered boring and yield false; any title that
// boringTitlePattern does not match, including the empty string, yields true.
func IsInterestingTitle(title string) bool {
	boring := boringTitlePattern.MatchString(title)
	return !boring
}