diff --git a/pkg/importer/attrs.go b/pkg/importer/attrs.go index 149d916ba..d226bab75 100644 --- a/pkg/importer/attrs.go +++ b/pkg/importer/attrs.go @@ -56,13 +56,8 @@ const ( // ItemAttrID is the generic identifier of an item when nothing suitable and more specific // was found on http://schema.org. Usually a number. AttrID = "ID" - // http://schema.org/title - AttrTitle = "title" // http://schema.org/name AttrName = "name" - // http://schema.org/description - // Value is plain text, no HTML, newlines are newlines. - AttrDescription = "description" // Free-flowing text definition of a location or place, such // as a city name, or a full postal address. AttrLocationText = "locationText" diff --git a/pkg/importer/flickr/flickr.go b/pkg/importer/flickr/flickr.go index 93caa4e6c..088ba13db 100644 --- a/pkg/importer/flickr/flickr.go +++ b/pkg/importer/flickr/flickr.go @@ -30,6 +30,7 @@ import ( "camlistore.org/pkg/httputil" "camlistore.org/pkg/importer" "camlistore.org/pkg/schema" + "camlistore.org/pkg/schema/nodeattr" "camlistore.org/third_party/github.com/garyburd/go-oauth/oauth" ) @@ -38,6 +39,8 @@ const ( temporaryCredentialRequestURL = "https://www.flickr.com/services/oauth/request_token" resourceOwnerAuthorizationURL = "https://www.flickr.com/services/oauth/authorize" tokenRequestURL = "https://www.flickr.com/services/oauth/access_token" + + attrFlickrId = "flickrId" ) func init() { @@ -198,9 +201,9 @@ func (r *run) importPhotoset(parent *importer.Object, photoset *photosetsGetList } if err := photosetNode.SetAttrs( - importer.AttrID, photoset.ID, - importer.AttrTitle, photoset.Title.Content, - importer.AttrDescription, photoset.Description.Content, + attrFlickrId, photoset.ID, + nodeattr.Title, photoset.Title.Content, + nodeattr.Description, photoset.Description.Content, importer.AttrPrimaryImageOfPage, photoset.PrimaryPhotoID); err != nil { return 0, err } @@ -254,18 +257,18 @@ type photosSearch struct { type photosSearchItem struct { ID string `json:"id"` Title string - Ispublic int - Isfriend int - Isfamily int + IsPublic int + IsFriend int + IsFamily int Description contentString - Dateupload string - Datetaken string - Originalformat string - Lastupdate string + DateUpload string // Unix timestamp, in GMT. + DateTaken string // formatted as "2006-01-02 15:04:05", so no timezone info. + OriginalFormat string + LastUpdate string Latitude float32 Longitude float32 Tags string - Machinetags string `json:"machine_tags"` + MachineTags string `json:"machine_tags"` Views string Media string URL string `json:"url_o"` @@ -321,24 +324,41 @@ func (r *run) importPhotosPage(page int) (int, error) { // * Conflicts: For all metadata changes, prefer any non-imported claims // * Test! func (r *run) importPhoto(parent *importer.Object, photo *photosSearchItem) error { - filename := fmt.Sprintf("%s.%s", photo.ID, photo.Originalformat) + filename := fmt.Sprintf("%s.%s", photo.ID, photo.OriginalFormat) photoNode, err := parent.ChildPathObject(filename) if err != nil { return err } + // https://www.flickr.com/services/api/misc.dates.html + dateTaken, err := time.ParseInLocation("2006-01-02 15:04:05", photo.DateTaken, schema.UnknownLocation) + if err != nil { + // default to the published date otherwise + log.Printf("Flickr importer: problem with date taken of photo %v, defaulting to published date instead.", photo.ID) + seconds, err := strconv.ParseInt(photo.DateUpload, 10, 64) + if err != nil { + return fmt.Errorf("could not parse date upload time %q for image %v: %v", photo.DateUpload, photo.ID, err) + } + dateTaken = time.Unix(seconds, 0) + } + + attrs := []string{ + attrFlickrId, photo.ID, + nodeattr.DateCreated, schema.RFC3339FromTime(dateTaken), + nodeattr.Description, photo.Description.Content, + } + if schema.IsInterestingTitle(photo.Title) { + attrs = append(attrs, nodeattr.Title, photo.Title) + } // Import all the metadata. SetAttrs() is a no-op if the value hasn't changed, so there's no cost to doing these on every run. // And this way if we add more things to import, they will get picked up. - if err := photoNode.SetAttrs( - importer.AttrID, photo.ID, - importer.AttrTitle, photo.Title, - importer.AttrDescription, photo.Description.Content); err != nil { + if err := photoNode.SetAttrs(attrs...); err != nil { return err } // Import the photo itself. Since it is expensive to fetch the image, we store its lastupdate and only refetch if it might have changed. // lastupdate is a Unix timestamp according to https://www.flickr.com/services/api/flickr.photos.getInfo.html - seconds, err := strconv.ParseInt(photo.Lastupdate, 10, 64) + seconds, err := strconv.ParseInt(photo.LastUpdate, 10, 64) if err != nil { return fmt.Errorf("could not parse lastupdate time for image %v: %v", photo.ID, err) } @@ -387,7 +407,7 @@ func (r *run) getTopLevelNode(path string, title string) (*importer.Object, erro return nil, err } - if err := photos.SetAttr(importer.AttrTitle, title); err != nil { + if err := photos.SetAttr(nodeattr.Title, title); err != nil { return nil, err } return photos, nil diff --git a/pkg/importer/picasa/picasa.go b/pkg/importer/picasa/picasa.go index 7278086b2..53b5cee2a 100644 --- a/pkg/importer/picasa/picasa.go +++ b/pkg/importer/picasa/picasa.go @@ -28,6 +28,7 @@ import ( "camlistore.org/pkg/context" "camlistore.org/pkg/importer" "camlistore.org/pkg/schema" + "camlistore.org/pkg/schema/nodeattr" "camlistore.org/third_party/code.google.com/p/goauth2/oauth" "camlistore.org/third_party/github.com/tgulacsi/picago" @@ -159,7 +160,7 @@ func (r *run) importAlbum(albumsNode *importer.Object, album picago.Album, clien if err = albumNode.SetAttrs( "picasaId", album.ID, "camliNodeType", "picasaweb.google.com:album", - importer.AttrTitle, album.Title, + nodeattr.Title, album.Title, importer.AttrLocationText, album.Location, ); err != nil { return fmt.Errorf("error setting album attributes: %v", err) @@ -257,9 +258,9 @@ func (r *run) importPhoto(albumNode *importer.Object, photo picago.Photo, client if err := photoNode.SetAttrs( "camliContent", fileRef.String(), "picasaId", photo.ID, - importer.AttrTitle, photo.Title, + nodeattr.Title, photo.Title, "caption", photo.Summary, - importer.AttrDescription, photo.Description, + nodeattr.Description, photo.Description, importer.AttrLocationText, photo.Location, "latitude", fmt.Sprintf("%f", photo.Latitude), "longitude", fmt.Sprintf("%f", photo.Longitude), diff --git a/pkg/schema/nodeattr/nodeattr.go b/pkg/schema/nodeattr/nodeattr.go index 3ee389700..8b024087c 100644 --- a/pkg/schema/nodeattr/nodeattr.go +++ b/pkg/schema/nodeattr/nodeattr.go @@ -23,4 +23,10 @@ const ( // Camlistore's addition that zone -00:01 means localtime: // unknown timezone). DateCreated = "dateCreated" + + // http://schema.org/title + Title = "title" + // http://schema.org/description + // Value is plain text, no HTML, newlines are newlines. + Description = "description" ) diff --git a/pkg/schema/schema.go b/pkg/schema/schema.go index 6e1bc572a..565018f93 100644 --- a/pkg/schema/schema.go +++ b/pkg/schema/schema.go @@ -33,6 +33,7 @@ import ( "log" "os" "reflect" + "regexp" "strconv" "strings" "sync" @@ -1000,3 +1001,12 @@ func lookupLocation(zone string) *time.Location { } return loc } + +var boringTitlePattern = regexp.MustCompile(`^(?:IMG_|DSC|PANO_|ESR_).*$`) + +// IsInterestingTitle returns whether title would be interesting information as +// a title for a permanode. For example, filenames automatically created by +// cameras, such as IMG_XXXX.JPG, do not add any interesting value. +func IsInterestingTitle(title string) bool { + return !boringTitlePattern.MatchString(title) +}