From bfb610d4d59f625fda67d3307f1e71c1dfedc472 Mon Sep 17 00:00:00 2001 From: Brad Fitzpatrick Date: Mon, 5 May 2014 11:51:17 -0700 Subject: [PATCH] importer/foursquare: don't fetch images if we already have Change-Id: Ib36e1b7337efa2bcd3de2aa038005b11225e05a8 --- pkg/importer/foursquare/foursquare.go | 41 ++++++++++++++++++++------- pkg/importer/importer.go | 14 +++++++++ 2 files changed, 44 insertions(+), 11 deletions(-) diff --git a/pkg/importer/foursquare/foursquare.go b/pkg/importer/foursquare/foursquare.go index 3838e455f..146c5ccab 100644 --- a/pkg/importer/foursquare/foursquare.go +++ b/pkg/importer/foursquare/foursquare.go @@ -26,6 +26,7 @@ import ( "path/filepath" "sort" "strconv" + "strings" "sync" "time" @@ -196,13 +197,13 @@ func (r *run) importCheckins() error { continue } - _, err = r.importCheckin(checkinsNode, checkin, placeNode.PermanodeRef()) + _, dup, err := r.importCheckin(checkinsNode, checkin, placeNode.PermanodeRef()) if err != nil { log.Printf("Foursquare importer: error importing checkin %s %v", checkin.Id, err) continue } - err = r.importPhotos(placeNode) + err = r.importPhotos(placeNode, dup) if err != nil { log.Printf("Foursquare importer: error importing photos for checkin %s %v", checkin.Id, err) continue @@ -213,7 +214,7 @@ func (r *run) importCheckins() error { return nil } -func (r *run) importPhotos(placeNode *importer.Object) error { +func (r *run) importPhotos(placeNode *importer.Object, checkinWasDup bool) error { photosNode, err := placeNode.ChildPathObject("photos") if err != nil { return err @@ -225,8 +226,24 @@ func (r *run) importPhotos(placeNode *importer.Object) error { return err } + nHave := 0 + photosNode.ForeachAttr(func(key, value string) { + if strings.HasPrefix(key, "camliPath:") { + nHave++ + } + }) + nWant := 5 + if checkinWasDup { + nWant = 1 + } + if nHave >= nWant { + return nil + } + resp := photosList{} - if err := r.im.doAPI(r.Context, r.token(), &resp, "venues/"+placeNode.Attr("foursquareId")+"/photos", "limit", "10"); err != nil { + if err := r.im.doAPI(r.Context, r.token(), &resp, + "venues/"+placeNode.Attr("foursquareId")+"/photos", + "limit", strconv.Itoa(nWant)); err != nil { return err } @@ -242,6 +259,9 @@ func (r *run) importPhotos(placeNode *importer.Object) error { log.Printf("foursquare: importing %d photos for venue %s", len(need), placeNode.Attr("title")) for _, photo := range need { attr := "camliPath:" + photo.Id + filepath.Ext(photo.Suffix) + if photosNode.Attr(attr) != "" { + continue + } url := photo.Prefix + "original" + photo.Suffix log.Printf("foursquare: importing photo for venue %s: %s", placeNode.Attr("title"), url) ref := r.urlFileRef(url, "") @@ -258,24 +278,23 @@ func (r *run) importPhotos(placeNode *importer.Object) error { return nil } -func (r *run) importCheckin(parent *importer.Object, checkin *checkinItem, placeRef blob.Ref) (*importer.Object, error) { - checkinNode, err := parent.ChildPathObject(checkin.Id) +func (r *run) importCheckin(parent *importer.Object, checkin *checkinItem, placeRef blob.Ref) (checkinNode *importer.Object, dup bool, err error) { + checkinNode, err = parent.ChildPathObject(checkin.Id) if err != nil { - return nil, err + return } title := fmt.Sprintf("Checkin at %s", checkin.Venue.Name) - + dup = checkinNode.Attr("startDate") != "" if err := checkinNode.SetAttrs( "foursquareId", checkin.Id, "foursquareVenuePermanode", placeRef.String(), "camliNodeType", "foursquare.com:checkin", "startDate", schema.RFC3339FromTime(time.Unix(checkin.CreatedAt, 0)), "title", title); err != nil { - return nil, err + return nil, false, err } - - return checkinNode, nil + return checkinNode, dup, nil } func (r *run) importPlace(parent *importer.Object, place *venueItem) (*importer.Object, error) { diff --git a/pkg/importer/importer.go b/pkg/importer/importer.go index 44dfe2631..3f2ea8617 100644 --- a/pkg/importer/importer.go +++ b/pkg/importer/importer.go @@ -888,6 +888,20 @@ func (o *Object) Attrs(attr string) []string { return o.attr[attr] } +// ForeachAttr runs fn for each of the object's attributes & values. +// There might be multiple values for the same attribute. +// The internal lock is held while running, so no mutations should be +// made or it will deadlock. +func (o *Object) ForeachAttr(fn func(key, value string)) { + o.mu.RLock() + defer o.mu.RUnlock() + for k, vv := range o.attr { + for _, v := range vv { + fn(k, v) + } + } +} + // SetAttr sets the attribute key to value. func (o *Object) SetAttr(key, value string) error { if o.Attr(key) == value {