From 05f86e784673ac417e095673999b6569fc9779cc Mon Sep 17 00:00:00 2001 From: Niklas Merz Date: Sat, 2 Jun 2018 12:37:55 +0200 Subject: [PATCH] importer/twitter: add likes import Import liked tweets via favorites API Change-Id: I8000bcd20cd895e540ec4d3b36035531172c7cb6 --- doc/search-ui.md | 3 + pkg/importer/twitter/twitter.go | 72 ++++++++++++++++--- pkg/search/predicate.go | 22 ++++++ .../perkeepd/ui/blob_item_twitter_content.js | 2 +- 4 files changed, 88 insertions(+), 11 deletions(-) diff --git a/doc/search-ui.md b/doc/search-ui.md index 128f6ff44..a91dabb47 100644 --- a/doc/search-ui.md +++ b/doc/search-ui.md @@ -75,6 +75,9 @@ escaping. For example: rest of whole expression (regardless of its position within the expression), logical 'or's around it are forbidden. +**is:like** +: object is a liked tweet + **is:image** : object is an image diff --git a/pkg/importer/twitter/twitter.go b/pkg/importer/twitter/twitter.go index 63fe1cbba..b17495044 100644 --- a/pkg/importer/twitter/twitter.go +++ b/pkg/importer/twitter/twitter.go @@ -56,6 +56,7 @@ const ( tokenRequestURL = "https://api.twitter.com/oauth/access_token" userInfoAPIPath = "account/verify_credentials.json" userTimeLineAPIPath = "statuses/user_timeline.json" + userLikesAPIPath = "favorites/list.json" // runCompleteVersion is a cache-busting version number of the // importer code. It should be incremented whenever the @@ -75,6 +76,13 @@ const ( // ... and re-do an import. acctAttrTweetZip = "twitterArchiveZipFileRef" + // acctAttrImportLikes specifies an optional attribute for the account permanode. + // If set to true, likes are imported via the twitter API. + // You can enable importing likes like this: + // $ pk-put attr <acct-permanode> twitterImportLikes true + // ... and re-do an import. + acctAttrImportLikes = "twitterImportLikes" + // acctAttrZipDoneVersion is updated at the end of a successful zip import and // is used to determine whether the zip file needs to be re-imported in a future run. 
acctAttrZipDoneVersion = "twitterZipDoneVersion" // == ":" @@ -84,6 +92,12 @@ const ( tweetRequestLimit = 200 // max number of tweets we can get in a user_timeline request tweetsAtOnce = 20 // how many tweets to import at once + + // A tweet is stored as a permanode with the "twitter.com:tweet" camliNodeType value. + nodeTypeTweet = "twitter.com:tweet" + + // A like is stored as a permanode with the "twitter.com:like" camliNodeType value. + nodeTypeLike = "twitter.com:like" ) var oAuthURIs = importer.OAuthURIs{ @@ -169,6 +183,10 @@ return the zip-fileref), and signal the twitter importer that you have it, with< "pk-put attr <acct-permanode> twitterArchiveZipFileRef <zip-fileref>".
Then you can start running the importer.

+

+If you want to import likes as well, please run
+"pk-put attr <acct-permanode> twitterImportLikes true" to enable it. +

`, base, base+"/callback") } @@ -225,7 +243,18 @@ func (im *imp) Run(ctx *importer.RunContext) error { skipAPITweets, _ := strconv.ParseBool(os.Getenv("CAMLI_TWITTER_SKIP_API_IMPORT")) if !skipAPITweets { - if err := r.importTweets(userID); err != nil { + if err := r.importTweets(userID, userTimeLineAPIPath); err != nil { + return err + } + } + + acctNode, err = ctx.Host.ObjectFromRef(acctNode.PermanodeRef()) + if err != nil { + return fmt.Errorf("error reloading account node: %v", err) + } + importLikes, err := strconv.ParseBool(acctNode.Attr(acctAttrImportLikes)) + if err == nil && importLikes { + if err := r.importTweets(userID, userLikesAPIPath); err != nil { return err } } @@ -339,11 +368,22 @@ func (r *run) doAPI(result interface{}, apiPath string, keyval ...string) error r.accessCreds}.PopulateJSONFromURL(result, apiURL+apiPath, keyval...) } -func (r *run) importTweets(userID string) error { +// importTweets imports the tweets related to userID, through apiPath. +// If apiPath is userTimeLineAPIPath, the tweets and retweets posted by userID are imported. +// If apiPath is userLikesAPIPath, the tweets liked by userID are imported. +func (r *run) importTweets(userID string, apiPath string) error { maxId := "" continueRequests := true - tweetsNode, err := r.getTopLevelNode("tweets") + var tweetsNode *importer.Object + var err error + var importType string + if apiPath == userLikesAPIPath { + importType = "likes" + } else { + importType = "tweets" + } + tweetsNode, err = r.getTopLevelNode(importType) if err != nil { return err } @@ -368,11 +408,11 @@ func (r *run) importTweets(userID string) error { var resp []*apiTweetItem var err error if maxId == "" { - log.Printf("twitter: fetching tweets for userid %s", userID) - err = r.doAPI(&resp, userTimeLineAPIPath, attrs...) + log.Printf("twitter: fetching %s for userid %s", importType, userID) + err = r.doAPI(&resp, apiPath, attrs...) 
} else { - log.Printf("twitter: fetching tweets for userid %s with max ID %s", userID, maxId) - err = r.doAPI(&resp, userTimeLineAPIPath, + log.Printf("twitter: fetching %s for userid %s with max ID %s", importType, userID, maxId) + err = r.doAPI(&resp, apiPath, append(attrs, "max_id", maxId)...) } if err != nil { @@ -416,14 +456,14 @@ func (r *run) importTweets(userID string) error { return err } numTweets += newThisBatch - log.Printf("twitter: imported %d tweets this batch; %d total.", newThisBatch, numTweets) + log.Printf("twitter: imported %d %s this batch; %d total.", newThisBatch, importType, numTweets) if r.incremental && allDups { log.Printf("twitter: incremental import found end batch") break } continueRequests = newThisBatch > 0 } - log.Printf("twitter: successfully did full run of importing %d tweets", numTweets) + log.Printf("twitter: successfully did full run of importing %d %s", numTweets, importType) return nil } @@ -533,9 +573,14 @@ func (r *run) importTweet(parent *importer.Object, tweet tweetItem, viaAPI bool) r.AccountNode().Attr(importer.AcctAttrUserName), id) + nodeType := nodeTypeTweet + if tweet.Liked() { + nodeType = nodeTypeLike + } + attrs := []string{ "twitterId", id, - nodeattr.Type, "twitter.com:tweet", + nodeattr.Type, nodeType, nodeattr.StartDate, schema.RFC3339FromTime(createdTime), nodeattr.Content, tweet.Text(), nodeattr.URL, url, @@ -617,6 +662,8 @@ func (r *run) getTopLevelNode(path string) (*importer.Object, error) { switch path { case "tweets": title = fmt.Sprintf("%s's Tweets", acctNode.Attr(importer.AcctAttrUserName)) + case "likes": + title = fmt.Sprintf("%s's Likes", acctNode.Attr(importer.AcctAttrUserName)) } return obj, obj.SetAttr(nodeattr.Title, title) } @@ -732,6 +779,7 @@ type tweetItem interface { CreatedAt() string Text() string Media() []tweetMedia + Liked() bool } type tweetMedia interface { @@ -744,6 +792,7 @@ type apiTweetItem struct { TextStr string `json:"text"` CreatedAtStr string `json:"created_at"` Entities 
entities `json:"entities"` + Favorited bool `json:"favorited"` // One or both might be present: Geo *geo `json:"geo"` // lat, long @@ -822,6 +871,9 @@ func (t *apiTweetItem) Media() (ret []tweetMedia) { return } +func (t *apiTweetItem) Liked() bool { return t.Favorited } +func (t *zipTweetItem) Liked() bool { return false } + type geo struct { Coordinates []float64 `json:"coordinates"` // lat,long } diff --git a/pkg/search/predicate.go b/pkg/search/predicate.go index 48fce134b..856095155 100644 --- a/pkg/search/predicate.go +++ b/pkg/search/predicate.go @@ -126,6 +126,7 @@ func init() { // Custom predicates registerKeyword(newIsPost()) + registerKeyword(newIsLike()) registerKeyword(newIsCheckin()) // Location predicates @@ -839,6 +840,27 @@ func (k isPost) Predicate(ctx context.Context, args []string) (*Constraint, erro }, nil } +type isLike struct { + matchEqual +} + +func newIsLike() keyword { + return isLike{"is:like"} +} + +func (k isLike) Description() string { + return "matches liked tweets" +} + +func (k isLike) Predicate(ctx context.Context, args []string) (*Constraint, error) { + return &Constraint{ + Permanode: &PermanodeConstraint{ + Attr: nodeattr.Type, + Value: "twitter.com:like", + }, + }, nil +} + type isCheckin struct { matchEqual } diff --git a/server/perkeepd/ui/blob_item_twitter_content.js b/server/perkeepd/ui/blob_item_twitter_content.js index 4a3cf61d4..6d6a5b907 100644 --- a/server/perkeepd/ui/blob_item_twitter_content.js +++ b/server/perkeepd/ui/blob_item_twitter_content.js @@ -85,7 +85,7 @@ cam.BlobItemTwitterContent.getHandler = function(blobref, searchSession, href) { return null; } - if (cam.permanodeUtils.getCamliNodeType(m.permanode) != 'twitter.com:tweet') { + if (cam.permanodeUtils.getCamliNodeType(m.permanode) != 'twitter.com:tweet' && cam.permanodeUtils.getCamliNodeType(m.permanode) != 'twitter.com:like') { return null; }