From 05f86e784673ac417e095673999b6569fc9779cc Mon Sep 17 00:00:00 2001
From: Niklas Merz
Date: Sat, 2 Jun 2018 12:37:55 +0200
Subject: [PATCH] importer/twitter: add likes import
Import liked tweets via favorites API
Change-Id: I8000bcd20cd895e540ec4d3b36035531172c7cb6
---
doc/search-ui.md | 3 +
pkg/importer/twitter/twitter.go | 72 ++++++++++++++++---
pkg/search/predicate.go | 22 ++++++
.../perkeepd/ui/blob_item_twitter_content.js | 2 +-
4 files changed, 88 insertions(+), 11 deletions(-)
diff --git a/doc/search-ui.md b/doc/search-ui.md
index 128f6ff44..a91dabb47 100644
--- a/doc/search-ui.md
+++ b/doc/search-ui.md
@@ -75,6 +75,9 @@ escaping. For example:
rest of whole expression (regardless of its position within the expression),
logical 'or's around it are forbidden.
+**is:like**
+: object is a liked tweet
+
**is:image**
: object is an image
diff --git a/pkg/importer/twitter/twitter.go b/pkg/importer/twitter/twitter.go
index 63fe1cbba..b17495044 100644
--- a/pkg/importer/twitter/twitter.go
+++ b/pkg/importer/twitter/twitter.go
@@ -56,6 +56,7 @@ const (
tokenRequestURL = "https://api.twitter.com/oauth/access_token"
userInfoAPIPath = "account/verify_credentials.json"
userTimeLineAPIPath = "statuses/user_timeline.json"
+ userLikesAPIPath = "favorites/list.json"
// runCompleteVersion is a cache-busting version number of the
// importer code. It should be incremented whenever the
@@ -75,6 +76,13 @@ const (
// ... and re-do an import.
acctAttrTweetZip = "twitterArchiveZipFileRef"
+ // acctAttrImportLikes specifies an optional attribute for the account permanode.
+ // If set to true, likes are imported via the Twitter API.
+ // You can enable importing likes like this:
+ // $ pk-put attr <acct-permanode> twitterImportLikes true
+ // ... and re-do an import.
+ acctAttrImportLikes = "twitterImportLikes"
+
// acctAttrZipDoneVersion is updated at the end of a successful zip import and
// is used to determine whether the zip file needs to be re-imported in a future run.
acctAttrZipDoneVersion = "twitterZipDoneVersion" // == ":"
@@ -84,6 +92,12 @@ const (
tweetRequestLimit = 200 // max number of tweets we can get in a user_timeline request
tweetsAtOnce = 20 // how many tweets to import at once
+
+ // A tweet is stored as a permanode with the "twitter.com:tweet" camliNodeType value.
+ nodeTypeTweet = "twitter.com:tweet"
+
+ // A like is stored as a permanode with the "twitter.com:like" camliNodeType value.
+ nodeTypeLike = "twitter.com:like"
)
var oAuthURIs = importer.OAuthURIs{
@@ -169,6 +183,10 @@ return the zip-fileref), and signal the twitter importer that you have it, with<
"pk-put attr <acct-permanode> twitterArchiveZipFileRef <zip-fileref>".
Then you can start running the importer.
+
+If you want to import likes as well, please run
+"pk-put attr <acct-permanode> twitterImportLikes true" to enable it.
+
`, base, base+"/callback")
}
@@ -225,7 +243,18 @@ func (im *imp) Run(ctx *importer.RunContext) error {
skipAPITweets, _ := strconv.ParseBool(os.Getenv("CAMLI_TWITTER_SKIP_API_IMPORT"))
if !skipAPITweets {
- if err := r.importTweets(userID); err != nil {
+ if err := r.importTweets(userID, userTimeLineAPIPath); err != nil {
+ return err
+ }
+ }
+
+ acctNode, err = ctx.Host.ObjectFromRef(acctNode.PermanodeRef())
+ if err != nil {
+ return fmt.Errorf("error reloading account node: %v", err)
+ }
+ importLikes, err := strconv.ParseBool(acctNode.Attr(acctAttrImportLikes))
+ if err == nil && importLikes {
+ if err := r.importTweets(userID, userLikesAPIPath); err != nil {
return err
}
}
@@ -339,11 +368,22 @@ func (r *run) doAPI(result interface{}, apiPath string, keyval ...string) error
r.accessCreds}.PopulateJSONFromURL(result, apiURL+apiPath, keyval...)
}
-func (r *run) importTweets(userID string) error {
+// importTweets imports the tweets related to userID, through apiPath.
+// If apiPath is userTimeLineAPIPath, the tweets and retweets posted by userID are imported.
+// If apiPath is userLikesAPIPath, the tweets liked by userID are imported.
+func (r *run) importTweets(userID string, apiPath string) error {
maxId := ""
continueRequests := true
- tweetsNode, err := r.getTopLevelNode("tweets")
+ var tweetsNode *importer.Object
+ var err error
+ var importType string
+ if apiPath == userLikesAPIPath {
+ importType = "likes"
+ } else {
+ importType = "tweets"
+ }
+ tweetsNode, err = r.getTopLevelNode(importType)
if err != nil {
return err
}
@@ -368,11 +408,11 @@ func (r *run) importTweets(userID string) error {
var resp []*apiTweetItem
var err error
if maxId == "" {
- log.Printf("twitter: fetching tweets for userid %s", userID)
- err = r.doAPI(&resp, userTimeLineAPIPath, attrs...)
+ log.Printf("twitter: fetching %s for userid %s", importType, userID)
+ err = r.doAPI(&resp, apiPath, attrs...)
} else {
- log.Printf("twitter: fetching tweets for userid %s with max ID %s", userID, maxId)
- err = r.doAPI(&resp, userTimeLineAPIPath,
+ log.Printf("twitter: fetching %s for userid %s with max ID %s", importType, userID, maxId)
+ err = r.doAPI(&resp, apiPath,
append(attrs, "max_id", maxId)...)
}
if err != nil {
@@ -416,14 +456,14 @@ func (r *run) importTweets(userID string) error {
return err
}
numTweets += newThisBatch
- log.Printf("twitter: imported %d tweets this batch; %d total.", newThisBatch, numTweets)
+ log.Printf("twitter: imported %d %s this batch; %d total.", newThisBatch, importType, numTweets)
if r.incremental && allDups {
log.Printf("twitter: incremental import found end batch")
break
}
continueRequests = newThisBatch > 0
}
- log.Printf("twitter: successfully did full run of importing %d tweets", numTweets)
+ log.Printf("twitter: successfully did full run of importing %d %s", numTweets, importType)
return nil
}
@@ -533,9 +573,14 @@ func (r *run) importTweet(parent *importer.Object, tweet tweetItem, viaAPI bool)
r.AccountNode().Attr(importer.AcctAttrUserName),
id)
+ nodeType := nodeTypeTweet
+ if tweet.Liked() {
+ nodeType = nodeTypeLike
+ }
+
attrs := []string{
"twitterId", id,
- nodeattr.Type, "twitter.com:tweet",
+ nodeattr.Type, nodeType,
nodeattr.StartDate, schema.RFC3339FromTime(createdTime),
nodeattr.Content, tweet.Text(),
nodeattr.URL, url,
@@ -617,6 +662,8 @@ func (r *run) getTopLevelNode(path string) (*importer.Object, error) {
switch path {
case "tweets":
title = fmt.Sprintf("%s's Tweets", acctNode.Attr(importer.AcctAttrUserName))
+ case "likes":
+ title = fmt.Sprintf("%s's Likes", acctNode.Attr(importer.AcctAttrUserName))
}
return obj, obj.SetAttr(nodeattr.Title, title)
}
@@ -732,6 +779,7 @@ type tweetItem interface {
CreatedAt() string
Text() string
Media() []tweetMedia
+ Liked() bool
}
type tweetMedia interface {
@@ -744,6 +792,7 @@ type apiTweetItem struct {
TextStr string `json:"text"`
CreatedAtStr string `json:"created_at"`
Entities entities `json:"entities"`
+ Favorited bool `json:"favorited"`
// One or both might be present:
Geo *geo `json:"geo"` // lat, long
@@ -822,6 +871,9 @@ func (t *apiTweetItem) Media() (ret []tweetMedia) {
return
}
+func (t *apiTweetItem) Liked() bool { return t.Favorited }
+func (t *zipTweetItem) Liked() bool { return false }
+
type geo struct {
Coordinates []float64 `json:"coordinates"` // lat,long
}
diff --git a/pkg/search/predicate.go b/pkg/search/predicate.go
index 48fce134b..856095155 100644
--- a/pkg/search/predicate.go
+++ b/pkg/search/predicate.go
@@ -126,6 +126,7 @@ func init() {
// Custom predicates
registerKeyword(newIsPost())
+ registerKeyword(newIsLike())
registerKeyword(newIsCheckin())
// Location predicates
@@ -839,6 +840,27 @@ func (k isPost) Predicate(ctx context.Context, args []string) (*Constraint, erro
}, nil
}
+type isLike struct {
+ matchEqual
+}
+
+func newIsLike() keyword {
+ return isLike{"is:like"}
+}
+
+func (k isLike) Description() string {
+ return "matches liked tweets"
+}
+
+func (k isLike) Predicate(ctx context.Context, args []string) (*Constraint, error) {
+ return &Constraint{
+ Permanode: &PermanodeConstraint{
+ Attr: nodeattr.Type,
+ Value: "twitter.com:like",
+ },
+ }, nil
+}
+
type isCheckin struct {
matchEqual
}
diff --git a/server/perkeepd/ui/blob_item_twitter_content.js b/server/perkeepd/ui/blob_item_twitter_content.js
index 4a3cf61d4..6d6a5b907 100644
--- a/server/perkeepd/ui/blob_item_twitter_content.js
+++ b/server/perkeepd/ui/blob_item_twitter_content.js
@@ -85,7 +85,7 @@ cam.BlobItemTwitterContent.getHandler = function(blobref, searchSession, href) {
return null;
}
- if (cam.permanodeUtils.getCamliNodeType(m.permanode) != 'twitter.com:tweet') {
+ if (cam.permanodeUtils.getCamliNodeType(m.permanode) != 'twitter.com:tweet' && cam.permanodeUtils.getCamliNodeType(m.permanode) != 'twitter.com:like') {
return null;
}