Merge "importer/twitter: add likes import"

This commit is contained in:
Mathieu Lonjaret 2018-08-21 23:53:37 +00:00 committed by Gerrit Code Review
commit 9a49f0959d
4 changed files with 88 additions and 11 deletions

View File

@ -75,6 +75,9 @@ escaping. For example:
rest of whole expression (regardless of its position within the expression), rest of whole expression (regardless of its position within the expression),
logical 'or's around it are forbidden. logical 'or's around it are forbidden.
**<a name="like"></a>is:like**
: object is a liked tweet
**<a name="image"></a>is:image** **<a name="image"></a>is:image**
: object is an image : object is an image

View File

@ -56,6 +56,7 @@ const (
tokenRequestURL = "https://api.twitter.com/oauth/access_token" tokenRequestURL = "https://api.twitter.com/oauth/access_token"
userInfoAPIPath = "account/verify_credentials.json" userInfoAPIPath = "account/verify_credentials.json"
userTimeLineAPIPath = "statuses/user_timeline.json" userTimeLineAPIPath = "statuses/user_timeline.json"
userLikesAPIPath = "favorites/list.json"
// runCompleteVersion is a cache-busting version number of the // runCompleteVersion is a cache-busting version number of the
// importer code. It should be incremented whenever the // importer code. It should be incremented whenever the
@ -75,6 +76,13 @@ const (
// ... and re-do an import. // ... and re-do an import.
acctAttrTweetZip = "twitterArchiveZipFileRef" acctAttrTweetZip = "twitterArchiveZipFileRef"
// acctAttrImportLikes specifies an optional attribute for the account permanode.
// If set to true, likes are imported via the Twitter API.
// You can enable importing likes like this:
// $ pk-put attr <acct-permanode> twitterImportLikes true
// ... and re-do an import.
acctAttrImportLikes = "twitterImportLikes"
// acctAttrZipDoneVersion is updated at the end of a successful zip import and // acctAttrZipDoneVersion is updated at the end of a successful zip import and
// is used to determine whether the zip file needs to be re-imported in a future run. // is used to determine whether the zip file needs to be re-imported in a future run.
acctAttrZipDoneVersion = "twitterZipDoneVersion" // == "<fileref>:<runCompleteVersion>" acctAttrZipDoneVersion = "twitterZipDoneVersion" // == "<fileref>:<runCompleteVersion>"
@ -84,6 +92,12 @@ const (
tweetRequestLimit = 200 // max number of tweets we can get in a user_timeline request tweetRequestLimit = 200 // max number of tweets we can get in a user_timeline request
tweetsAtOnce = 20 // how many tweets to import at once tweetsAtOnce = 20 // how many tweets to import at once
// A tweet is stored as a permanode with the "twitter.com:tweet" camliNodeType value.
nodeTypeTweet = "twitter.com:tweet"
// A like is stored as a permanode with the "twitter.com:like" camliNodeType value.
nodeTypeLike = "twitter.com:like"
) )
var oAuthURIs = importer.OAuthURIs{ var oAuthURIs = importer.OAuthURIs{
@ -169,6 +183,10 @@ return the zip-fileref), and signal the twitter importer that you have it, with<
"pk-put attr &lt;acct-permanode&gt; twitterArchiveZipFileRef &lt;zip-fileref&gt;".<br> "pk-put attr &lt;acct-permanode&gt; twitterArchiveZipFileRef &lt;zip-fileref&gt;".<br>
Then you can start running the importer. Then you can start running the importer.
</p> </p>
<p>
If you want to import likes as well, please run <br>
"pk-put attr &lt;acct-permanode&gt; twitterImportLikes true" to enable it.
</p>
`, base, base+"/callback") `, base, base+"/callback")
} }
@ -225,7 +243,18 @@ func (im *imp) Run(ctx *importer.RunContext) error {
skipAPITweets, _ := strconv.ParseBool(os.Getenv("CAMLI_TWITTER_SKIP_API_IMPORT")) skipAPITweets, _ := strconv.ParseBool(os.Getenv("CAMLI_TWITTER_SKIP_API_IMPORT"))
if !skipAPITweets { if !skipAPITweets {
if err := r.importTweets(userID); err != nil { if err := r.importTweets(userID, userTimeLineAPIPath); err != nil {
return err
}
}
acctNode, err = ctx.Host.ObjectFromRef(acctNode.PermanodeRef())
if err != nil {
return fmt.Errorf("error reloading account node: %v", err)
}
importLikes, err := strconv.ParseBool(acctNode.Attr(acctAttrImportLikes))
if err == nil && importLikes {
if err := r.importTweets(userID, userLikesAPIPath); err != nil {
return err return err
} }
} }
@ -339,11 +368,22 @@ func (r *run) doAPI(result interface{}, apiPath string, keyval ...string) error
r.accessCreds}.PopulateJSONFromURL(result, apiURL+apiPath, keyval...) r.accessCreds}.PopulateJSONFromURL(result, apiURL+apiPath, keyval...)
} }
func (r *run) importTweets(userID string) error { // importTweets imports the tweets related to userID, through apiPath.
// If apiPath is userTimeLineAPIPath, the tweets and retweets posted by userID are imported.
// If apiPath is userLikesAPIPath, the tweets liked by userID are imported.
func (r *run) importTweets(userID string, apiPath string) error {
maxId := "" maxId := ""
continueRequests := true continueRequests := true
tweetsNode, err := r.getTopLevelNode("tweets") var tweetsNode *importer.Object
var err error
var importType string
if apiPath == userLikesAPIPath {
importType = "likes"
} else {
importType = "tweets"
}
tweetsNode, err = r.getTopLevelNode(importType)
if err != nil { if err != nil {
return err return err
} }
@ -368,11 +408,11 @@ func (r *run) importTweets(userID string) error {
var resp []*apiTweetItem var resp []*apiTweetItem
var err error var err error
if maxId == "" { if maxId == "" {
log.Printf("twitter: fetching tweets for userid %s", userID) log.Printf("twitter: fetching %s for userid %s", importType, userID)
err = r.doAPI(&resp, userTimeLineAPIPath, attrs...) err = r.doAPI(&resp, apiPath, attrs...)
} else { } else {
log.Printf("twitter: fetching tweets for userid %s with max ID %s", userID, maxId) log.Printf("twitter: fetching %s for userid %s with max ID %s", userID, importType, maxId)
err = r.doAPI(&resp, userTimeLineAPIPath, err = r.doAPI(&resp, apiPath,
append(attrs, "max_id", maxId)...) append(attrs, "max_id", maxId)...)
} }
if err != nil { if err != nil {
@ -416,14 +456,14 @@ func (r *run) importTweets(userID string) error {
return err return err
} }
numTweets += newThisBatch numTweets += newThisBatch
log.Printf("twitter: imported %d tweets this batch; %d total.", newThisBatch, numTweets) log.Printf("twitter: imported %d %s this batch; %d total.", newThisBatch, importType, numTweets)
if r.incremental && allDups { if r.incremental && allDups {
log.Printf("twitter: incremental import found end batch") log.Printf("twitter: incremental import found end batch")
break break
} }
continueRequests = newThisBatch > 0 continueRequests = newThisBatch > 0
} }
log.Printf("twitter: successfully did full run of importing %d tweets", numTweets) log.Printf("twitter: successfully did full run of importing %d %s", numTweets, importType)
return nil return nil
} }
@ -533,9 +573,14 @@ func (r *run) importTweet(parent *importer.Object, tweet tweetItem, viaAPI bool)
r.AccountNode().Attr(importer.AcctAttrUserName), r.AccountNode().Attr(importer.AcctAttrUserName),
id) id)
nodeType := nodeTypeTweet
if tweet.Liked() {
nodeType = nodeTypeLike
}
attrs := []string{ attrs := []string{
"twitterId", id, "twitterId", id,
nodeattr.Type, "twitter.com:tweet", nodeattr.Type, nodeType,
nodeattr.StartDate, schema.RFC3339FromTime(createdTime), nodeattr.StartDate, schema.RFC3339FromTime(createdTime),
nodeattr.Content, tweet.Text(), nodeattr.Content, tweet.Text(),
nodeattr.URL, url, nodeattr.URL, url,
@ -617,6 +662,8 @@ func (r *run) getTopLevelNode(path string) (*importer.Object, error) {
switch path { switch path {
case "tweets": case "tweets":
title = fmt.Sprintf("%s's Tweets", acctNode.Attr(importer.AcctAttrUserName)) title = fmt.Sprintf("%s's Tweets", acctNode.Attr(importer.AcctAttrUserName))
case "likes":
title = fmt.Sprintf("%s's Likes", acctNode.Attr(importer.AcctAttrUserName))
} }
return obj, obj.SetAttr(nodeattr.Title, title) return obj, obj.SetAttr(nodeattr.Title, title)
} }
@ -732,6 +779,7 @@ type tweetItem interface {
CreatedAt() string CreatedAt() string
Text() string Text() string
Media() []tweetMedia Media() []tweetMedia
Liked() bool
} }
type tweetMedia interface { type tweetMedia interface {
@ -744,6 +792,7 @@ type apiTweetItem struct {
TextStr string `json:"text"` TextStr string `json:"text"`
CreatedAtStr string `json:"created_at"` CreatedAtStr string `json:"created_at"`
Entities entities `json:"entities"` Entities entities `json:"entities"`
Favorited bool `json:"favorited"`
// One or both might be present: // One or both might be present:
Geo *geo `json:"geo"` // lat, long Geo *geo `json:"geo"` // lat, long
@ -822,6 +871,9 @@ func (t *apiTweetItem) Media() (ret []tweetMedia) {
return return
} }
// Liked reports the value of the tweet's Favorited field, which is decoded
// from the "favorited" key of the API response.
//
// NOTE(review): importTweet stores any tweet for which Liked returns true as
// a "twitter.com:like" node. Presumably a tweet from the user's own timeline
// that they also liked reports true here too; confirm that is intended.
func (t *apiTweetItem) Liked() bool {
	favorited := t.Favorited
	return favorited
}
// Liked always reports false for a zipTweetItem, so such tweets are never
// stored with the like node type.
func (t *zipTweetItem) Liked() bool {
	const liked = false
	return liked
}
type geo struct { type geo struct {
Coordinates []float64 `json:"coordinates"` // lat,long Coordinates []float64 `json:"coordinates"` // lat,long
} }

View File

@ -126,6 +126,7 @@ func init() {
// Custom predicates // Custom predicates
registerKeyword(newIsPost()) registerKeyword(newIsPost())
registerKeyword(newIsLike())
registerKeyword(newIsCheckin()) registerKeyword(newIsCheckin())
// Location predicates // Location predicates
@ -839,6 +840,27 @@ func (k isPost) Predicate(ctx context.Context, args []string) (*Constraint, erro
}, nil }, nil
} }
// isLike is the search keyword type behind the "is:like" predicate.
// It embeds matchEqual (declared elsewhere in this package), which
// newIsLike sets to the keyword's literal text.
type isLike struct {
matchEqual
}
// newIsLike builds the keyword registered for "is:like" search expressions.
func newIsLike() keyword {
	like := isLike{"is:like"}
	return like
}
// Description returns the short, human-readable help text for the keyword.
func (k isLike) Description() string {
	const desc = "matches liked tweets"
	return desc
}
// Predicate returns a constraint matching permanodes whose node-type
// attribute (nodeattr.Type) equals "twitter.com:like". Neither ctx nor args
// is consulted, and the returned error is always nil.
func (k isLike) Predicate(ctx context.Context, args []string) (*Constraint, error) {
	likedTweet := &PermanodeConstraint{
		Attr:  nodeattr.Type,
		Value: "twitter.com:like",
	}
	return &Constraint{Permanode: likedTweet}, nil
}
type isCheckin struct { type isCheckin struct {
matchEqual matchEqual
} }

View File

@ -85,7 +85,7 @@ cam.BlobItemTwitterContent.getHandler = function(blobref, searchSession, href) {
return null; return null;
} }
if (cam.permanodeUtils.getCamliNodeType(m.permanode) != 'twitter.com:tweet') { if (cam.permanodeUtils.getCamliNodeType(m.permanode) != 'twitter.com:tweet' && cam.permanodeUtils.getCamliNodeType(m.permanode) != 'twitter.com:like') {
return null; return null;
} }