mirror of https://github.com/perkeep/perkeep.git
Merge "importer/twitter: add likes import"
This commit is contained in:
commit
9a49f0959d
|
@ -75,6 +75,9 @@ escaping. For example:
|
|||
rest of whole expression (regardless of its position within the expression),
|
||||
logical 'or's around it are forbidden.
|
||||
|
||||
**<a name="like"></a>is:like**
|
||||
: object is a liked tweet
|
||||
|
||||
**<a name="image"></a>is:image**
|
||||
: object is an image
|
||||
|
||||
|
|
|
@ -56,6 +56,7 @@ const (
|
|||
tokenRequestURL = "https://api.twitter.com/oauth/access_token"
|
||||
userInfoAPIPath = "account/verify_credentials.json"
|
||||
userTimeLineAPIPath = "statuses/user_timeline.json"
|
||||
userLikesAPIPath = "favorites/list.json"
|
||||
|
||||
// runCompleteVersion is a cache-busting version number of the
|
||||
// importer code. It should be incremented whenever the
|
||||
|
@ -75,6 +76,13 @@ const (
|
|||
// ... and re-do an import.
|
||||
acctAttrTweetZip = "twitterArchiveZipFileRef"
|
||||
|
||||
// acctAttrImportLikes specifies an optional attribte for the account permanode.
|
||||
// If set to true likes are imported via the twitter API.
|
||||
// You can enable importing likes like this:
|
||||
// $ pk-put attr <acct-permanode> twitterImportLikes true
|
||||
// ... and re-do an import.
|
||||
acctAttrImportLikes = "twitterImportLikes"
|
||||
|
||||
// acctAttrZipDoneVersion is updated at the end of a successful zip import and
|
||||
// is used to determine whether the zip file needs to be re-imported in a future run.
|
||||
acctAttrZipDoneVersion = "twitterZipDoneVersion" // == "<fileref>:<runCompleteVersion>"
|
||||
|
@ -84,6 +92,12 @@ const (
|
|||
|
||||
tweetRequestLimit = 200 // max number of tweets we can get in a user_timeline request
|
||||
tweetsAtOnce = 20 // how many tweets to import at once
|
||||
|
||||
// A tweet is stored as a permanode with the "twitter.com:tweet" camliNodeType value.
|
||||
nodeTypeTweet = "twitter.com:tweet"
|
||||
|
||||
// A like is stored as a permanode with the "twitter.com:like" camliNodeType value.
|
||||
nodeTypeLike = "twitter.com:like"
|
||||
)
|
||||
|
||||
var oAuthURIs = importer.OAuthURIs{
|
||||
|
@ -169,6 +183,10 @@ return the zip-fileref), and signal the twitter importer that you have it, with<
|
|||
"pk-put attr <acct-permanode> twitterArchiveZipFileRef <zip-fileref>".<br>
|
||||
Then you can start running the importer.
|
||||
</p>
|
||||
<p>
|
||||
If you want to import likes as well, please run <br>
|
||||
"pk-put attr <acct-permanode> twitterImportLikes true" to enable it.
|
||||
</p>
|
||||
`, base, base+"/callback")
|
||||
}
|
||||
|
||||
|
@ -225,7 +243,18 @@ func (im *imp) Run(ctx *importer.RunContext) error {
|
|||
|
||||
skipAPITweets, _ := strconv.ParseBool(os.Getenv("CAMLI_TWITTER_SKIP_API_IMPORT"))
|
||||
if !skipAPITweets {
|
||||
if err := r.importTweets(userID); err != nil {
|
||||
if err := r.importTweets(userID, userTimeLineAPIPath); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
acctNode, err = ctx.Host.ObjectFromRef(acctNode.PermanodeRef())
|
||||
if err != nil {
|
||||
return fmt.Errorf("error reloading account node: %v", err)
|
||||
}
|
||||
importLikes, err := strconv.ParseBool(acctNode.Attr(acctAttrImportLikes))
|
||||
if err == nil && importLikes {
|
||||
if err := r.importTweets(userID, userLikesAPIPath); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
@ -339,11 +368,22 @@ func (r *run) doAPI(result interface{}, apiPath string, keyval ...string) error
|
|||
r.accessCreds}.PopulateJSONFromURL(result, apiURL+apiPath, keyval...)
|
||||
}
|
||||
|
||||
func (r *run) importTweets(userID string) error {
|
||||
// importTweets imports the tweets related to userID, through apiPath.
|
||||
// If apiPath is userTimeLineAPIPath, the tweets and retweets posted by userID are imported.
|
||||
// If apiPath is userLikesAPIPath, the tweets liked by userID are imported.
|
||||
func (r *run) importTweets(userID string, apiPath string) error {
|
||||
maxId := ""
|
||||
continueRequests := true
|
||||
|
||||
tweetsNode, err := r.getTopLevelNode("tweets")
|
||||
var tweetsNode *importer.Object
|
||||
var err error
|
||||
var importType string
|
||||
if apiPath == userLikesAPIPath {
|
||||
importType = "likes"
|
||||
} else {
|
||||
importType = "tweets"
|
||||
}
|
||||
tweetsNode, err = r.getTopLevelNode(importType)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
@ -368,11 +408,11 @@ func (r *run) importTweets(userID string) error {
|
|||
var resp []*apiTweetItem
|
||||
var err error
|
||||
if maxId == "" {
|
||||
log.Printf("twitter: fetching tweets for userid %s", userID)
|
||||
err = r.doAPI(&resp, userTimeLineAPIPath, attrs...)
|
||||
log.Printf("twitter: fetching %s for userid %s", importType, userID)
|
||||
err = r.doAPI(&resp, apiPath, attrs...)
|
||||
} else {
|
||||
log.Printf("twitter: fetching tweets for userid %s with max ID %s", userID, maxId)
|
||||
err = r.doAPI(&resp, userTimeLineAPIPath,
|
||||
log.Printf("twitter: fetching %s for userid %s with max ID %s", userID, importType, maxId)
|
||||
err = r.doAPI(&resp, apiPath,
|
||||
append(attrs, "max_id", maxId)...)
|
||||
}
|
||||
if err != nil {
|
||||
|
@ -416,14 +456,14 @@ func (r *run) importTweets(userID string) error {
|
|||
return err
|
||||
}
|
||||
numTweets += newThisBatch
|
||||
log.Printf("twitter: imported %d tweets this batch; %d total.", newThisBatch, numTweets)
|
||||
log.Printf("twitter: imported %d %s this batch; %d total.", newThisBatch, importType, numTweets)
|
||||
if r.incremental && allDups {
|
||||
log.Printf("twitter: incremental import found end batch")
|
||||
break
|
||||
}
|
||||
continueRequests = newThisBatch > 0
|
||||
}
|
||||
log.Printf("twitter: successfully did full run of importing %d tweets", numTweets)
|
||||
log.Printf("twitter: successfully did full run of importing %d %s", numTweets, importType)
|
||||
return nil
|
||||
}
|
||||
|
||||
|
@ -533,9 +573,14 @@ func (r *run) importTweet(parent *importer.Object, tweet tweetItem, viaAPI bool)
|
|||
r.AccountNode().Attr(importer.AcctAttrUserName),
|
||||
id)
|
||||
|
||||
nodeType := nodeTypeTweet
|
||||
if tweet.Liked() {
|
||||
nodeType = nodeTypeLike
|
||||
}
|
||||
|
||||
attrs := []string{
|
||||
"twitterId", id,
|
||||
nodeattr.Type, "twitter.com:tweet",
|
||||
nodeattr.Type, nodeType,
|
||||
nodeattr.StartDate, schema.RFC3339FromTime(createdTime),
|
||||
nodeattr.Content, tweet.Text(),
|
||||
nodeattr.URL, url,
|
||||
|
@ -617,6 +662,8 @@ func (r *run) getTopLevelNode(path string) (*importer.Object, error) {
|
|||
switch path {
|
||||
case "tweets":
|
||||
title = fmt.Sprintf("%s's Tweets", acctNode.Attr(importer.AcctAttrUserName))
|
||||
case "likes":
|
||||
title = fmt.Sprintf("%s's Likes", acctNode.Attr(importer.AcctAttrUserName))
|
||||
}
|
||||
return obj, obj.SetAttr(nodeattr.Title, title)
|
||||
}
|
||||
|
@ -732,6 +779,7 @@ type tweetItem interface {
|
|||
CreatedAt() string
|
||||
Text() string
|
||||
Media() []tweetMedia
|
||||
Liked() bool
|
||||
}
|
||||
|
||||
type tweetMedia interface {
|
||||
|
@ -744,6 +792,7 @@ type apiTweetItem struct {
|
|||
TextStr string `json:"text"`
|
||||
CreatedAtStr string `json:"created_at"`
|
||||
Entities entities `json:"entities"`
|
||||
Favorited bool `json:"favorited"`
|
||||
|
||||
// One or both might be present:
|
||||
Geo *geo `json:"geo"` // lat, long
|
||||
|
@ -822,6 +871,9 @@ func (t *apiTweetItem) Media() (ret []tweetMedia) {
|
|||
return
|
||||
}
|
||||
|
||||
func (t *apiTweetItem) Liked() bool { return t.Favorited }
|
||||
func (t *zipTweetItem) Liked() bool { return false }
|
||||
|
||||
type geo struct {
|
||||
Coordinates []float64 `json:"coordinates"` // lat,long
|
||||
}
|
||||
|
|
|
@ -126,6 +126,7 @@ func init() {
|
|||
|
||||
// Custom predicates
|
||||
registerKeyword(newIsPost())
|
||||
registerKeyword(newIsLike())
|
||||
registerKeyword(newIsCheckin())
|
||||
|
||||
// Location predicates
|
||||
|
@ -839,6 +840,27 @@ func (k isPost) Predicate(ctx context.Context, args []string) (*Constraint, erro
|
|||
}, nil
|
||||
}
|
||||
|
||||
type isLike struct {
|
||||
matchEqual
|
||||
}
|
||||
|
||||
func newIsLike() keyword {
|
||||
return isLike{"is:like"}
|
||||
}
|
||||
|
||||
func (k isLike) Description() string {
|
||||
return "matches liked tweets"
|
||||
}
|
||||
|
||||
func (k isLike) Predicate(ctx context.Context, args []string) (*Constraint, error) {
|
||||
return &Constraint{
|
||||
Permanode: &PermanodeConstraint{
|
||||
Attr: nodeattr.Type,
|
||||
Value: "twitter.com:like",
|
||||
},
|
||||
}, nil
|
||||
}
|
||||
|
||||
type isCheckin struct {
|
||||
matchEqual
|
||||
}
|
||||
|
|
|
@ -85,7 +85,7 @@ cam.BlobItemTwitterContent.getHandler = function(blobref, searchSession, href) {
|
|||
return null;
|
||||
}
|
||||
|
||||
if (cam.permanodeUtils.getCamliNodeType(m.permanode) != 'twitter.com:tweet') {
|
||||
if (cam.permanodeUtils.getCamliNodeType(m.permanode) != 'twitter.com:tweet' && cam.permanodeUtils.getCamliNodeType(m.permanode) != 'twitter.com:like') {
|
||||
return null;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue