mirror of https://github.com/perkeep/perkeep.git
Merge "importer/twitter: add likes import"
This commit is contained in:
commit
9a49f0959d
|
@ -75,6 +75,9 @@ escaping. For example:
|
||||||
rest of whole expression (regardless of its position within the expression),
|
rest of whole expression (regardless of its position within the expression),
|
||||||
logical 'or's around it are forbidden.
|
logical 'or's around it are forbidden.
|
||||||
|
|
||||||
|
**<a name="like"></a>is:like**
|
||||||
|
: object is a liked tweet
|
||||||
|
|
||||||
**<a name="image"></a>is:image**
|
**<a name="image"></a>is:image**
|
||||||
: object is an image
|
: object is an image
|
||||||
|
|
||||||
|
|
|
@ -56,6 +56,7 @@ const (
|
||||||
tokenRequestURL = "https://api.twitter.com/oauth/access_token"
|
tokenRequestURL = "https://api.twitter.com/oauth/access_token"
|
||||||
userInfoAPIPath = "account/verify_credentials.json"
|
userInfoAPIPath = "account/verify_credentials.json"
|
||||||
userTimeLineAPIPath = "statuses/user_timeline.json"
|
userTimeLineAPIPath = "statuses/user_timeline.json"
|
||||||
|
userLikesAPIPath = "favorites/list.json"
|
||||||
|
|
||||||
// runCompleteVersion is a cache-busting version number of the
|
// runCompleteVersion is a cache-busting version number of the
|
||||||
// importer code. It should be incremented whenever the
|
// importer code. It should be incremented whenever the
|
||||||
|
@ -75,6 +76,13 @@ const (
|
||||||
// ... and re-do an import.
|
// ... and re-do an import.
|
||||||
acctAttrTweetZip = "twitterArchiveZipFileRef"
|
acctAttrTweetZip = "twitterArchiveZipFileRef"
|
||||||
|
|
||||||
|
// acctAttrImportLikes specifies an optional attribute for the account permanode.
|
||||||
|
// If set to true, likes are imported via the Twitter API.
|
||||||
|
// You can enable importing likes like this:
|
||||||
|
// $ pk-put attr <acct-permanode> twitterImportLikes true
|
||||||
|
// ... and re-do an import.
|
||||||
|
acctAttrImportLikes = "twitterImportLikes"
|
||||||
|
|
||||||
// acctAttrZipDoneVersion is updated at the end of a successful zip import and
|
// acctAttrZipDoneVersion is updated at the end of a successful zip import and
|
||||||
// is used to determine whether the zip file needs to be re-imported in a future run.
|
// is used to determine whether the zip file needs to be re-imported in a future run.
|
||||||
acctAttrZipDoneVersion = "twitterZipDoneVersion" // == "<fileref>:<runCompleteVersion>"
|
acctAttrZipDoneVersion = "twitterZipDoneVersion" // == "<fileref>:<runCompleteVersion>"
|
||||||
|
@ -84,6 +92,12 @@ const (
|
||||||
|
|
||||||
tweetRequestLimit = 200 // max number of tweets we can get in a user_timeline request
|
tweetRequestLimit = 200 // max number of tweets we can get in a user_timeline request
|
||||||
tweetsAtOnce = 20 // how many tweets to import at once
|
tweetsAtOnce = 20 // how many tweets to import at once
|
||||||
|
|
||||||
|
// A tweet is stored as a permanode with the "twitter.com:tweet" camliNodeType value.
|
||||||
|
nodeTypeTweet = "twitter.com:tweet"
|
||||||
|
|
||||||
|
// A like is stored as a permanode with the "twitter.com:like" camliNodeType value.
|
||||||
|
nodeTypeLike = "twitter.com:like"
|
||||||
)
|
)
|
||||||
|
|
||||||
var oAuthURIs = importer.OAuthURIs{
|
var oAuthURIs = importer.OAuthURIs{
|
||||||
|
@ -169,6 +183,10 @@ return the zip-fileref), and signal the twitter importer that you have it, with<
|
||||||
"pk-put attr <acct-permanode> twitterArchiveZipFileRef <zip-fileref>".<br>
|
"pk-put attr <acct-permanode> twitterArchiveZipFileRef <zip-fileref>".<br>
|
||||||
Then you can start running the importer.
|
Then you can start running the importer.
|
||||||
</p>
|
</p>
|
||||||
|
<p>
|
||||||
|
If you want to import likes as well, please run <br>
|
||||||
|
"pk-put attr <acct-permanode> twitterImportLikes true" to enable it.
|
||||||
|
</p>
|
||||||
`, base, base+"/callback")
|
`, base, base+"/callback")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -225,7 +243,18 @@ func (im *imp) Run(ctx *importer.RunContext) error {
|
||||||
|
|
||||||
skipAPITweets, _ := strconv.ParseBool(os.Getenv("CAMLI_TWITTER_SKIP_API_IMPORT"))
|
skipAPITweets, _ := strconv.ParseBool(os.Getenv("CAMLI_TWITTER_SKIP_API_IMPORT"))
|
||||||
if !skipAPITweets {
|
if !skipAPITweets {
|
||||||
if err := r.importTweets(userID); err != nil {
|
if err := r.importTweets(userID, userTimeLineAPIPath); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
acctNode, err = ctx.Host.ObjectFromRef(acctNode.PermanodeRef())
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("error reloading account node: %v", err)
|
||||||
|
}
|
||||||
|
importLikes, err := strconv.ParseBool(acctNode.Attr(acctAttrImportLikes))
|
||||||
|
if err == nil && importLikes {
|
||||||
|
if err := r.importTweets(userID, userLikesAPIPath); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -339,11 +368,22 @@ func (r *run) doAPI(result interface{}, apiPath string, keyval ...string) error
|
||||||
r.accessCreds}.PopulateJSONFromURL(result, apiURL+apiPath, keyval...)
|
r.accessCreds}.PopulateJSONFromURL(result, apiURL+apiPath, keyval...)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *run) importTweets(userID string) error {
|
// importTweets imports the tweets related to userID, through apiPath.
|
||||||
|
// If apiPath is userTimeLineAPIPath, the tweets and retweets posted by userID are imported.
|
||||||
|
// If apiPath is userLikesAPIPath, the tweets liked by userID are imported.
|
||||||
|
func (r *run) importTweets(userID string, apiPath string) error {
|
||||||
maxId := ""
|
maxId := ""
|
||||||
continueRequests := true
|
continueRequests := true
|
||||||
|
|
||||||
tweetsNode, err := r.getTopLevelNode("tweets")
|
var tweetsNode *importer.Object
|
||||||
|
var err error
|
||||||
|
var importType string
|
||||||
|
if apiPath == userLikesAPIPath {
|
||||||
|
importType = "likes"
|
||||||
|
} else {
|
||||||
|
importType = "tweets"
|
||||||
|
}
|
||||||
|
tweetsNode, err = r.getTopLevelNode(importType)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
@ -368,11 +408,11 @@ func (r *run) importTweets(userID string) error {
|
||||||
var resp []*apiTweetItem
|
var resp []*apiTweetItem
|
||||||
var err error
|
var err error
|
||||||
if maxId == "" {
|
if maxId == "" {
|
||||||
log.Printf("twitter: fetching tweets for userid %s", userID)
|
log.Printf("twitter: fetching %s for userid %s", importType, userID)
|
||||||
err = r.doAPI(&resp, userTimeLineAPIPath, attrs...)
|
err = r.doAPI(&resp, apiPath, attrs...)
|
||||||
} else {
|
} else {
|
||||||
log.Printf("twitter: fetching tweets for userid %s with max ID %s", userID, maxId)
|
log.Printf("twitter: fetching %s for userid %s with max ID %s", userID, importType, maxId)
|
||||||
err = r.doAPI(&resp, userTimeLineAPIPath,
|
err = r.doAPI(&resp, apiPath,
|
||||||
append(attrs, "max_id", maxId)...)
|
append(attrs, "max_id", maxId)...)
|
||||||
}
|
}
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -416,14 +456,14 @@ func (r *run) importTweets(userID string) error {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
numTweets += newThisBatch
|
numTweets += newThisBatch
|
||||||
log.Printf("twitter: imported %d tweets this batch; %d total.", newThisBatch, numTweets)
|
log.Printf("twitter: imported %d %s this batch; %d total.", newThisBatch, importType, numTweets)
|
||||||
if r.incremental && allDups {
|
if r.incremental && allDups {
|
||||||
log.Printf("twitter: incremental import found end batch")
|
log.Printf("twitter: incremental import found end batch")
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
continueRequests = newThisBatch > 0
|
continueRequests = newThisBatch > 0
|
||||||
}
|
}
|
||||||
log.Printf("twitter: successfully did full run of importing %d tweets", numTweets)
|
log.Printf("twitter: successfully did full run of importing %d %s", numTweets, importType)
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -533,9 +573,14 @@ func (r *run) importTweet(parent *importer.Object, tweet tweetItem, viaAPI bool)
|
||||||
r.AccountNode().Attr(importer.AcctAttrUserName),
|
r.AccountNode().Attr(importer.AcctAttrUserName),
|
||||||
id)
|
id)
|
||||||
|
|
||||||
|
nodeType := nodeTypeTweet
|
||||||
|
if tweet.Liked() {
|
||||||
|
nodeType = nodeTypeLike
|
||||||
|
}
|
||||||
|
|
||||||
attrs := []string{
|
attrs := []string{
|
||||||
"twitterId", id,
|
"twitterId", id,
|
||||||
nodeattr.Type, "twitter.com:tweet",
|
nodeattr.Type, nodeType,
|
||||||
nodeattr.StartDate, schema.RFC3339FromTime(createdTime),
|
nodeattr.StartDate, schema.RFC3339FromTime(createdTime),
|
||||||
nodeattr.Content, tweet.Text(),
|
nodeattr.Content, tweet.Text(),
|
||||||
nodeattr.URL, url,
|
nodeattr.URL, url,
|
||||||
|
@ -617,6 +662,8 @@ func (r *run) getTopLevelNode(path string) (*importer.Object, error) {
|
||||||
switch path {
|
switch path {
|
||||||
case "tweets":
|
case "tweets":
|
||||||
title = fmt.Sprintf("%s's Tweets", acctNode.Attr(importer.AcctAttrUserName))
|
title = fmt.Sprintf("%s's Tweets", acctNode.Attr(importer.AcctAttrUserName))
|
||||||
|
case "likes":
|
||||||
|
title = fmt.Sprintf("%s's Likes", acctNode.Attr(importer.AcctAttrUserName))
|
||||||
}
|
}
|
||||||
return obj, obj.SetAttr(nodeattr.Title, title)
|
return obj, obj.SetAttr(nodeattr.Title, title)
|
||||||
}
|
}
|
||||||
|
@ -732,6 +779,7 @@ type tweetItem interface {
|
||||||
CreatedAt() string
|
CreatedAt() string
|
||||||
Text() string
|
Text() string
|
||||||
Media() []tweetMedia
|
Media() []tweetMedia
|
||||||
|
Liked() bool
|
||||||
}
|
}
|
||||||
|
|
||||||
type tweetMedia interface {
|
type tweetMedia interface {
|
||||||
|
@ -744,6 +792,7 @@ type apiTweetItem struct {
|
||||||
TextStr string `json:"text"`
|
TextStr string `json:"text"`
|
||||||
CreatedAtStr string `json:"created_at"`
|
CreatedAtStr string `json:"created_at"`
|
||||||
Entities entities `json:"entities"`
|
Entities entities `json:"entities"`
|
||||||
|
Favorited bool `json:"favorited"`
|
||||||
|
|
||||||
// One or both might be present:
|
// One or both might be present:
|
||||||
Geo *geo `json:"geo"` // lat, long
|
Geo *geo `json:"geo"` // lat, long
|
||||||
|
@ -822,6 +871,9 @@ func (t *apiTweetItem) Media() (ret []tweetMedia) {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (t *apiTweetItem) Liked() bool { return t.Favorited }
|
||||||
|
func (t *zipTweetItem) Liked() bool { return false }
|
||||||
|
|
||||||
type geo struct {
|
type geo struct {
|
||||||
Coordinates []float64 `json:"coordinates"` // lat,long
|
Coordinates []float64 `json:"coordinates"` // lat,long
|
||||||
}
|
}
|
||||||
|
|
|
@ -126,6 +126,7 @@ func init() {
|
||||||
|
|
||||||
// Custom predicates
|
// Custom predicates
|
||||||
registerKeyword(newIsPost())
|
registerKeyword(newIsPost())
|
||||||
|
registerKeyword(newIsLike())
|
||||||
registerKeyword(newIsCheckin())
|
registerKeyword(newIsCheckin())
|
||||||
|
|
||||||
// Location predicates
|
// Location predicates
|
||||||
|
@ -839,6 +840,27 @@ func (k isPost) Predicate(ctx context.Context, args []string) (*Constraint, erro
|
||||||
}, nil
|
}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type isLike struct {
|
||||||
|
matchEqual
|
||||||
|
}
|
||||||
|
|
||||||
|
func newIsLike() keyword {
|
||||||
|
return isLike{"is:like"}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (k isLike) Description() string {
|
||||||
|
return "matches liked tweets"
|
||||||
|
}
|
||||||
|
|
||||||
|
func (k isLike) Predicate(ctx context.Context, args []string) (*Constraint, error) {
|
||||||
|
return &Constraint{
|
||||||
|
Permanode: &PermanodeConstraint{
|
||||||
|
Attr: nodeattr.Type,
|
||||||
|
Value: "twitter.com:like",
|
||||||
|
},
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
type isCheckin struct {
|
type isCheckin struct {
|
||||||
matchEqual
|
matchEqual
|
||||||
}
|
}
|
||||||
|
|
|
@ -85,7 +85,7 @@ cam.BlobItemTwitterContent.getHandler = function(blobref, searchSession, href) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (cam.permanodeUtils.getCamliNodeType(m.permanode) != 'twitter.com:tweet') {
|
if (cam.permanodeUtils.getCamliNodeType(m.permanode) != 'twitter.com:tweet' && cam.permanodeUtils.getCamliNodeType(m.permanode) != 'twitter.com:like') {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue