2020-08-10 04:21:50 +00:00
|
|
|
package scraper
|
|
|
|
|
|
|
|
import (
|
|
|
|
"errors"
|
|
|
|
"io/ioutil"
|
|
|
|
"net/url"
|
|
|
|
"strings"
|
|
|
|
|
|
|
|
"github.com/stashapp/stash/pkg/logger"
|
|
|
|
"github.com/stashapp/stash/pkg/models"
|
|
|
|
"github.com/tidwall/gjson"
|
|
|
|
)
|
|
|
|
|
|
|
|
// jsonScraper scrapes JSON documents using a mapped scraper definition
// taken from the scraper configuration.
type jsonScraper struct {
	scraper      scraperTypeConfig // which configured scraper entry to use
	config       config            // the owning scraper configuration
	globalConfig GlobalConfig      // application-wide scraper settings
	txnManager   models.TransactionManager
}
|
|
|
|
|
2021-01-18 01:23:20 +00:00
|
|
|
func newJsonScraper(scraper scraperTypeConfig, txnManager models.TransactionManager, config config, globalConfig GlobalConfig) *jsonScraper {
|
2020-08-10 04:21:50 +00:00
|
|
|
return &jsonScraper{
|
|
|
|
scraper: scraper,
|
|
|
|
config: config,
|
|
|
|
globalConfig: globalConfig,
|
2021-01-18 01:23:20 +00:00
|
|
|
txnManager: txnManager,
|
2020-08-10 04:21:50 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s *jsonScraper) getJsonScraper() *mappedScraper {
|
|
|
|
return s.config.JsonScrapers[s.scraper.Scraper]
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s *jsonScraper) scrapeURL(url string) (string, *mappedScraper, error) {
|
|
|
|
scraper := s.getJsonScraper()
|
|
|
|
|
|
|
|
if scraper == nil {
|
|
|
|
return "", nil, errors.New("json scraper with name " + s.scraper.Scraper + " not found in config")
|
|
|
|
}
|
|
|
|
|
|
|
|
doc, err := s.loadURL(url)
|
|
|
|
|
|
|
|
if err != nil {
|
|
|
|
return "", nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
return doc, scraper, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s *jsonScraper) loadURL(url string) (string, error) {
|
|
|
|
r, err := loadURL(url, s.config, s.globalConfig)
|
|
|
|
if err != nil {
|
|
|
|
return "", err
|
|
|
|
}
|
2021-03-01 22:19:56 +00:00
|
|
|
logger.Infof("loadURL (%s)\n", url)
|
2020-08-10 04:21:50 +00:00
|
|
|
doc, err := ioutil.ReadAll(r)
|
|
|
|
if err != nil {
|
|
|
|
return "", err
|
|
|
|
}
|
|
|
|
|
|
|
|
docStr := string(doc)
|
|
|
|
if !gjson.Valid(docStr) {
|
|
|
|
return "", errors.New("not valid json")
|
|
|
|
}
|
|
|
|
|
|
|
|
if err == nil && s.config.DebugOptions != nil && s.config.DebugOptions.PrintHTML {
|
|
|
|
logger.Infof("loadURL (%s) response: \n%s", url, docStr)
|
|
|
|
}
|
|
|
|
|
|
|
|
return docStr, err
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s *jsonScraper) scrapePerformerByURL(url string) (*models.ScrapedPerformer, error) {
|
2021-03-01 22:19:56 +00:00
|
|
|
u := replaceURL(url, s.scraper) // allow a URL Replace for performer by URL queries
|
|
|
|
doc, scraper, err := s.scrapeURL(u)
|
2020-08-10 04:21:50 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
q := s.getJsonQuery(doc)
|
|
|
|
return scraper.scrapePerformer(q)
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s *jsonScraper) scrapeSceneByURL(url string) (*models.ScrapedScene, error) {
|
2021-03-01 22:19:56 +00:00
|
|
|
u := replaceURL(url, s.scraper) // allow a URL Replace for scene by URL queries
|
|
|
|
doc, scraper, err := s.scrapeURL(u)
|
2020-08-10 04:21:50 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
q := s.getJsonQuery(doc)
|
|
|
|
return scraper.scrapeScene(q)
|
|
|
|
}
|
|
|
|
|
2020-10-20 22:24:32 +00:00
|
|
|
func (s *jsonScraper) scrapeGalleryByURL(url string) (*models.ScrapedGallery, error) {
|
2021-03-01 22:19:56 +00:00
|
|
|
u := replaceURL(url, s.scraper) // allow a URL Replace for gallery by URL queries
|
|
|
|
doc, scraper, err := s.scrapeURL(u)
|
2020-10-20 22:24:32 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
q := s.getJsonQuery(doc)
|
|
|
|
return scraper.scrapeGallery(q)
|
|
|
|
}
|
|
|
|
|
2020-08-10 05:34:15 +00:00
|
|
|
func (s *jsonScraper) scrapeMovieByURL(url string) (*models.ScrapedMovie, error) {
|
2021-03-01 22:19:56 +00:00
|
|
|
u := replaceURL(url, s.scraper) // allow a URL Replace for movie by URL queries
|
|
|
|
doc, scraper, err := s.scrapeURL(u)
|
2020-08-10 05:34:15 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
q := s.getJsonQuery(doc)
|
|
|
|
return scraper.scrapeMovie(q)
|
|
|
|
}
|
|
|
|
|
2020-08-10 04:21:50 +00:00
|
|
|
func (s *jsonScraper) scrapePerformersByName(name string) ([]*models.ScrapedPerformer, error) {
|
|
|
|
scraper := s.getJsonScraper()
|
|
|
|
|
|
|
|
if scraper == nil {
|
|
|
|
return nil, errors.New("json scraper with name " + s.scraper.Scraper + " not found in config")
|
|
|
|
}
|
|
|
|
|
|
|
|
const placeholder = "{}"
|
|
|
|
|
|
|
|
// replace the placeholder string with the URL-escaped name
|
|
|
|
escapedName := url.QueryEscape(name)
|
|
|
|
|
|
|
|
url := s.scraper.QueryURL
|
|
|
|
url = strings.Replace(url, placeholder, escapedName, -1)
|
|
|
|
|
|
|
|
doc, err := s.loadURL(url)
|
|
|
|
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
q := s.getJsonQuery(doc)
|
|
|
|
return scraper.scrapePerformers(q)
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s *jsonScraper) scrapePerformerByFragment(scrapedPerformer models.ScrapedPerformerInput) (*models.ScrapedPerformer, error) {
|
|
|
|
return nil, errors.New("scrapePerformerByFragment not supported for json scraper")
|
|
|
|
}
|
|
|
|
|
2021-09-07 01:54:22 +00:00
|
|
|
func (s *jsonScraper) scrapeSceneByScene(scene *models.Scene) (*models.ScrapedScene, error) {
|
2020-08-10 04:21:50 +00:00
|
|
|
// construct the URL
|
2021-09-07 01:54:22 +00:00
|
|
|
queryURL := queryURLParametersFromScene(scene)
|
2020-10-22 00:56:04 +00:00
|
|
|
if s.scraper.QueryURLReplacements != nil {
|
|
|
|
queryURL.applyReplacements(s.scraper.QueryURLReplacements)
|
|
|
|
}
|
|
|
|
url := queryURL.constructURL(s.scraper.QueryURL)
|
2020-08-10 04:21:50 +00:00
|
|
|
|
|
|
|
scraper := s.getJsonScraper()
|
|
|
|
|
|
|
|
if scraper == nil {
|
|
|
|
return nil, errors.New("json scraper with name " + s.scraper.Scraper + " not found in config")
|
|
|
|
}
|
|
|
|
|
|
|
|
doc, err := s.loadURL(url)
|
|
|
|
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
q := s.getJsonQuery(doc)
|
|
|
|
return scraper.scrapeScene(q)
|
|
|
|
}
|
|
|
|
|
2021-09-07 01:54:22 +00:00
|
|
|
func (s *jsonScraper) scrapeSceneByFragment(scene models.ScrapedSceneInput) (*models.ScrapedScene, error) {
|
|
|
|
return nil, errors.New("scrapeSceneByFragment not supported for json scraper")
|
|
|
|
}
|
2020-10-20 22:24:32 +00:00
|
|
|
|
2021-09-07 01:54:22 +00:00
|
|
|
func (s *jsonScraper) scrapeGalleryByGallery(gallery *models.Gallery) (*models.ScrapedGallery, error) {
|
2020-10-22 00:56:04 +00:00
|
|
|
// construct the URL
|
2021-09-07 01:54:22 +00:00
|
|
|
queryURL := queryURLParametersFromGallery(gallery)
|
2020-10-22 00:56:04 +00:00
|
|
|
if s.scraper.QueryURLReplacements != nil {
|
|
|
|
queryURL.applyReplacements(s.scraper.QueryURLReplacements)
|
|
|
|
}
|
|
|
|
url := queryURL.constructURL(s.scraper.QueryURL)
|
2020-10-20 22:24:32 +00:00
|
|
|
|
|
|
|
scraper := s.getJsonScraper()
|
|
|
|
|
|
|
|
if scraper == nil {
|
|
|
|
return nil, errors.New("json scraper with name " + s.scraper.Scraper + " not found in config")
|
|
|
|
}
|
|
|
|
|
|
|
|
doc, err := s.loadURL(url)
|
|
|
|
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
q := s.getJsonQuery(doc)
|
|
|
|
return scraper.scrapeGallery(q)
|
|
|
|
}
|
|
|
|
|
2021-09-07 01:54:22 +00:00
|
|
|
func (s *jsonScraper) scrapeGalleryByFragment(gallery models.ScrapedGalleryInput) (*models.ScrapedGallery, error) {
|
|
|
|
return nil, errors.New("scrapeGalleryByFragment not supported for json scraper")
|
|
|
|
}
|
|
|
|
|
2020-08-10 04:21:50 +00:00
|
|
|
func (s *jsonScraper) getJsonQuery(doc string) *jsonQuery {
|
|
|
|
return &jsonQuery{
|
|
|
|
doc: doc,
|
|
|
|
scraper: s,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// jsonQuery wraps a raw JSON document for querying with gjson path
// selectors, keeping a reference back to its scraper for sub-scrapes.
type jsonQuery struct {
	doc     string       // the raw JSON document being queried
	scraper *jsonScraper // used to load nested URLs in subScrape
}
|
|
|
|
|
|
|
|
func (q *jsonQuery) runQuery(selector string) []string {
|
|
|
|
value := gjson.Get(q.doc, selector)
|
|
|
|
|
|
|
|
if !value.Exists() {
|
|
|
|
logger.Warnf("Could not find json path '%s' in json object", selector)
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
var ret []string
|
|
|
|
if value.IsArray() {
|
|
|
|
value.ForEach(func(k, v gjson.Result) bool {
|
|
|
|
ret = append(ret, v.String())
|
|
|
|
return true
|
|
|
|
})
|
|
|
|
} else {
|
|
|
|
ret = append(ret, value.String())
|
|
|
|
}
|
|
|
|
|
|
|
|
return ret
|
|
|
|
}
|
|
|
|
|
|
|
|
func (q *jsonQuery) subScrape(value string) mappedQuery {
|
|
|
|
doc, err := q.scraper.loadURL(value)
|
|
|
|
|
|
|
|
if err != nil {
|
|
|
|
logger.Warnf("Error getting URL '%s' for sub-scraper: %s", value, err.Error())
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
return q.scraper.getJsonQuery(doc)
|
|
|
|
}
|