stash/pkg/scraper/script.go

263 lines
5.7 KiB
Go
Raw Normal View History

2019-12-12 19:27:44 +00:00
package scraper
import (
"encoding/json"
"errors"
"fmt"
2019-12-12 19:27:44 +00:00
"io"
"os/exec"
"path/filepath"
2019-12-12 19:27:44 +00:00
"strings"
"github.com/stashapp/stash/pkg/logger"
"github.com/stashapp/stash/pkg/models"
)
// scriptScraper scrapes metadata by executing an external script as a
// subprocess: input is written to the script's stdin as JSON and the
// result is JSON-decoded from its stdout.
type scriptScraper struct {
	// per-scraper-type configuration; Script holds the command line to run
	scraper scraperTypeConfig
	// the owning scraper config; its path determines the script's working directory
	config config
	// application-wide scraper settings
	globalConfig GlobalConfig
}
// newScriptScraper returns a script-backed scraper for the given
// scraper type configuration and owning scraper/global configs.
func newScriptScraper(scraper scraperTypeConfig, config config, globalConfig GlobalConfig) *scriptScraper {
	s := scriptScraper{
		scraper:      scraper,
		config:       config,
		globalConfig: globalConfig,
	}
	return &s
}
func (s *scriptScraper) runScraperScript(inString string, out interface{}) error {
command := s.scraper.Script
if command[0] == "python" || command[0] == "python3" {
executable, err := findPythonExecutable()
if err == nil {
command[0] = executable
}
}
2019-12-12 19:27:44 +00:00
cmd := exec.Command(command[0], command[1:]...)
cmd.Dir = filepath.Dir(s.config.path)
2019-12-12 19:27:44 +00:00
stdin, err := cmd.StdinPipe()
if err != nil {
return err
}
go func() {
defer stdin.Close()
Lint checks phase 2 (#1747) * Log 3 unchecked errors Rather than ignore errors, log them at the WARNING log level. The server has been functioning without these, so assume they are not at the ERROR level. * Log errors in concurrency test If we can't initialize the configuration, treat the test as a failure. * Undo the errcheck on configurations for now. * Handle unchecked errors in pkg/manager * Resolve unchecked errors * Handle DLNA/DMS unchecked errors * Handle error checking in concurrency test Generalize config initialization, so we can initialize a configuration without writing it to disk. Use this in the test case, since otherwise the test fails to write. * Handle the remaining unchecked errors * Heed gosimple in update test * Use one-line if-initializer statements While here, fix a wrong variable capture error. * testing.T doesn't support %w use %v instead which is supported. * Remove unused query builder functions The Int/String criterion handler functions are now generalized. Thus, there's no need to keep these functions around anymore. * Mark filterBuilder.addRecursiveWith nolint The function is useful in the future and no other refactors are looking nice. Keep the function around, but tell the linter to ignore it. * Remove utils.Btoi There are no users of this utility function * Return error on scan failure If we fail to scan the row when looking for the unique checksum index, then report the error upwards. * Fix comments on exported functions * Fix typos * Fix startup error
2021-09-23 07:15:50 +00:00
if n, err := io.WriteString(stdin, inString); err != nil {
logger.Warnf("failure to write full input to script (wrote %v bytes out of %v): %v", n, len(inString), err)
}
2019-12-12 19:27:44 +00:00
}()
stderr, err := cmd.StderrPipe()
if err != nil {
logger.Error("Scraper stderr not available: " + err.Error())
}
stdout, err := cmd.StdoutPipe()
if nil != err {
logger.Error("Scraper stdout not available: " + err.Error())
}
if err = cmd.Start(); err != nil {
logger.Error("Error running scraper script: " + err.Error())
return errors.New("error running scraper script")
2019-12-12 19:27:44 +00:00
}
go handleScraperStderr(s.config.Name, stderr)
logger.Debugf("Scraper script <%s> started", strings.Join(cmd.Args, " "))
2019-12-12 19:27:44 +00:00
// TODO - add a timeout here
decodeErr := json.NewDecoder(stdout).Decode(out)
if decodeErr != nil {
logger.Error("could not unmarshal json: " + decodeErr.Error())
return errors.New("could not unmarshal json: " + decodeErr.Error())
}
2019-12-12 19:27:44 +00:00
err = cmd.Wait()
logger.Debugf("Scraper script finished")
2019-12-12 19:27:44 +00:00
if err != nil {
return errors.New("error running scraper script")
2019-12-12 19:27:44 +00:00
}
return nil
}
func (s *scriptScraper) scrapePerformersByName(name string) ([]*models.ScrapedPerformer, error) {
2019-12-12 19:27:44 +00:00
inString := `{"name": "` + name + `"}`
var performers []models.ScrapedPerformer
err := s.runScraperScript(inString, &performers)
2019-12-12 19:27:44 +00:00
// convert to pointers
var ret []*models.ScrapedPerformer
if err == nil {
for i := 0; i < len(performers); i++ {
ret = append(ret, &performers[i])
}
}
return ret, err
}
func (s *scriptScraper) scrapePerformerByFragment(scrapedPerformer models.ScrapedPerformerInput) (*models.ScrapedPerformer, error) {
2019-12-12 19:27:44 +00:00
inString, err := json.Marshal(scrapedPerformer)
if err != nil {
return nil, err
}
var ret models.ScrapedPerformer
err = s.runScraperScript(string(inString), &ret)
2019-12-12 19:27:44 +00:00
return &ret, err
}
func (s *scriptScraper) scrapePerformerByURL(url string) (*models.ScrapedPerformer, error) {
2019-12-12 19:27:44 +00:00
inString := `{"url": "` + url + `"}`
var ret models.ScrapedPerformer
err := s.runScraperScript(string(inString), &ret)
2019-12-12 19:27:44 +00:00
return &ret, err
}
// scrapeSceneByScene scrapes metadata for the given scene, sending the
// scene (converted to its update-input form) to the script as JSON.
func (s *scriptScraper) scrapeSceneByScene(scene *models.Scene) (*models.ScrapedScene, error) {
	input, err := json.Marshal(sceneToUpdateInput(scene))
	if err != nil {
		return nil, err
	}

	var scraped models.ScrapedScene
	err = s.runScraperScript(string(input), &scraped)

	return &scraped, err
}
// scrapeScenesByName queries the script for scenes matching the given
// name and returns the results.
func (s *scriptScraper) scrapeScenesByName(name string) ([]*models.ScrapedScene, error) {
	// json.Marshal escapes quotes/backslashes in the name; the previous
	// string concatenation produced invalid JSON for such input
	input, err := json.Marshal(map[string]string{"name": name})
	if err != nil {
		return nil, err
	}

	var scenes []models.ScrapedScene
	err = s.runScraperScript(string(input), &scenes)

	// convert to pointers
	var ret []*models.ScrapedScene
	if err == nil {
		for i := 0; i < len(scenes); i++ {
			ret = append(ret, &scenes[i])
		}
	}

	return ret, err
}
// scrapeSceneByFragment scrapes a single scene, passing the supplied
// scene fragment to the script as JSON input.
func (s *scriptScraper) scrapeSceneByFragment(scene models.ScrapedSceneInput) (*models.ScrapedScene, error) {
	input, err := json.Marshal(scene)
	if err != nil {
		return nil, err
	}

	var scraped models.ScrapedScene
	err = s.runScraperScript(string(input), &scraped)

	return &scraped, err
}
// scrapeGalleryByGallery scrapes metadata for the given gallery,
// sending the gallery (converted to its update-input form) to the
// script as JSON.
func (s *scriptScraper) scrapeGalleryByGallery(gallery *models.Gallery) (*models.ScrapedGallery, error) {
	input, err := json.Marshal(galleryToUpdateInput(gallery))
	if err != nil {
		return nil, err
	}

	var scraped models.ScrapedGallery
	err = s.runScraperScript(string(input), &scraped)

	return &scraped, err
}
func (s *scriptScraper) scrapeGalleryByFragment(gallery models.ScrapedGalleryInput) (*models.ScrapedGallery, error) {
2020-10-20 22:24:32 +00:00
inString, err := json.Marshal(gallery)
if err != nil {
return nil, err
}
var ret models.ScrapedGallery
err = s.runScraperScript(string(inString), &ret)
return &ret, err
}
func (s *scriptScraper) scrapeSceneByURL(url string) (*models.ScrapedScene, error) {
inString := `{"url": "` + url + `"}`
var ret models.ScrapedScene
err := s.runScraperScript(string(inString), &ret)
return &ret, err
}
2020-10-20 22:24:32 +00:00
// scrapeGalleryByURL scrapes gallery details from the given URL.
func (s *scriptScraper) scrapeGalleryByURL(url string) (*models.ScrapedGallery, error) {
	// marshal rather than concatenate so special characters in the URL
	// are correctly JSON-escaped
	input, err := json.Marshal(map[string]string{"url": url})
	if err != nil {
		return nil, err
	}

	var ret models.ScrapedGallery
	err = s.runScraperScript(string(input), &ret)

	return &ret, err
}
// scrapeMovieByURL scrapes movie details from the given URL.
func (s *scriptScraper) scrapeMovieByURL(url string) (*models.ScrapedMovie, error) {
	// marshal rather than concatenate so special characters in the URL
	// are correctly JSON-escaped
	input, err := json.Marshal(map[string]string{"url": url})
	if err != nil {
		return nil, err
	}

	var ret models.ScrapedMovie
	err = s.runScraperScript(string(input), &ret)

	return &ret, err
}
// findPythonExecutable locates a python interpreter on PATH, preferring
// python3 over python. It returns the executable name to run, or the
// lookup error if neither is available.
func findPythonExecutable() (string, error) {
	if _, err := exec.LookPath("python3"); err == nil {
		return "python3", nil
	}

	if _, err := exec.LookPath("python"); err != nil {
		return "", err
	}

	return "python", nil
}
// handleScraperStderr forwards a scraper script's stderr to the plugin
// logger, prefixing each line with the scraper's name. Lines carrying
// no explicit level marker are logged at the error level.
func handleScraperStderr(name string, scraperOutputReader io.ReadCloser) {
	pluginLog := logger.PluginLogger{
		Prefix:          fmt.Sprintf("[Scrape / %s] ", name),
		DefaultLogLevel: &logger.ErrorLevel,
	}
	pluginLog.HandlePluginStdErr(scraperOutputReader)
}