stash/pkg/scraper/script.go

249 lines
6.1 KiB
Go

package scraper
import (
"context"
"encoding/json"
"errors"
"fmt"
"io"
"os/exec"
"path/filepath"
"strings"
"github.com/stashapp/stash/pkg/desktop"
"github.com/stashapp/stash/pkg/logger"
"github.com/stashapp/stash/pkg/models"
)
var ErrScraperScript = errors.New("scraper script error")
type scriptScraper struct {
scraper scraperTypeConfig
config config
globalConfig GlobalConfig
}
func newScriptScraper(scraper scraperTypeConfig, config config, globalConfig GlobalConfig) *scriptScraper {
return &scriptScraper{
scraper: scraper,
config: config,
globalConfig: globalConfig,
}
}
func (s *scriptScraper) runScraperScript(inString string, out interface{}) error {
command := s.scraper.Script
if command[0] == "python" || command[0] == "python3" {
executable, err := findPythonExecutable()
if err == nil {
command[0] = executable
}
}
cmd := exec.Command(command[0], command[1:]...)
cmd.Dir = filepath.Dir(s.config.path)
stdin, err := cmd.StdinPipe()
if err != nil {
return err
}
go func() {
defer stdin.Close()
if n, err := io.WriteString(stdin, inString); err != nil {
logger.Warnf("failure to write full input to script (wrote %v bytes out of %v): %v", n, len(inString), err)
}
}()
stderr, err := cmd.StderrPipe()
if err != nil {
logger.Error("Scraper stderr not available: " + err.Error())
}
stdout, err := cmd.StdoutPipe()
if nil != err {
logger.Error("Scraper stdout not available: " + err.Error())
}
desktop.HideExecShell(cmd)
if err = cmd.Start(); err != nil {
logger.Error("Error running scraper script: " + err.Error())
return errors.New("error running scraper script")
}
go handleScraperStderr(s.config.Name, stderr)
logger.Debugf("Scraper script <%s> started", strings.Join(cmd.Args, " "))
// TODO - add a timeout here
// Make a copy of stdout here. This allows us to decode it twice.
var sb strings.Builder
tr := io.TeeReader(stdout, &sb)
// First, perform a decode where unknown fields are disallowed.
d := json.NewDecoder(tr)
d.DisallowUnknownFields()
strictErr := d.Decode(out)
if strictErr != nil {
// The decode failed for some reason, use the built string
// and allow unknown fields in the decode.
s := sb.String()
lenientErr := json.NewDecoder(strings.NewReader(s)).Decode(out)
if lenientErr != nil {
// The error is genuine, so return it
logger.Errorf("could not unmarshal json from script output: %v", lenientErr)
return fmt.Errorf("could not unmarshal json from script output: %w", lenientErr)
}
// Lenient decode succeeded, print a warning, but use the decode
logger.Warnf("reading script result: %v", strictErr)
}
err = cmd.Wait()
logger.Debugf("Scraper script finished")
if err != nil {
return fmt.Errorf("%w: %v", ErrScraperScript, err)
}
return nil
}
func (s *scriptScraper) scrapeByName(ctx context.Context, name string, ty models.ScrapeContentType) ([]models.ScrapedContent, error) {
input := `{"name": "` + name + `"}`
var ret []models.ScrapedContent
var err error
switch ty {
case models.ScrapeContentTypePerformer:
var performers []models.ScrapedPerformer
err = s.runScraperScript(input, &performers)
if err == nil {
for _, p := range performers {
v := p
ret = append(ret, &v)
}
}
case models.ScrapeContentTypeScene:
var scenes []models.ScrapedScene
err = s.runScraperScript(input, &scenes)
if err == nil {
for _, s := range scenes {
v := s
ret = append(ret, &v)
}
}
default:
return nil, ErrNotSupported
}
return ret, err
}
func (s *scriptScraper) scrapeByFragment(ctx context.Context, input Input) (models.ScrapedContent, error) {
var inString []byte
var err error
var ty models.ScrapeContentType
switch {
case input.Performer != nil:
inString, err = json.Marshal(*input.Performer)
ty = models.ScrapeContentTypePerformer
case input.Gallery != nil:
inString, err = json.Marshal(*input.Gallery)
ty = models.ScrapeContentTypeGallery
case input.Scene != nil:
inString, err = json.Marshal(*input.Scene)
ty = models.ScrapeContentTypeScene
}
if err != nil {
return nil, err
}
return s.scrape(ctx, string(inString), ty)
}
func (s *scriptScraper) scrapeByURL(ctx context.Context, url string, ty models.ScrapeContentType) (models.ScrapedContent, error) {
return s.scrape(ctx, `{"url": "`+url+`"}`, ty)
}
func (s *scriptScraper) scrape(ctx context.Context, input string, ty models.ScrapeContentType) (models.ScrapedContent, error) {
switch ty {
case models.ScrapeContentTypePerformer:
var performer models.ScrapedPerformer
err := s.runScraperScript(input, &performer)
return &performer, err
case models.ScrapeContentTypeGallery:
var gallery models.ScrapedGallery
err := s.runScraperScript(input, &gallery)
return &gallery, err
case models.ScrapeContentTypeScene:
var scene models.ScrapedScene
err := s.runScraperScript(input, &scene)
return &scene, err
case models.ScrapeContentTypeMovie:
var movie models.ScrapedMovie
err := s.runScraperScript(input, &movie)
return &movie, err
}
return nil, ErrNotSupported
}
func (s *scriptScraper) scrapeSceneByScene(ctx context.Context, scene *models.Scene) (*models.ScrapedScene, error) {
inString, err := json.Marshal(sceneToUpdateInput(scene))
if err != nil {
return nil, err
}
var ret models.ScrapedScene
err = s.runScraperScript(string(inString), &ret)
return &ret, err
}
func (s *scriptScraper) scrapeGalleryByGallery(ctx context.Context, gallery *models.Gallery) (*models.ScrapedGallery, error) {
inString, err := json.Marshal(galleryToUpdateInput(gallery))
if err != nil {
return nil, err
}
var ret models.ScrapedGallery
err = s.runScraperScript(string(inString), &ret)
return &ret, err
}
func findPythonExecutable() (string, error) {
_, err := exec.LookPath("python3")
if err != nil {
_, err = exec.LookPath("python")
if err != nil {
return "", err
}
return "python", nil
}
return "python3", nil
}
func handleScraperStderr(name string, scraperOutputReader io.ReadCloser) {
const scraperPrefix = "[Scrape / %s] "
lgr := logger.PluginLogger{
Prefix: fmt.Sprintf(scraperPrefix, name),
DefaultLogLevel: &logger.ErrorLevel,
}
lgr.HandlePluginStdErr(scraperOutputReader)
}