Add unix timestamp parsing to scrapers parseDate (#2817)

* Add unix timestamp parsing to scrapers parseDate
* Add documentation
* Update ScraperDevelopment.md
* Add unit test

Co-authored-by: WithoutPants <53250216+WithoutPants@users.noreply.github.com>
This commit is contained in:
JackDawson94 2022-09-30 07:35:56 +02:00 committed by GitHub
parent 6c6e0b6236
commit 554448594c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 79 additions and 1 deletions

View File

@ -394,6 +394,19 @@ func (p *postProcessParseDate) Apply(ctx context.Context, value string, q mapped
return value
}
if parseDate == "unix" {
// try to parse the date using unix timestamp format
// if it fails, then just fall back to the original value
timeAsInt, err := strconv.ParseInt(value, 10, 64)
if err != nil {
logger.Warnf("Error parsing date string '%s' using unix timestamp format : %s", value, err.Error())
return value
}
parsedValue := time.Unix(timeAsInt, 0)
return parsedValue.Format(internalDateFormat)
}
// try to parse the date using the pattern
// if it fails, then just fall back to the original value
parsedValue, err := time.Parse(parseDate, value)

View File

@ -2,7 +2,9 @@ package scraper
import (
"context"
"strconv"
"testing"
"time"
"github.com/stretchr/testify/assert"
"gopkg.in/yaml.v2"
@ -59,3 +61,57 @@ func TestFeetToCM(t *testing.T) {
assert.Equal(t, test.out, pp.Apply(context.Background(), test.in, q))
}
}
// Test_postProcessParseDate_Apply runs postProcessParseDate.Apply over a table
// of layout/input pairs and compares the returned string with the expected one.
// The table covers a custom layout, the "today" and "yesterday" keywords, the
// "unix" layout, and a layout that does not match the input, for which the
// input is expected back unchanged.
func Test_postProcessParseDate_Apply(t *testing.T) {
	// Layout used to render the expected values.
	const wantLayout = "2006-01-02"

	now := time.Now()

	// Built in the local zone; both the input timestamp and the expected
	// string are derived from this same value.
	unixDate := time.Date(2021, time.September, 4, 1, 2, 3, 4, time.Local)

	type testCase struct {
		name  string
		arg   postProcessParseDate
		value string
		want  string
	}

	cases := []testCase{
		{
			name:  "simple",
			arg:   "2006=01=02",
			value: "2001=03=23",
			want:  "2001-03-23",
		},
		{
			name:  "today",
			arg:   "",
			value: "today",
			want:  now.Format(wantLayout),
		},
		{
			name:  "yesterday",
			arg:   "",
			value: "yesterday",
			want:  now.Add(-24 * time.Hour).Format(wantLayout),
		},
		{
			name:  "unix",
			arg:   "unix",
			value: strconv.FormatInt(unixDate.Unix(), 10),
			want:  unixDate.Format(wantLayout),
		},
		{
			// The layout cannot parse the input, so the input is expected as-is.
			name:  "invalid",
			arg:   "invalid",
			value: "2001=03=23",
			want:  "2001=03=23",
		},
	}

	ctx := context.Background()
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got := tc.arg.Apply(ctx, tc.value, nil)
			if got == tc.want {
				return
			}
			t.Errorf("postProcessParseDate.Apply() = %v, want %v", got, tc.want)
		})
	}
}

View File

@ -14,6 +14,7 @@ After migrating, please run a scan on your entire library to populate missing da
* Added release notes dialog. ([#2726](https://github.com/stashapp/stash/pull/2726))
### 🎨 Improvements
* Added unix timestamp parsing in the `parseDate` scraper post processor. ([#2817](https://github.com/stashapp/stash/pull/2817))
* Improve matching scene order in the tagger to prioritise matching phashes and durations. ([#2840](https://github.com/stashapp/stash/pull/2840))
* Encode reserved characters in query URLs. ([#2899](https://github.com/stashapp/stash/pull/2899))
* Object titles are now displayed as the file basename if the title is not explicitly set. The `Don't include file extension as part of the title` scan flag is no longer supported.

View File

@ -366,7 +366,15 @@ performer:
Gets the contents of the selected div element, and sets the returned value to `Female` if the scraped value is `F`; `Male` if the scraped value is `M`.
Height and weight are extracted from the selected spans and converted to `cm` and `kg`.
* `parseDate`: if present, the value is the date format using go's reference date (2006-01-02). For example, if an example date was `14-Mar-2003`, then the date format would be `02-Jan-2006`. See the [time.Parse documentation](https://golang.org/pkg/time/#Parse) for details. When present, the scraper will convert the input string into a date, then convert it to the string format used by stash (`YYYY-MM-DD`). Strings "Today", "Yesterday" are matched (case insensitive) and converted by the scraper so you don't need to edit/replace them.
* `parseDate`: if present, the value is the date format using go's reference date (2006-01-02). For example, if an example date was `14-Mar-2003`, then the date format would be `02-Jan-2006`. See the [time.Parse documentation](https://golang.org/pkg/time/#Parse) for details. When present, the scraper will convert the input string into a date, then convert it to the string format used by stash (`YYYY-MM-DD`). Strings "Today", "Yesterday" are matched (case insensitive) and converted by the scraper so you don't need to edit/replace them.
Unix timestamps in seconds (example: 1660169451) can also be parsed by selecting `unix` as the date format.
Example:
```yaml
Date:
selector: //div[@class="value epoch"]/text()
postProcess:
- parseDate: unix
```
* `subtractDays`: if set to `true` it subtracts the value in days from the current date and returns the resulting date in stash's date format.
Example: