Warn on charset decoding issues (#3568)

This commit is contained in:
WithoutPants 2023-03-24 09:04:48 +11:00 committed by GitHub
parent 7e66741998
commit 1f578db2d6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 16 additions and 14 deletions

View File

@ -58,30 +58,32 @@ func newZipFS(fs FS, path string, info fs.FileInfo) (*ZipFS, error) {
// Detect encoding
d, err := chardet.NewTextDetector().DetectBest(buffer.Bytes())
if err != nil {
reader.Close()
return nil, fmt.Errorf("unable to detect decoding: %w", err)
// If we can't detect the encoding, just assume it's UTF8
logger.Warnf("Unable to detect decoding for %s: %w", path, err)
}
// If the charset is not UTF8, decode'em
if d.Charset != "UTF-8" {
if d != nil && d.Charset != "UTF-8" {
logger.Debugf("Detected non-utf8 zip charset %s (%s): %s", d.Charset, d.Language, path)
e, _ := charset.Lookup(d.Charset)
if e == nil {
reader.Close()
return nil, fmt.Errorf("failed to lookup charset %s, language %s", d.Charset, d.Language)
}
// if we can't find the encoding, just assume it's UTF8
logger.Warnf("Failed to lookup charset %s, language %s", d.Charset, d.Language)
} else {
decoder := e.NewDecoder()
for _, f := range zipReader.File {
f.Name, _, err = transform.String(decoder, f.Name)
newName, _, err := transform.String(decoder, f.Name)
if err != nil {
reader.Close()
return nil, fmt.Errorf("failed to decode %v: %w", []byte(f.Name), err)
logger.Warnf("Failed to decode %v: %v", []byte(f.Name), err)
} else {
f.Name = newName
}
// Comments are not decoded cuz stash doesn't use that
}
}
}
return &ZipFS{
Reader: zipReader,