feat(archive viewer): option to select text encoding for zip files (#2867)

pull/2892/head 4.8.0
Aaron Liu 7 days ago
parent a581851f84
commit 7d97237593

@ -1 +1 @@
Subproject commit dcf21d5eb9fbb635e81ab3c13b44e1233db5cac9 Subproject commit dece1c7098de2efe38aaa25d6cafc41a2de568ff

@ -17,6 +17,13 @@ import (
"github.com/cloudreve/Cloudreve/v4/pkg/filemanager/fs/dbfs" "github.com/cloudreve/Cloudreve/v4/pkg/filemanager/fs/dbfs"
"github.com/cloudreve/Cloudreve/v4/pkg/filemanager/manager/entitysource" "github.com/cloudreve/Cloudreve/v4/pkg/filemanager/manager/entitysource"
"github.com/cloudreve/Cloudreve/v4/pkg/util" "github.com/cloudreve/Cloudreve/v4/pkg/util"
"golang.org/x/text/encoding"
"golang.org/x/text/encoding/charmap"
"golang.org/x/text/encoding/japanese"
"golang.org/x/text/encoding/korean"
"golang.org/x/text/encoding/simplifiedchinese"
"golang.org/x/text/encoding/traditionalchinese"
"golang.org/x/text/encoding/unicode"
"golang.org/x/tools/container/intsets" "golang.org/x/tools/container/intsets"
) )
@ -37,7 +44,47 @@ func init() {
gob.Register([]ArchivedFile{}) gob.Register([]ArchivedFile{})
} }
func (m *manager) ListArchiveFiles(ctx context.Context, uri *fs.URI, entity string) ([]ArchivedFile, error) { var ZipEncodings = map[string]encoding.Encoding{
"ibm866": charmap.CodePage866,
"iso8859_2": charmap.ISO8859_2,
"iso8859_3": charmap.ISO8859_3,
"iso8859_4": charmap.ISO8859_4,
"iso8859_5": charmap.ISO8859_5,
"iso8859_6": charmap.ISO8859_6,
"iso8859_7": charmap.ISO8859_7,
"iso8859_8": charmap.ISO8859_8,
"iso8859_8I": charmap.ISO8859_8I,
"iso8859_10": charmap.ISO8859_10,
"iso8859_13": charmap.ISO8859_13,
"iso8859_14": charmap.ISO8859_14,
"iso8859_15": charmap.ISO8859_15,
"iso8859_16": charmap.ISO8859_16,
"koi8r": charmap.KOI8R,
"koi8u": charmap.KOI8U,
"macintosh": charmap.Macintosh,
"windows874": charmap.Windows874,
"windows1250": charmap.Windows1250,
"windows1251": charmap.Windows1251,
"windows1252": charmap.Windows1252,
"windows1253": charmap.Windows1253,
"windows1254": charmap.Windows1254,
"windows1255": charmap.Windows1255,
"windows1256": charmap.Windows1256,
"windows1257": charmap.Windows1257,
"windows1258": charmap.Windows1258,
"macintoshcyrillic": charmap.MacintoshCyrillic,
"gbk": simplifiedchinese.GBK,
"gb18030": simplifiedchinese.GB18030,
"big5": traditionalchinese.Big5,
"eucjp": japanese.EUCJP,
"iso2022jp": japanese.ISO2022JP,
"shiftjis": japanese.ShiftJIS,
"euckr": korean.EUCKR,
"utf16be": unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM),
"utf16le": unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM),
}
func (m *manager) ListArchiveFiles(ctx context.Context, uri *fs.URI, entity, zipEncoding string) ([]ArchivedFile, error) {
file, err := m.fs.Get(ctx, uri, dbfs.WithFileEntities(), dbfs.WithRequiredCapabilities(dbfs.NavigatorCapabilityDownloadFile)) file, err := m.fs.Get(ctx, uri, dbfs.WithFileEntities(), dbfs.WithRequiredCapabilities(dbfs.NavigatorCapabilityDownloadFile))
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to get file: %w", err) return nil, fmt.Errorf("failed to get file: %w", err)
@ -57,7 +104,18 @@ func (m *manager) ListArchiveFiles(ctx context.Context, uri *fs.URI, entity stri
return nil, fs.ErrEntityNotExist return nil, fs.ErrEntityNotExist
} }
cacheKey := getArchiveListCacheKey(targetEntity.ID()) var (
enc encoding.Encoding
ok bool
)
if zipEncoding != "" {
enc, ok = ZipEncodings[strings.ToLower(zipEncoding)]
if !ok {
return nil, fs.ErrNotSupportedAction.WithError(fmt.Errorf("not supported zip encoding: %s", zipEncoding))
}
}
cacheKey := getArchiveListCacheKey(targetEntity.ID(), zipEncoding)
kv := m.kv kv := m.kv
res, found := kv.Get(cacheKey) res, found := kv.Get(cacheKey)
if found { if found {
@ -72,7 +130,7 @@ func (m *manager) ListArchiveFiles(ctx context.Context, uri *fs.URI, entity stri
es.Apply(entitysource.WithContext(ctx)) es.Apply(entitysource.WithContext(ctx))
defer es.Close() defer es.Close()
var readerFunc func(ctx context.Context, file io.ReaderAt, size int64) ([]ArchivedFile, error) var readerFunc func(ctx context.Context, file io.ReaderAt, size int64, textEncoding encoding.Encoding) ([]ArchivedFile, error)
switch file.Ext() { switch file.Ext() {
case "zip": case "zip":
readerFunc = getZipFileList readerFunc = getZipFileList
@ -83,7 +141,7 @@ func (m *manager) ListArchiveFiles(ctx context.Context, uri *fs.URI, entity stri
} }
sr := io.NewSectionReader(es, 0, targetEntity.Size()) sr := io.NewSectionReader(es, 0, targetEntity.Size())
fileList, err := readerFunc(ctx, sr, targetEntity.Size()) fileList, err := readerFunc(ctx, sr, targetEntity.Size(), enc)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to read file list: %w", err) return nil, fmt.Errorf("failed to read file list: %w", err)
} }
@ -199,7 +257,7 @@ func (m *manager) compressFileToArchive(ctx context.Context, parent string, file
} }
func getZipFileList(ctx context.Context, file io.ReaderAt, size int64) ([]ArchivedFile, error) { func getZipFileList(ctx context.Context, file io.ReaderAt, size int64, textEncoding encoding.Encoding) ([]ArchivedFile, error) {
zr, err := zip.NewReader(file, size) zr, err := zip.NewReader(file, size)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to create zip reader: %w", err) return nil, fmt.Errorf("failed to create zip reader: %w", err)
@ -207,10 +265,25 @@ func getZipFileList(ctx context.Context, file io.ReaderAt, size int64) ([]Archiv
fileList := make([]ArchivedFile, 0, len(zr.File)) fileList := make([]ArchivedFile, 0, len(zr.File))
for _, f := range zr.File { for _, f := range zr.File {
hdr := f.FileHeader
if hdr.NonUTF8 && textEncoding != nil {
dec := textEncoding.NewDecoder()
filename, err := dec.String(hdr.Name)
if err == nil {
hdr.Name = filename
}
if hdr.Comment != "" {
comment, err := dec.String(hdr.Comment)
if err == nil {
hdr.Comment = comment
}
}
}
info := f.FileInfo() info := f.FileInfo()
modTime := info.ModTime() modTime := info.ModTime()
fileList = append(fileList, ArchivedFile{ fileList = append(fileList, ArchivedFile{
Name: util.FormSlash(f.Name), Name: util.FormSlash(hdr.Name),
Size: info.Size(), Size: info.Size(),
UpdatedAt: &modTime, UpdatedAt: &modTime,
IsDirectory: info.IsDir(), IsDirectory: info.IsDir(),
@ -219,7 +292,7 @@ func getZipFileList(ctx context.Context, file io.ReaderAt, size int64) ([]Archiv
return fileList, nil return fileList, nil
} }
func get7zFileList(ctx context.Context, file io.ReaderAt, size int64) ([]ArchivedFile, error) { func get7zFileList(ctx context.Context, file io.ReaderAt, size int64, extEncoding encoding.Encoding) ([]ArchivedFile, error) {
zr, err := sevenzip.NewReader(file, size) zr, err := sevenzip.NewReader(file, size)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to create 7z reader: %w", err) return nil, fmt.Errorf("failed to create 7z reader: %w", err)
@ -239,6 +312,6 @@ func get7zFileList(ctx context.Context, file io.ReaderAt, size int64) ([]Archive
return fileList, nil return fileList, nil
} }
// getArchiveListCacheKey builds the KV cache key under which the file listing
// of an archive entity is stored.
//
// entity is the ID of the archive entity and encoding is the text-encoding
// name requested for decoding zip entry names (empty when none was given).
// The encoding is lowercased before it is embedded in the key: encoding names
// are matched case-insensitively when resolved, so "GBK" and "gbk" yield the
// same listing and must share a single cache entry.
func getArchiveListCacheKey(entity int, encoding string) string {
	return fmt.Sprintf("archive_list_%d_%s", entity, strings.ToLower(encoding))
}

@ -88,7 +88,7 @@ type (
// CreateArchive creates an archive // CreateArchive creates an archive
CreateArchive(ctx context.Context, uris []*fs.URI, writer io.Writer, opts ...fs.Option) (int, error) CreateArchive(ctx context.Context, uris []*fs.URI, writer io.Writer, opts ...fs.Option) (int, error)
// ListArchiveFiles lists files in an archive // ListArchiveFiles lists files in an archive
ListArchiveFiles(ctx context.Context, uri *fs.URI, entity string) ([]ArchivedFile, error) ListArchiveFiles(ctx context.Context, uri *fs.URI, entity, zipEncoding string) ([]ArchivedFile, error)
} }
FileManager interface { FileManager interface {

@ -27,13 +27,6 @@ import (
"github.com/cloudreve/Cloudreve/v4/pkg/util" "github.com/cloudreve/Cloudreve/v4/pkg/util"
"github.com/gofrs/uuid" "github.com/gofrs/uuid"
"github.com/mholt/archives" "github.com/mholt/archives"
"golang.org/x/text/encoding"
"golang.org/x/text/encoding/charmap"
"golang.org/x/text/encoding/japanese"
"golang.org/x/text/encoding/korean"
"golang.org/x/text/encoding/simplifiedchinese"
"golang.org/x/text/encoding/traditionalchinese"
"golang.org/x/text/encoding/unicode"
) )
type ( type (
@ -79,46 +72,6 @@ func init() {
queue.RegisterResumableTaskFactory(queue.ExtractArchiveTaskType, NewExtractArchiveTaskFromModel) queue.RegisterResumableTaskFactory(queue.ExtractArchiveTaskType, NewExtractArchiveTaskFromModel)
} }
// encodings maps a text-encoding identifier to the decoder applied to zip
// archive entry names during extraction.
//
// Every key MUST be lowercase: the extraction tasks look names up with
// strings.ToLower(m.state.Encoding), so a key containing an uppercase letter
// can never be matched. (The ISO-8859-8-I entry was previously keyed as
// "iso8859_8I" and was therefore unreachable.)
var encodings = map[string]encoding.Encoding{
	"ibm866":            charmap.CodePage866,
	"iso8859_2":         charmap.ISO8859_2,
	"iso8859_3":         charmap.ISO8859_3,
	"iso8859_4":         charmap.ISO8859_4,
	"iso8859_5":         charmap.ISO8859_5,
	"iso8859_6":         charmap.ISO8859_6,
	"iso8859_7":         charmap.ISO8859_7,
	"iso8859_8":         charmap.ISO8859_8,
	"iso8859_8i":        charmap.ISO8859_8I,
	"iso8859_10":        charmap.ISO8859_10,
	"iso8859_13":        charmap.ISO8859_13,
	"iso8859_14":        charmap.ISO8859_14,
	"iso8859_15":        charmap.ISO8859_15,
	"iso8859_16":        charmap.ISO8859_16,
	"koi8r":             charmap.KOI8R,
	"koi8u":             charmap.KOI8U,
	"macintosh":         charmap.Macintosh,
	"windows874":        charmap.Windows874,
	"windows1250":       charmap.Windows1250,
	"windows1251":       charmap.Windows1251,
	"windows1252":       charmap.Windows1252,
	"windows1253":       charmap.Windows1253,
	"windows1254":       charmap.Windows1254,
	"windows1255":       charmap.Windows1255,
	"windows1256":       charmap.Windows1256,
	"windows1257":       charmap.Windows1257,
	"windows1258":       charmap.Windows1258,
	"macintoshcyrillic": charmap.MacintoshCyrillic,
	"gbk":               simplifiedchinese.GBK,
	"gb18030":           simplifiedchinese.GB18030,
	"big5":              traditionalchinese.Big5,
	"eucjp":             japanese.EUCJP,
	"iso2022jp":         japanese.ISO2022JP,
	"shiftjis":          japanese.ShiftJIS,
	"euckr":             korean.EUCKR,
	"utf16be":           unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM),
	"utf16le":           unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM),
}
// NewExtractArchiveTask creates a new ExtractArchiveTask // NewExtractArchiveTask creates a new ExtractArchiveTask
func NewExtractArchiveTask(ctx context.Context, src, dst, encoding, password string, mask []string) (queue.Task, error) { func NewExtractArchiveTask(ctx context.Context, src, dst, encoding, password string, mask []string) (queue.Task, error) {
state := &ExtractArchiveTaskState{ state := &ExtractArchiveTaskState{
@ -374,7 +327,7 @@ func (m *ExtractArchiveTask) masterExtractArchive(ctx context.Context, dep depen
if zipExtractor, ok := extractor.(archives.Zip); ok { if zipExtractor, ok := extractor.(archives.Zip); ok {
if m.state.Encoding != "" { if m.state.Encoding != "" {
m.l.Info("Using encoding %q for zip archive", m.state.Encoding) m.l.Info("Using encoding %q for zip archive", m.state.Encoding)
encoding, ok := encodings[strings.ToLower(m.state.Encoding)] encoding, ok := manager.ZipEncodings[strings.ToLower(m.state.Encoding)]
if !ok { if !ok {
m.l.Warning("Unknown encoding %q, fallback to default encoding", m.state.Encoding) m.l.Warning("Unknown encoding %q, fallback to default encoding", m.state.Encoding)
} else { } else {
@ -750,7 +703,7 @@ func (m *SlaveExtractArchiveTask) Do(ctx context.Context) (task.Status, error) {
if zipExtractor, ok := extractor.(archives.Zip); ok { if zipExtractor, ok := extractor.(archives.Zip); ok {
if m.state.Encoding != "" { if m.state.Encoding != "" {
m.l.Info("Using encoding %q for zip archive", m.state.Encoding) m.l.Info("Using encoding %q for zip archive", m.state.Encoding)
encoding, ok := encodings[strings.ToLower(m.state.Encoding)] encoding, ok := manager.ZipEncodings[strings.ToLower(m.state.Encoding)]
if !ok { if !ok {
m.l.Warning("Unknown encoding %q, fallback to default encoding", m.state.Encoding) m.l.Warning("Unknown encoding %q, fallback to default encoding", m.state.Encoding)
} else { } else {

@ -720,8 +720,9 @@ func (s *PatchViewService) Patch(c *gin.Context) error {
type (
	// ArchiveListFilesParamCtx is an empty marker type.
	// NOTE(review): presumably the context key under which the bound
	// ArchiveListFilesService is stored — confirm against the router.
	ArchiveListFilesParamCtx struct{}

	// ArchiveListFilesService carries the form parameters of a request to
	// list the files contained in an archive.
	ArchiveListFilesService struct {
		// Uri identifies the archive file; it is required.
		Uri string `form:"uri" binding:"required"`
		// Entity optionally selects a specific entity of the file.
		Entity string `form:"entity"`
		// TextEncoding optionally names the text encoding forwarded to
		// ListArchiveFiles for decoding zip entry names.
		TextEncoding string `form:"text_encoding"`
	}
)
@ -739,7 +740,7 @@ func (s *ArchiveListFilesService) List(c *gin.Context) (*ArchiveListFilesRespons
return nil, serializer.NewError(serializer.CodeParamErr, "unknown uri", err) return nil, serializer.NewError(serializer.CodeParamErr, "unknown uri", err)
} }
files, err := m.ListArchiveFiles(c, uri, s.Entity) files, err := m.ListArchiveFiles(c, uri, s.Entity, s.TextEncoding)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to list archive files: %w", err) return nil, fmt.Errorf("failed to list archive files: %w", err)
} }

Loading…
Cancel
Save