feat: misc improvements to upload/copy/hash (#5045)

general: add createTime/updateTime support in webdav and some drivers
general: add hash support in some drivers
general: cross-storage rapid-upload support
general: enhance upload to avoid local temp file if possible
general: replace readseekcloser with the File interface to speed up stream operations
feat(aliyun_open): same as above
feat(crypt): add hack for 139cloud

Close #4934 
Close #4819 

baidu_netdisk needs to improve the upload code to support rapid-upload
Sean
2023-08-27 21:14:23 +08:00
committed by GitHub
parent 9b765ef696
commit a3748af772
77 changed files with 1731 additions and 615 deletions

internal/stream/stream.go Normal file

@@ -0,0 +1,278 @@
package stream
import (
"bytes"
"context"
"errors"
"fmt"
"github.com/alist-org/alist/v3/internal/errs"
"github.com/alist-org/alist/v3/internal/model"
"github.com/alist-org/alist/v3/pkg/http_range"
"github.com/alist-org/alist/v3/pkg/utils"
"io"
"os"
)
type FileStream struct {
Ctx context.Context
model.Obj
io.Reader
Mimetype string
WebPutAsTask bool
Exist model.Obj // the file that already exists in the destination; we can reuse some info since we will overwrite it
utils.Closers
tmpFile *os.File // if present, tmpFile holds the full content and will be deleted when the stream is closed
peekBuff *bytes.Reader
}
func (f *FileStream) GetMimetype() string {
return f.Mimetype
}
func (f *FileStream) NeedStore() bool {
return f.WebPutAsTask
}
func (f *FileStream) Close() error {
var err1, err2 error
err1 = f.Closers.Close()
if f.tmpFile != nil {
err2 = os.RemoveAll(f.tmpFile.Name())
if err2 != nil {
err2 = errs.NewErr(err2, "failed to remove tmpFile [%s]", f.tmpFile.Name())
}
}
return errors.Join(err1, err2)
}
func (f *FileStream) GetExist() model.Obj {
return f.Exist
}
func (f *FileStream) SetExist(obj model.Obj) {
f.Exist = obj
}
// CacheFullInTempFile saves all data into tmpFile. Not recommended since it wears the disk,
// and the upload can't start until the file is fully written. It's not thread-safe!
// It won't check whether the Reader has already been partially consumed.
func (f *FileStream) CacheFullInTempFile() (model.File, error) {
if f.tmpFile != nil {
return f.tmpFile, nil
}
if file, ok := f.Reader.(model.File); ok {
return file, nil
}
tmpF, err := utils.CreateTempFile(f.Reader, f.GetSize())
if err != nil {
return nil, err
}
f.tmpFile = tmpF
f.Reader = tmpF
return f.tmpFile, nil
}
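// Illustrative usage (not part of this commit): a driver that needs random
// access, e.g. for multipart upload, can fall back to the temp-file cache
// when the underlying Reader is not already a model.File. The function name
// below is hypothetical; model.File is assumed to support ReadAt/Seek.
//
//	func uploadFirstPart(s *FileStream) error {
//		file, err := s.CacheFullInTempFile()
//		if err != nil {
//			return err
//		}
//		buf := make([]byte, 4*1024*1024)
//		_, err = file.ReadAt(buf, 0) // read the first 4MB part at offset 0
//		return err
//	}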
const InMemoryBufMaxSize = 10 // Megabytes
const InMemoryBufMaxSizeBytes = InMemoryBufMaxSize * 1024 * 1024
// RangeRead has to cache all data first since only a Reader is provided.
// It also supports a peeking RangeRead at the very start, but won't buffer more than 10MB of data in memory.
func (f *FileStream) RangeRead(httpRange http_range.Range) (io.Reader, error) {
if httpRange.Length == -1 {
httpRange.Length = f.GetSize()
}
if f.peekBuff != nil && httpRange.Start < int64(f.peekBuff.Len()) && httpRange.Start+httpRange.Length-1 < int64(f.peekBuff.Len()) {
return io.NewSectionReader(f.peekBuff, httpRange.Start, httpRange.Length), nil
}
if httpRange.Start == 0 && httpRange.Length <= InMemoryBufMaxSizeBytes && f.peekBuff == nil {
bufSize := utils.Min(httpRange.Length, f.GetSize())
newBuf := bytes.NewBuffer(make([]byte, 0, bufSize))
n, err := io.CopyN(newBuf, f.Reader, bufSize)
if err != nil {
return nil, err
}
if n != bufSize {
return nil, fmt.Errorf("stream RangeRead did not get all data in peek, expect =%d ,actual =%d", bufSize, n)
}
f.peekBuff = bytes.NewReader(newBuf.Bytes())
f.Reader = io.MultiReader(f.peekBuff, f.Reader)
return io.NewSectionReader(f.peekBuff, httpRange.Start, httpRange.Length), nil
}
if f.tmpFile == nil {
_, err := f.CacheFullInTempFile()
if err != nil {
return nil, err
}
}
return io.NewSectionReader(f.tmpFile, httpRange.Start, httpRange.Length), nil
}
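// Illustrative usage (not part of this commit): peek the head of the stream
// for a rapid-upload pre-hash without consuming the Reader. The hash choice
// and the 256KB size are assumptions; "crypto/md5" and "encoding/hex" would
// need to be imported by the caller.
//
//	head, err := f.RangeRead(http_range.Range{Start: 0, Length: 256 * 1024})
//	if err != nil {
//		return err
//	}
//	h := md5.New()
//	if _, err = io.Copy(h, head); err != nil {
//		return err
//	}
//	preHash := hex.EncodeToString(h.Sum(nil)) // send to the driver's rapid-upload check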
var _ model.FileStreamer = (*SeekableStream)(nil)
var _ model.FileStreamer = (*FileStream)(nil)
//var _ seekableStream = (*FileStream)(nil)
// SeekableStream is used for most internal streams, which are backed by either a RangeReadCloser or an MFile
type SeekableStream struct {
FileStream
Link *model.Link
// should have one of the below to support RangeRead
rangeReadCloser model.RangeReadCloserIF
mFile model.File
}
func NewSeekableStream(fs FileStream, link *model.Link) (*SeekableStream, error) {
if len(fs.Mimetype) == 0 {
fs.Mimetype = utils.GetMimeType(fs.Obj.GetName())
}
ss := SeekableStream{FileStream: fs, Link: link}
if ss.Reader != nil {
result, ok := ss.Reader.(model.File)
if ok {
ss.mFile = result
ss.Closers.Add(result)
return &ss, nil
}
}
if ss.Link != nil {
if ss.Link.MFile != nil {
ss.mFile = ss.Link.MFile
ss.Reader = ss.Link.MFile
ss.Closers.Add(ss.Link.MFile)
return &ss, nil
}
if ss.Link.RangeReadCloser != nil {
ss.rangeReadCloser = ss.Link.RangeReadCloser
return &ss, nil
}
if len(ss.Link.URL) > 0 {
rrc, err := GetRangeReadCloserFromLink(ss.GetSize(), link)
if err != nil {
return nil, err
}
ss.rangeReadCloser = rrc
return &ss, nil
}
}
return nil, fmt.Errorf("illegal seekableStream")
}
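// Illustrative usage (not part of this commit): wrap a driver link so the
// destination side of a copy can stream or range-read the source object.
// ctx, srcObj and link are assumed to come from the caller.
//
//	ss, err := NewSeekableStream(FileStream{Ctx: ctx, Obj: srcObj}, link)
//	if err != nil {
//		return err
//	}
//	defer ss.Close()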
//func (ss *SeekableStream) Peek(length int) {
//
//}
// RangeRead is not thread-safe; please use it from a single goroutine only.
func (ss *SeekableStream) RangeRead(httpRange http_range.Range) (io.Reader, error) {
if httpRange.Length == -1 {
httpRange.Length = ss.GetSize()
}
if ss.mFile != nil {
return io.NewSectionReader(ss.mFile, httpRange.Start, httpRange.Length), nil
}
if ss.tmpFile != nil {
return io.NewSectionReader(ss.tmpFile, httpRange.Start, httpRange.Length), nil
}
if ss.rangeReadCloser != nil {
rc, err := ss.rangeReadCloser.RangeRead(ss.Ctx, httpRange)
if err != nil {
return nil, err
}
return rc, nil
}
return nil, fmt.Errorf("can't find mFile or rangeReadCloser")
}
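// Illustrative usage (not part of this commit): a destination driver that
// uploads in fixed-size parts can pull each part straight from the source
// via RangeRead instead of caching the whole file locally. partSize and
// uploadPart are assumptions, not part of this API.
//
//	for off := int64(0); off < ss.GetSize(); off += partSize {
//		length := utils.Min(partSize, ss.GetSize()-off)
//		part, err := ss.RangeRead(http_range.Range{Start: off, Length: length})
//		if err != nil {
//			return err
//		}
//		if err = uploadPart(ss.Ctx, part, off, length); err != nil {
//			return err
//		}
//	}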
//func (f *FileStream) GetReader() io.Reader {
// return f.Reader
//}
// Read only provides the Reader as a full stream when it is demanded. For rapid-upload, we can skip this to save memory.
func (ss *SeekableStream) Read(p []byte) (n int, err error) {
//f.mu.Lock()
//f.peekedOnce = true
//defer f.mu.Unlock()
if ss.Reader == nil {
if ss.rangeReadCloser == nil {
return 0, fmt.Errorf("illegal seekableStream")
}
rc, err := ss.rangeReadCloser.RangeRead(ss.Ctx, http_range.Range{Length: -1})
if err != nil {
return 0, err
}
ss.Reader = io.NopCloser(rc)
ss.Closers.Add(rc)
}
return ss.Reader.Read(p)
}
func (ss *SeekableStream) CacheFullInTempFile() (model.File, error) {
if ss.tmpFile != nil {
return ss.tmpFile, nil
}
if ss.mFile != nil {
return ss.mFile, nil
}
tmpF, err := utils.CreateTempFile(ss, ss.GetSize())
if err != nil {
return nil, err
}
ss.tmpFile = tmpF
ss.Reader = tmpF
return ss.tmpFile, nil
}
//func (f *FileStream) SetReader(r io.Reader) {
// f.Reader = r
//}
/*
// RangePeek allows a one-time peek at the start of the data, since most drivers check the first XX bytes for rapid-upload
func (f *FileStream) RangePeek(length int64) (*bytes.Buffer, error) {
if length > InMemoryBufMaxSize*1024*1024 {
return nil, errs.NewErr(errs.StreamPeekFail, "can't peek size > %d MB", InMemoryBufMaxSize)
}
httpRange := &http_range.Range{Length: length}
bufSize := utils.Min(httpRange.Length, f.GetSize())
buf := bytes.NewBuffer(make([]byte, 0, bufSize))
if f.link == nil && f.tmpFile == nil {
if !f.peekedOnce {
f.mu.Lock()
f.peekedOnce = true
_, err := io.CopyN(buf, f.Reader, bufSize)
if err != nil {
f.mu.Unlock()
return nil, errs.NewErr(errs.StreamPeekFail, "failed to copyN %d bytes data", bufSize)
}
f.Reader = io.MultiReader(buf, f.Reader)
f.mu.Unlock()
return buf, nil
}
return nil, errs.NewErr(errs.StreamPeekFail, "link and tmpFile both are null")
}
f.mu.Lock()
defer f.mu.Unlock()
rc, _, err := GetReadCloserFromLink(f.Obj, f.link, httpRange)
if err != nil {
return nil, err
}
_, err = io.CopyN(buf, rc, bufSize)
if err != nil {
return nil, err
}
return buf, nil
}*/
//func (f *FileStream) SetTmpFile(r *os.File) {
// f.mu.Lock()
// //f.readDisabled = true
// f.tmpFile = r
// f.Reader = r
// f.mu.Unlock()
//}

internal/stream/util.go Normal file

@@ -0,0 +1,84 @@
package stream
import (
"context"
"fmt"
"github.com/alist-org/alist/v3/internal/errs"
"github.com/alist-org/alist/v3/internal/model"
"github.com/alist-org/alist/v3/internal/net"
"github.com/alist-org/alist/v3/pkg/http_range"
log "github.com/sirupsen/logrus"
"io"
"net/http"
)
func GetRangeReadCloserFromLink(size int64, link *model.Link) (model.RangeReadCloserIF, error) {
if len(link.URL) == 0 {
return nil, fmt.Errorf("can't create RangeReadCloser since URL is empty in link")
}
//remoteClosers := utils.EmptyClosers()
rangeReaderFunc := func(ctx context.Context, r http_range.Range) (io.ReadCloser, error) {
if link.Concurrency != 0 || link.PartSize != 0 {
header := net.ProcessHeader(http.Header{}, link.Header)
down := net.NewDownloader(func(d *net.Downloader) {
d.Concurrency = link.Concurrency
d.PartSize = link.PartSize
})
req := &net.HttpRequestParams{
URL: link.URL,
Range: r,
Size: size,
HeaderRef: header,
}
rc, err := down.Download(ctx, req)
if err != nil {
return nil, errs.NewErr(err, "GetRangeReadCloserFromLink failed")
}
return rc, nil
}
if len(link.URL) > 0 {
response, err := RequestRangedHttp(ctx, link, r.Start, r.Length)
if err != nil {
if response == nil {
return nil, fmt.Errorf("http request failure, err: %w", err)
}
return nil, fmt.Errorf("http request failure, status: %d, err: %w", response.StatusCode, err)
}
if r.Start == 0 && (r.Length == -1 || r.Length == size) || response.StatusCode == http.StatusPartialContent ||
checkContentRange(&response.Header, size, r.Start) {
return response.Body, nil
} else if response.StatusCode == http.StatusOK {
log.Warnf("remote http server not supporting range request, expect low perfromace!")
readCloser, err := net.GetRangedHttpReader(response.Body, r.Start, r.Length)
if err != nil {
return nil, err
}
return readCloser, nil
}
return response.Body, nil
}
return nil, errs.NotSupport
}
resultRangeReadCloser := model.RangeReadCloser{RangeReader: rangeReaderFunc}
return &resultRangeReadCloser, nil
}
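// Illustrative usage (not part of this commit): build a RangeReadCloser from
// a plain URL link and read an arbitrary slice of the remote object. obj,
// link and ctx are assumed to come from the caller.
//
//	rrc, err := GetRangeReadCloserFromLink(obj.GetSize(), link)
//	if err != nil {
//		return err
//	}
//	rc, err := rrc.RangeRead(ctx, http_range.Range{Start: 1024, Length: 4096})
//	if err != nil {
//		return err
//	}
//	defer rc.Close()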
func RequestRangedHttp(ctx context.Context, link *model.Link, offset, length int64) (*http.Response, error) {
header := net.ProcessHeader(http.Header{}, link.Header)
header = http_range.ApplyRangeToHttpHeader(http_range.Range{Start: offset, Length: length}, header)
return net.RequestHttp(ctx, "GET", header, link.URL)
}
// 139 cloud does not properly return the 206 http status code, so add a hack here
func checkContentRange(header *http.Header, size, offset int64) bool {
r, err2 := http_range.ParseRange(header.Get("Content-Range"), size)
if err2 != nil {
log.Warnf("exception trying to parse Content-Range, will ignore,err=%s", err2)
}
if len(r) == 1 && r[0].Start == offset {
return true
}
return false
}