feat: misc improvements to upload/copy/hash (#5045)

general: add createTime/updateTime support in webdav and some drivers
general: add hash support in some drivers
general: cross-storage rapid-upload support
general: enhance upload to avoid a local temp file where possible
general: replace ReadSeekCloser with the File interface to speed up stream operations (see the sketch after this message)
feat(aliyun_open): same as above
feat(crypt): add hack for 139cloud

Close #4934 
Close #4819 

baidu_netdisk still needs its upload code reworked to support rapid-upload
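The ReadSeekCloser→File change is the load-bearing piece: drivers now ask the stream for a byte range instead of consuming and rewinding it. A minimal sketch of the new pattern, assuming the model.FileStreamer, http_range, and utils names that appear in the diff below (the import path for model and the preHash helper itself are illustrative, not part of this commit):

package example

import (
	"github.com/alist-org/alist/v3/internal/model"
	"github.com/alist-org/alist/v3/pkg/http_range"
	"github.com/alist-org/alist/v3/pkg/utils"
)

// preHash hashes only the first 1KB of an upload stream. RangeRead returns
// a reader over just that window, so the driver no longer has to consume
// the body and then seek back (or splice the head onto the remainder).
func preHash(stream model.FileStreamer) (string, error) {
	reader, err := stream.RangeRead(http_range.Range{Start: 0, Length: 1024})
	if err != nil {
		return "", err
	}
	return utils.HashReader(utils.SHA1, reader)
}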
Author: Sean · committed by GitHub · 2023-08-27 21:14:23 +08:00
parent 9b765ef696 · commit a3748af772
77 changed files with 1731 additions and 615 deletions


@@ -3,14 +3,12 @@ package aliyundrive_open
 import (
 	"bytes"
 	"context"
-	"crypto/sha1"
 	"encoding/base64"
-	"encoding/hex"
 	"fmt"
+	"github.com/alist-org/alist/v3/pkg/http_range"
 	"io"
 	"math"
 	"net/http"
-	"os"
 	"strconv"
 	"strings"
 	"time"
@@ -33,19 +31,19 @@ func makePartInfos(size int) []base.Json {
 }
 
 func calPartSize(fileSize int64) int64 {
-	var partSize int64 = 20 * 1024 * 1024
+	var partSize int64 = 20 * utils.MB
 	if fileSize > partSize {
-		if fileSize > 1*1024*1024*1024*1024 { // file Size over 1TB
-			partSize = 5 * 1024 * 1024 * 1024 // file part size 5GB
-		} else if fileSize > 768*1024*1024*1024 { // over 768GB
+		if fileSize > 1*utils.TB { // file Size over 1TB
+			partSize = 5 * utils.GB // file part size 5GB
+		} else if fileSize > 768*utils.GB { // over 768GB
 			partSize = 109951163 // ≈ 104.8576MB, split 1TB into 10,000 part
-		} else if fileSize > 512*1024*1024*1024 { // over 512GB
+		} else if fileSize > 512*utils.GB { // over 512GB
 			partSize = 82463373 // ≈ 78.6432MB
-		} else if fileSize > 384*1024*1024*1024 { // over 384GB
+		} else if fileSize > 384*utils.GB { // over 384GB
 			partSize = 54975582 // ≈ 52.4288MB
-		} else if fileSize > 256*1024*1024*1024 { // over 256GB
+		} else if fileSize > 256*utils.GB { // over 256GB
 			partSize = 41231687 // ≈ 39.3216MB
-		} else if fileSize > 128*1024*1024*1024 { // over 128GB
+		} else if fileSize > 128*utils.GB { // over 128GB
 			partSize = 27487791 // ≈ 26.2144MB
 		}
 	}
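Each hard-coded constant in the chain above follows one rule: it is the tier's upper bound divided by the 10,000-part cap, rounded up; e.g. 1TB = 1,099,511,627,776 bytes and ⌈1TB/10000⌉ = 109,951,163. A self-contained sanity check, using nothing beyond the constants in the diff:

package main

import "fmt"

func main() {
	// Each tier pairs its exclusive upper bound with the part size used
	// for files in that tier; ceil(bound/partSize) must not exceed 10000.
	tiers := []struct{ bound, partSize int64 }{
		{1 << 40, 109951163},  // (768GB, 1TB]
		{768 << 30, 82463373}, // (512GB, 768GB]
		{512 << 30, 54975582}, // (384GB, 512GB]
		{384 << 30, 41231687}, // (256GB, 384GB]
		{256 << 30, 27487791}, // (128GB, 256GB]
	}
	for _, t := range tiers {
		parts := (t.bound + t.partSize - 1) / t.partSize // ceiling division
		fmt.Printf("partSize %d -> %d parts at the tier bound\n", t.partSize, parts)
	}
}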
@@ -127,17 +125,22 @@ func getProofRange(input string, size int64) (*ProofRange, error) {
 	return pr, nil
 }
 
-func (d *AliyundriveOpen) calProofCode(file *os.File, fileSize int64) (string, error) {
-	proofRange, err := getProofRange(d.AccessToken, fileSize)
+func (d *AliyundriveOpen) calProofCode(stream model.FileStreamer) (string, error) {
+	proofRange, err := getProofRange(d.AccessToken, stream.GetSize())
 	if err != nil {
 		return "", err
 	}
-	buf := make([]byte, proofRange.End-proofRange.Start)
-	_, err = file.ReadAt(buf, proofRange.Start)
+	length := proofRange.End - proofRange.Start
+	buf := bytes.NewBuffer(make([]byte, 0, length))
+	reader, err := stream.RangeRead(http_range.Range{Start: proofRange.Start, Length: length})
 	if err != nil {
 		return "", err
 	}
-	return base64.StdEncoding.EncodeToString(buf), nil
+	_, err = io.CopyN(buf, reader, length)
+	if err != nil {
+		return "", err
+	}
+	return base64.StdEncoding.EncodeToString(buf.Bytes()), nil
 }
 
 func (d *AliyundriveOpen) upload(ctx context.Context, dstDir model.Obj, stream model.FileStreamer, up driver.UpdateProgress) (model.Obj, error) {
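calProofCode now reads the proof window straight off the stream; getProofRange itself is unchanged by this commit. For readers wondering where the window comes from: Aliyun's v1 proof scheme, as commonly implemented, derives the offset from md5 of the access token modulo the file size and reads 8 bytes. The sketch below shows that common derivation, stated as an assumption since getProofRange's body is not in this diff:

package example

import (
	"crypto/md5"
	"encoding/hex"
	"strconv"
)

// getProofRangeSketch: take the first 16 hex digits of md5(accessToken)
// as a uint64, reduce it modulo the file size, and use an 8-byte window
// (assumption: the driver's real getProofRange may differ in detail).
func getProofRangeSketch(accessToken string, size int64) (start, end int64) {
	if size <= 0 {
		return 0, 0
	}
	sum := md5.Sum([]byte(accessToken))
	tmp, _ := strconv.ParseUint(hex.EncodeToString(sum[:])[:16], 16, 64)
	start = int64(tmp % uint64(size))
	if end = start + 8; end > size {
		end = size
	}
	return start, end
}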
@@ -145,70 +148,68 @@ func (d *AliyundriveOpen) upload(ctx context.Context, dstDir model.Obj, stream m
 	// Part Size Unit: Bytes, Default: 20MB,
 	// Maximum number of slices 10,000, ≈195.3125GB
 	var partSize = calPartSize(stream.GetSize())
+	const dateFormat = "2006-01-02T15:04:05.88Z"
+	mtime := stream.ModTime()
+	mtimeStr := mtime.UTC().Format(dateFormat)
+	ctimeStr := stream.CreateTime().UTC().Format(dateFormat)
 	createData := base.Json{
-		"drive_id":        d.DriveId,
-		"parent_file_id":  dstDir.GetID(),
-		"name":            stream.GetName(),
-		"type":            "file",
-		"check_name_mode": "ignore",
+		"drive_id":          d.DriveId,
+		"parent_file_id":    dstDir.GetID(),
+		"name":              stream.GetName(),
+		"type":              "file",
+		"check_name_mode":   "ignore",
+		"local_modified_at": mtimeStr,
+		"local_created_at":  ctimeStr,
 	}
 	count := int(math.Ceil(float64(stream.GetSize()) / float64(partSize)))
 	createData["part_info_list"] = makePartInfos(count)
 	// rapid upload
-	rapidUpload := stream.GetSize() > 100*1024 && d.RapidUpload
+	rapidUpload := stream.GetSize() > 100*utils.KB && d.RapidUpload
 	if rapidUpload {
 		log.Debugf("[aliyundrive_open] start cal pre_hash")
 		// read 1024 bytes to calculate pre hash
-		buf := bytes.NewBuffer(make([]byte, 0, 1024))
-		_, err := io.CopyN(buf, stream, 1024)
+		reader, err := stream.RangeRead(http_range.Range{Start: 0, Length: 1024})
 		if err != nil {
 			return nil, err
 		}
+		hash, err := utils.HashReader(utils.SHA1, reader)
+		if err != nil {
+			return nil, err
+		}
 		createData["size"] = stream.GetSize()
-		createData["pre_hash"] = utils.GetSHA1Encode(buf.Bytes())
-		// if support seek, seek to start
-		if localFile, ok := stream.(io.Seeker); ok {
-			if _, err := localFile.Seek(0, io.SeekStart); err != nil {
-				return nil, err
-			}
-		} else {
-			// Put spliced head back to stream
-			stream.SetReadCloser(struct {
-				io.Reader
-				io.Closer
-			}{
-				Reader: io.MultiReader(buf, stream.GetReadCloser()),
-				Closer: stream.GetReadCloser(),
-			})
-		}
+		createData["pre_hash"] = hash
 	}
 	var createResp CreateResp
 	_, err, e := d.requestReturnErrResp("/adrive/v1.0/openFile/create", http.MethodPost, func(req *resty.Request) {
 		req.SetBody(createData).SetResult(&createResp)
 	})
+	var tmpF model.File
 	if err != nil {
 		if e.Code != "PreHashMatched" || !rapidUpload {
 			return nil, err
 		}
 		log.Debugf("[aliyundrive_open] pre_hash matched, start rapid upload")
-		// convert to local file
-		file, err := utils.CreateTempFile(stream, stream.GetSize())
-		if err != nil {
-			return nil, err
-		}
-		_ = stream.GetReadCloser().Close()
-		stream.SetReadCloser(file)
-		// calculate full hash
-		h := sha1.New()
-		_, err = io.Copy(h, file)
-		if err != nil {
-			return nil, err
+		hi := stream.GetHash()
+		hash := hi.GetHash(utils.SHA1)
+		if len(hash) <= 0 {
+			tmpF, err = stream.CacheFullInTempFile()
+			if err != nil {
+				return nil, err
+			}
+			hash, err = utils.HashFile(utils.SHA1, tmpF)
+			if err != nil {
+				return nil, err
+			}
 		}
 		delete(createData, "pre_hash")
 		createData["proof_version"] = "v1"
 		createData["content_hash_name"] = "sha1"
-		createData["content_hash"] = hex.EncodeToString(h.Sum(nil))
-		createData["proof_code"], err = d.calProofCode(file, stream.GetSize())
+		createData["content_hash"] = hash
+		createData["proof_code"], err = d.calProofCode(stream)
 		if err != nil {
 			return nil, fmt.Errorf("cal proof code error: %s", err.Error())
 		}
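This hunk is the heart of cross-storage rapid-upload: on PreHashMatched the server wants the full SHA-1, and the new code first asks the stream for a hash the source storage may already have reported, spooling to a temp file only as a fallback. The decision order distilled into a helper (illustrative, reusing the example package and imports from the pre-hash sketch above):

// fullSHA1 prefers a hash already carried by the stream, e.g. when the
// source of a cross-storage copy exposes SHA-1, and only caches the whole
// stream to a temp file when it must hash the bytes itself.
func fullSHA1(stream model.FileStreamer) (string, error) {
	if h := stream.GetHash().GetHash(utils.SHA1); len(h) > 0 {
		return h, nil
	}
	tmpF, err := stream.CacheFullInTempFile()
	if err != nil {
		return "", err
	}
	return utils.HashFile(utils.SHA1, tmpF)
}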
@@ -218,17 +219,15 @@ func (d *AliyundriveOpen) upload(ctx context.Context, dstDir model.Obj, stream m
 		if err != nil {
 			return nil, err
 		}
-		// seek to start
-		if _, err = file.Seek(0, io.SeekStart); err != nil {
-			return nil, err
-		}
 	}
 
 	if !createResp.RapidUpload {
-		// 2. upload
+		// 2. normal upload
 		log.Debugf("[aliyundive_open] normal upload")
 
 		preTime := time.Now()
+		var offset, length int64 = 0, partSize
+		//var length
 		for i := 0; i < len(createResp.PartInfoList); i++ {
 			if utils.IsCanceled(ctx) {
 				return nil, ctx.Err()
@@ -241,9 +240,16 @@ func (d *AliyundriveOpen) upload(ctx context.Context, dstDir model.Obj, stream m
 				}
 				preTime = time.Now()
 			}
-			rd := utils.NewMultiReadable(io.LimitReader(stream, partSize))
+			if remain := stream.GetSize() - offset; length > remain {
+				length = remain
+			}
+			//rd := utils.NewMultiReadable(io.LimitReader(stream, partSize))
+			rd, err := stream.RangeRead(http_range.Range{Start: offset, Length: length})
+			if err != nil {
+				return nil, err
+			}
 			err = retry.Do(func() error {
-				rd.Reset()
+				//rd.Reset()
 				return d.uploadPart(ctx, rd, createResp.PartInfoList[i])
 			},
 				retry.Attempts(3),
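One subtlety in the hunk above: the old MultiReadable wrapper existed so rd.Reset() could rewind the part before each retry attempt, and both are now commented out. If the reader returned by RangeRead cannot rewind, a failed attempt would retry mid-part. A defensive variant (illustrative only, not what this commit does; retry options beyond Attempts(3) are elided in the diff) reacquires the range inside the closure:

err = retry.Do(func() error {
	// a fresh reader positioned at the part boundary on every attempt
	rd, err := stream.RangeRead(http_range.Range{Start: offset, Length: length})
	if err != nil {
		return err
	}
	return d.uploadPart(ctx, rd, createResp.PartInfoList[i])
}, retry.Attempts(3))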
@@ -252,6 +258,7 @@ func (d *AliyundriveOpen) upload(ctx context.Context, dstDir model.Obj, stream m
 			if err != nil {
 				return nil, err
 			}
+			offset += partSize
 		}
 	} else {
 		log.Debugf("[aliyundrive_open] rapid upload success, file id: %s", createResp.FileId)