From 358c5055e90461e90583c788400221363d8d1627 Mon Sep 17 00:00:00 2001
From: foxxorcat <95907542+foxxorcat@users.noreply.github.com>
Date: Sun, 20 Aug 2023 13:08:57 +0800
Subject: [PATCH] fix(lanzou): download not find file sgin (close #5046 in #5048)

---
 drivers/lanzou/help.go | 77 ++++++++++++++++++++++++++++++++++++++++++
 drivers/lanzou/util.go |  9 +++--
 2 files changed, 83 insertions(+), 3 deletions(-)

diff --git a/drivers/lanzou/help.go b/drivers/lanzou/help.go
index 89f8e071..f5b8ce42 100644
--- a/drivers/lanzou/help.go
+++ b/drivers/lanzou/help.go
@@ -125,6 +125,83 @@ func findJSVarFunc(key, data string) string {
 	return values[1]
 }
 
+var findFunction = regexp.MustCompile(`(?ims)^function[^{]+`)  // "function" at the start of a line, up to the first "{"
+var findFunctionAll = regexp.MustCompile(`(?is)function[^{]+`) // "function" anywhere, up to the first "{"
+
+// findJSFunctionIndex returns the [start, end) byte offsets of each JS function in data; only line-start "function" keywords are matched unless all is true.
+func findJSFunctionIndex(data string, all bool) [][2]int {
+	findFunction := findFunction
+	if all {
+		findFunction = findFunctionAll
+	}
+
+	indexs := findFunction.FindAllStringIndex(data, -1)
+	fIndexs := make([][2]int, 0, len(indexs))
+
+	for _, index := range indexs {
+		if len(index) != 2 {
+			continue
+		}
+		count, data := 0, data[index[1]:] // count is the brace depth; data is shadowed by the text following the matched header
+		for ii, v := range data {
+			if v == ' ' && count == 0 { // skip spaces seen before any brace has been opened
+				continue
+			}
+			if v == '{' { // every brace is counted, including any inside JS strings or comments
+				count++
+			}
+
+			if v == '}' {
+				count--
+			}
+			if count == 0 { // depth is back to zero: record the span ending just after this byte
+				fIndexs = append(fIndexs, [2]int{index[0], index[1] + ii + 1})
+				break
+			}
+		}
+	}
+	return fIndexs
+}
+
+// removeJSGlobalFunction drops the line-start JS functions from html and returns the text that follows each of them, concatenated.
+func removeJSGlobalFunction(html string) string {
+	indexs := findJSFunctionIndex(html, false)
+	block := make([]string, len(indexs)) // NOTE(review): html before the first function is never copied, so no match yields "" - confirm intended
+	for i, next := len(indexs)-1, len(html); i >= 0; i-- {
+		index := indexs[i]
+		block[i] = html[index[1]:next]
+		next = index[0]
+	}
+	return strings.Join(block, "")
+}
+
+// getJSFunctionByName returns the source text of the first JS function in html declared as name, or an error when none is found.
+func getJSFunctionByName(html string, name string) (string, error) {
+	indexs := findJSFunctionIndex(html, true)
+	for _, index := range indexs {
+		data := html[index[0]:index[1]]
+		if regexp.MustCompile(`function\s+` + name + `[()\s]+{`).MatchString(data) { // NOTE(review): name is used as raw regexp text and the pattern is recompiled per candidate
+			return data, nil
+		}
+	}
+	return "", fmt.Errorf("not find %s function", name)
+}
+
+// htmlJsonToMap2 parses the JSON embedded in html; when findDataReg matches several times, the longest capture is used.
+func htmlJsonToMap2(html string) (map[string]string, error) {
+	datas := findDataReg.FindAllStringSubmatch(html, -1)
+	var sData string
+	for _, data := range datas {
+		if len(datas) > 0 && len(data[1]) > len(sData) { // the len(datas) > 0 test is always true inside this loop
+			sData = data[1]
+		}
+	}
+	if sData == "" {
+		return nil, fmt.Errorf("not find data")
+	}
+	return jsonToMap(sData, html), nil
+}
+
 // 解析html中的JSON
 func htmlJsonToMap(html string) (map[string]string, error) {
 	datas := findDataReg.FindStringSubmatch(html)
diff --git a/drivers/lanzou/util.go b/drivers/lanzou/util.go
index 6e2f05cc..272cbbfe 100644
--- a/drivers/lanzou/util.go
+++ b/drivers/lanzou/util.go
@@ -346,7 +346,11 @@ func (d *LanZou) getFilesByShareUrl(shareID, pwd string, sharePageData string) (
 
 	// 需要密码
 	if strings.Contains(sharePageData, "pwdload") || strings.Contains(sharePageData, "passwddiv") {
-		param, err := htmlFormToMap(sharePageData)
+		sharePageData, err := getJSFunctionByName(sharePageData, "down_p") // shadows the outer sharePageData with just the down_p function source
+		if err != nil {
+			return nil, err
+		}
+		param, err := htmlJsonToMap(sharePageData)
 		if err != nil {
 			return nil, err
 		}
@@ -370,8 +374,7 @@ func (d *LanZou) getFilesByShareUrl(shareID, pwd string, sharePageData string) (
 		if err != nil {
 			return nil, err
 		}
-		nextPageData := RemoveNotes(string(data))
-
+		nextPageData := removeJSGlobalFunction(RemoveNotes(string(data))) // strip line-start JS functions before looking for the data object
 		param, err = htmlJsonToMap(nextPageData)
 		if err != nil {
 			return nil, err