diff --git a/.github/workflows/check_exif.yml b/.github/workflows/check_exif.yml index 680365f..ca2ee0d 100644 --- a/.github/workflows/check_exif.yml +++ b/.github/workflows/check_exif.yml @@ -38,30 +38,42 @@ jobs: const IMAGE_EXT = /\.(jpe?g|png|webp|tiff?|hei[cf]|avif|gif)$/i; // 1. Collect changed image files - let changedImages = [], page = 1; + // Use compareCommitsWithBasehead (base.sha...head.sha) instead of pulls.listFiles. + // pulls.listFiles computes the diff against the merge base, which can be a very old ancestor + // when a contributor's fork is far behind main (e.g. after clicking "Update branch" which + // creates a merge commit). This causes hundreds of unrelated files to appear as "changed". + // compareCommitsWithBasehead diffs exactly what this PR's head adds on top of main's current + // HEAD, regardless of the fork's branch history. + const changedImages = []; + let comparePage = 1; while (true) { - const { data: files } = await github.rest.pulls.listFiles({ owner, repo, pull_number: prNumber, per_page: 100, page }); - if (!files.length) break; - for (const f of files) { + const { data: cmp } = await github.rest.repos.compareCommitsWithBasehead({ + owner, repo, + basehead: `${pr.base.sha}...${pr.head.sha}`, + per_page: 100, page: comparePage, + }); + for (const f of (cmp.files || [])) { if (['added', 'modified', 'renamed', 'copied'].includes(f.status) && IMAGE_EXT.test(f.filename)) changedImages.push(f.filename); } - if (files.length < 100) break; - page++; + if (!cmp.files || cmp.files.length < 100) break; // NOTE(review): per GitHub's compare API docs, `files` is only populated on the first page (max 300 entries), so later pages add nothing - confirm large PRs are not silently truncated. + comparePage++; } if (!changedImages.length) { core.info('No changed images.'); return; } core.info(`Checking ${changedImages.length} image(s) for EXIF data.`); // 2. 
Download & check each image const tmp = fs.mkdtempSync(path.join(os.tmpdir(), 'exif-')); - const exifFiles = [], errors = []; + const exifFiles = []; for (const file of changedImages) { const tmpFile = path.join(tmp, file.replace(/[\\/]/g, '__')); try { + // '#' must be percent-encoded in the API path; otherwise GitHub treats it as a URL fragment. + const apiPath = file.replace(/#/g, '%23'); const { data } = await github.rest.repos.getContent({ owner: pr.head.repo.owner.login, repo: pr.head.repo.name, - path: file, ref: pr.head.sha + path: apiPath, ref: pr.head.sha }); let raw; if (!Array.isArray(data) && data.content) raw = Buffer.from(data.content, 'base64'); @@ -72,30 +84,51 @@ jobs: } else throw new Error('Cannot fetch file content'); fs.writeFileSync(tmpFile, raw); + // Check only high-sensitivity fields: + // GPS:* — precise location coordinates + // IPTC address fields — textual shooting address + // XMP location/contact fields — address & contact info written by editing software + const HIGH_SENS_ARGS = [ + '-GPS:GPSLatitude', '-GPS:GPSLongitude', '-GPS:GPSAltitude', + '-GPS:GPSDateStamp', '-GPS:GPSTimeStamp', + '-GPS:GPSSpeed', '-GPS:GPSTrack', '-GPS:GPSImgDirection', + '-IPTC:City', '-IPTC:Country-PrimaryLocationName', + '-IPTC:Sub-location', '-IPTC:Province-State', + '-XMP-photoshop:City', '-XMP-photoshop:Country', '-XMP-photoshop:State', + '-XMP-iptcCore:CreatorWorkEmail', '-XMP-iptcCore:CreatorWorkTelephone', + '-XMP-iptcCore:CreatorCity', '-XMP-iptcCore:CreatorCountry', + '-XMP-iptcCore:CreatorPostalCode', + ]; + const safeFile = tmpFile.replace(/'/g, "'\\''"); // POSIX single-quote escaping: close the quoted string, emit an escaped quote, reopen it, so a path containing a single quote still names the same file. + // Filter out empty/undef/all-zero placeholder values (e.g. GPS IFD container with no real coords). 
+ // `: *(undef|00:00:00)?$` matches lines like "GPSLatitude: ", "GPSAltitude: undef", "GPSTimeStamp: 00:00:00" const count = parseInt(execSync( - `exiftool -EXIF:all -S -q -- '${tmpFile.replace(/'/g, "'\\''")}' 2>/dev/null | wc -l`, + `exiftool ${HIGH_SENS_ARGS.join(' ')} -S -q -- '${safeFile}' 2>/dev/null | grep -Ev ': *(undef|00:00:00)?$' | wc -l`, { encoding: 'utf-8', timeout: 10000 } ).trim()) || 0; - if (count > 0) { exifFiles.push(file); core.info(`EXIF found: ${file} (${count} tags)`); } - } catch (e) { core.warning(`Check failed: ${file}: ${e.message}`); errors.push(file); } + if (count > 0) { exifFiles.push(file); core.info(`Sensitive EXIF found: ${file} (${count} tags)`); } + } catch (e) { + // Download failures (e.g. diverged fork, encoding issues) are non-fatal. + // Skip the file with a warning rather than failing the entire check. The same catch also receives execSync failures (including the 10000 ms timeout), so such a file is skipped without being flagged. + core.warning(`Skipping ${file}: ${e.message}`); + } finally { try { fs.unlinkSync(tmpFile); } catch {} } } try { fs.rmdirSync(tmp); } catch {} // 3. Report results - if (errors.length) { - core.setFailed(`EXIF check failed for: ${errors.join(', ')}. Please retry or ask a maintainer.`); - return; - } if (exifFiles.length) { const fileList = exifFiles.map(f => `- ${f}`).join('\n'); const body = [ - '我们检测到以下文件包含 EXIF 信息,请移除 EXIF 数据后再次提交:', - 'We detected EXIF data in the following files. Please remove the EXIF data and resubmit:', + '我们检测到以下文件包含**高敏感 EXIF 信息**(如 GPS 坐标、地址或联系方式),请移除后再次提交:', + 'We detected **high-sensitivity EXIF data** (e.g. GPS coordinates, address, or contact fields) in the following files. Please remove them and resubmit:', '', fileList, '', - `📖 [CONTRIBUTING.md](https://github.com/${owner}/${repo}/blob/master/CONTRIBUTING.md)` + '**仅高敏感字段(GPS / 地址 / 联系方式)需要移除,普通摄影参数(光圈、快门等)无需处理。**', + '**Only high-sensitivity fields (GPS / address / contact info) need to be removed. Regular photography parameters (aperture, shutter speed, etc.) 
are fine to keep.**', + '', + `📖 [EXIF 说明 / EXIF Guide](https://github.com/${owner}/${repo}/blob/master/EXIF.md) · [CONTRIBUTING.md](https://github.com/${owner}/${repo}/blob/master/CONTRIBUTING.md)` ].join('\n'); try { await github.rest.issues.createComment({ owner, repo, issue_number: prNumber, body }); } catch (e) { core.warning(`Cannot comment: ${e.message}. Check repo Settings > Actions > Workflow permissions.`); } diff --git a/EXIF.md b/EXIF.md new file mode 100644 index 0000000..3a2d3b4 --- /dev/null +++ b/EXIF.md @@ -0,0 +1,98 @@ +# 📋 EXIF 是什么?/ What is EXIF? + +## 简介 / Introduction + +**EXIF**(Exchangeable Image File Format)是嵌入在图片文件中的一份"数字档案"。 +每次你用手机或相机拍照,设备会自动把拍摄时的各种信息记录进去——包括你**在哪里拍**、**用什么设备拍**、**什么时候拍**。 + +**EXIF** (Exchangeable Image File Format) is a set of metadata embedded directly in your image file. +Every time you take a photo, your device silently records details about when, where, and how the shot was taken. + +除了 EXIF 本身,图片还可能携带 **IPTC**、**XMP** 等元数据格式,同样能存储位置和个人信息。 +Besides EXIF itself, images may also carry **IPTC** and **XMP** metadata, which can also store location and personal details. + +--- + +## 🔴 高度敏感 / Highly Sensitive + +**这些字段会被本项目的工作流自动检测,发现即要求修正。** +**These fields are automatically detected by our CI workflow. 
A PR containing them will be asked to remove them.** + +| 字段 / Field | 说明 / Description | +|---|---| +| `GPS:GPSLatitude` / `GPS:GPSLongitude` | 精确地理坐标,可定位到米级 / Precise coordinates, accurate to meters | +| `GPS:GPSAltitude` | 海拔高度 / Altitude | +| `GPS:GPSDateStamp` + `GPS:GPSTimeStamp` | 精确拍摄时刻(结合坐标可还原行踪)/ Exact timestamp combined with location can reveal your routine | +| `GPS:GPSSpeed` / `GPS:GPSTrack` / `GPS:GPSImgDirection` | 移动速度、方向 / Movement speed and direction | +| `IPTC:City` / `IPTC:Country-PrimaryLocationName` / `IPTC:Sub-location` / `IPTC:Province-State` | 文字形式的拍摄地址 / Textual shooting address | +| `XMP-photoshop:City` / `XMP-photoshop:Country` / `XMP-photoshop:State` | Photoshop/Lightroom 写入的地址 / Address written by Photoshop or Lightroom | +| `XMP-iptcCore:CreatorWorkEmail` | 创作者工作邮箱 / Creator's work email | +| `XMP-iptcCore:CreatorWorkTelephone` | 创作者电话 / Creator's phone number | +| `XMP-iptcCore:CreatorCity` / `CreatorCountry` / `CreatorPostalCode` | 创作者通讯地址 / Creator's mailing address | + +--- + +## 🟠 较敏感 / Moderately Sensitive + +**这些字段不会导致 PR 被拒,但建议留意——尤其是序列号类信息可以跨图片追踪到同一台设备。** +**These won't block your PR, but be aware — serial numbers in particular can link multiple photos back to the same device.** + +| 字段 / Field | 说明 / Description | +|---|---| +| `EXIF:BodySerialNumber` / `EXIF:CameraSerialNumber` | 设备序列号,可跨图追踪 / Device serial number, linkable across photos | +| `EXIF:LensSerialNumber` | 镜头序列号 / Lens serial number | +| `EXIF:Artist` / `XMP-dc:Creator` | 拍摄者/创作者姓名 / Photographer's name | +| `MakerNotes:FaceName` / `XMP-mwg-rs:PersonInImageName` | 相机人脸识别绑定的人名 / Face recognition tags with names | + +--- + +## 🟡 低度敏感 / Low Sensitivity + +**通常对隐私影响较小,本项目不拦截,保留这些信息是可以接受的。** +**These generally carry low privacy risk and won't be flagged by our workflow.** + +| 字段 / Field | 说明 / Description | +|---|---| +| `EXIF:DateTimeOriginal` | 拍摄时间(无位置时风险较低)/ Capture time (low risk without location) | +| `EXIF:Make` / `EXIF:Model` | 相机/手机品牌型号 / Camera or phone brand and model | +| 
`EXIF:Software` | 后期处理软件 / Post-processing software | +| `EXIF:FNumber` / `EXIF:ExposureTime` / `EXIF:ISO` | 光圈、快门、感光度等摄影参数 / Photography parameters | +| `EXIF:Copyright` | 版权声明 / Copyright notice | + +--- + +## 🔍 如何自查 / How to Verify + +提交前可用以下命令查看图片中是否残留高敏感信息: +Before submitting, run the following to check if any high-sensitivity data remains: + +```bash +# 安装 exiftool / Install exiftool +# macOS: brew install exiftool +# Ubuntu: sudo apt install libimage-exiftool-perl +# Windows: https://exiftool.org/ + +# 检查单张图片 / Check a single image +exiftool -GPS:all -IPTC:City -IPTC:Country-PrimaryLocationName \ + -XMP-photoshop:City -XMP-iptcCore:CreatorWorkEmail your_photo.jpg + +# 若以上命令无任何输出,说明高敏感字段均已清除。 +# If the command produces no output, all high-sensitivity fields have been removed. +``` + +### 完整清除所有元数据 / Strip all metadata + +```bash +# 删除全部 EXIF/XMP/IPTC(最彻底)/ Remove all metadata (most thorough) +exiftool -all= -o cleaned_photo.jpg your_photo.jpg +``` + +--- + +## 📖 延伸阅读 / Further Reading + +如果你对 EXIF 感兴趣,可以通过以下资源深入了解: +If you'd like to learn more about image metadata: + +- [ExifTool 官方文档 / Official ExifTool Documentation](https://exiftool.org/) +- [EXIF(维基百科)/ EXIF (Wikipedia)](https://en.wikipedia.org/wiki/Exif) diff --git a/GUIDE.md b/GUIDE.md index eef46c3..4366260 100644 --- a/GUIDE.md +++ b/GUIDE.md @@ -117,7 +117,10 @@ git push origin main 提交 PR 后,自动化机器人会帮你检查: - 📏 **文件大小检查**:图片是否在 1MB 以内 -- 🔒 **EXIF 信息检查**:图片是否包含隐私元数据 +- 🔒 **EXIF 信息检查**:图片是否包含高敏感元数据(主要是 **GPS 坐标**及地址类字段) + +> 机器人只会拦截高敏感字段(例如 GPS 坐标、IPTC/XMP 地址信息),普通摄影参数(光圈、快门等)不会导致 PR 被拒。 +> 了解详情请阅读 [EXIF.md](EXIF.md)。 如果检查未通过,机器人会留下评论告诉你如何修正。你可以按照提示修改后再次推送(push),PR 会自动更新。 @@ -128,7 +131,11 @@ git push origin main After submitting the PR, automated bots will check: - 📏 **File size check**: Are images under 1MB? -- 🔒 **EXIF data check**: Do images contain private metadata? 
+- 🔒 **EXIF data check**: Do images contain high-sensitivity metadata (primarily **GPS coordinates** and address fields)? + +> The bot only blocks high-sensitivity fields such as GPS coordinates and IPTC/XMP location data. +> Regular photography parameters (aperture, shutter speed, etc.) will not cause your PR to fail. +> See [EXIF.md](EXIF.md) for details. If any check fails, the bot will leave a comment explaining how to fix it. You can make changes and push again — the PR will update automatically. @@ -139,16 +146,20 @@ Once a maintainer approves, your PR will be merged. Congratulations on your firs ## ⚠️ 提交前请注意 / Before You Submit 1. **压缩图片** — 确保每张图片小于 1MB。可以使用 [TinyPNG](https://tinypng.com/) 等在线工具压缩。 -2. **移除 EXIF 信息** — 照片可能包含你的位置、设备等隐私信息。详见 [CONTRIBUTING.md](CONTRIBUTING.md)。 +2. **移除高敏感 EXIF 信息** — 最重要的是删除 **GPS 坐标**,它能精确暴露你的拍摄地点。照片还可能携带地址、联系方式等信息。详见 [EXIF.md](EXIF.md) 和 [CONTRIBUTING.md](CONTRIBUTING.md)。 3. **正确命名文件夹** — 使用你的 GitHub ID 命名,并放在对应首字母目录下。 4. **原创图片** — 只提交你自己的照片,不接受盗图。 +5. **只修改自己的文件夹** — 请不要改动他人的文件夹或其他项目文件,避免给维护者带来不必要的麻烦。 +6. **文件名避免中文和空格** — 文件名中的中文字符或空格可能导致部分系统无法正常显示或下载,请尽量使用英文字母、数字和连字符。 --- 1. **Compress images** — Make sure each image is under 1MB. Use tools like [TinyPNG](https://tinypng.com/). -2. **Remove EXIF data** — Photos may contain your location, device info, etc. See [CONTRIBUTING.md](CONTRIBUTING.md). +2. **Remove sensitive EXIF data** — The most important thing is to strip **GPS coordinates**, which can reveal your exact shooting location. Photos may also carry address and contact-info fields. See [EXIF.md](EXIF.md) and [CONTRIBUTING.md](CONTRIBUTING.md). 3. **Name your folder correctly** — Use your GitHub ID and place it under the matching alphabetical directory. 4. **Original images only** — Only submit your own photos. Stolen images are not accepted. +5. **Only modify your own folder** — Do not touch other contributors' folders or any other project files. +6. 
**Avoid Chinese characters and spaces in filenames** — Use letters, numbers, and hyphens only to ensure compatibility across all systems. --- @@ -156,5 +167,6 @@ Once a maintainer approves, your PR will be merged. Congratulations on your firs - [项目说明 / Project README](README.md) - [贡献指南 / Contributing Guide](CONTRIBUTING.md) +- [EXIF 说明 / EXIF Guide](EXIF.md) - [项目详细说明 / Detailed README](README_DETAIL.md) - [已合并的 PR 参考 / Merged PRs for reference](https://github.com/Cute-Dress/Dress/pulls?q=is%3Apr+is%3Amerged)