initial commit

inversion · Feb 23, 2024 · b9b83ad · b9b83ad
commit b9b83ad
Show file tree

Hide file tree

Showing 17 changed files with 4,560 additions and 0 deletions.
diff --git a/.env.example b/.env.example
@@ -0,0 +1,3 @@
+FIREFOX_COOKIES_DB_PATH=C:/Users/xxx/AppData/Roaming/Mozilla/Firefox/Profiles/yyy.default/cookies.sqlite
+WIZ_SID=123
+WIZ_AT=ABCDEF:456
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,140 @@
+*.tgz
+/Takeout
+/oauth_tokens.json
+/client_secret.json
+/allMediaItems.jsonl
+
+# tsc output
+out/
+
+# Logs
+logs
+*.log
+npm-debug.log*
+yarn-debug.log*
+yarn-error.log*
+lerna-debug.log*
+.pnpm-debug.log*
+
+# Diagnostic reports (https://nodejs.org/api/report.html)
+report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json
+
+# Runtime data
+pids
+*.pid
+*.seed
+*.pid.lock
+
+# Directory for instrumented libs generated by jscoverage/JSCover
+lib-cov
+
+# Coverage directory used by tools like istanbul
+coverage
+*.lcov
+
+# nyc test coverage
+.nyc_output
+
+# Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files)
+.grunt
+
+# Bower dependency directory (https://bower.io/)
+bower_components
+
+# node-waf configuration
+.lock-wscript
+
+# Compiled binary addons (https://nodejs.org/api/addons.html)
+build/Release
+
+# Dependency directories
+node_modules/
+jspm_packages/
+
+# Snowpack dependency directory (https://snowpack.dev/)
+web_modules/
+
+# TypeScript cache
+*.tsbuildinfo
+
+# Optional npm cache directory
+.npm
+
+# Optional eslint cache
+.eslintcache
+
+# Optional stylelint cache
+.stylelintcache
+
+# Microbundle cache
+.rpt2_cache/
+.rts2_cache_cjs/
+.rts2_cache_es/
+.rts2_cache_umd/
+
+# Optional REPL history
+.node_repl_history
+
+# Output of 'npm pack'
+*.tgz
+
+# Yarn Integrity file
+.yarn-integrity
+
+# dotenv environment variable files
+.env
+.env.development.local
+.env.test.local
+.env.production.local
+.env.local
+
+# parcel-bundler cache (https://parceljs.org/)
+.cache
+.parcel-cache
+
+# Next.js build output
+.next
+out
+
+# Nuxt.js build / generate output
+.nuxt
+dist
+
+# Gatsby files
+.cache/
+# Uncomment the public line below if your project uses Gatsby and not Next.js
+# https://nextjs.org/blog/next-9-1#public-directory-support
+# public
+
+# vuepress build output
+.vuepress/dist
+
+# vuepress v2.x temp and cache directory
+.temp
+.cache
+
+# Docusaurus cache and generated files
+.docusaurus
+
+# Serverless directories
+.serverless/
+
+# FuseBox cache
+.fusebox/
+
+# DynamoDB Local files
+.dynamodb/
+
+# TernJS port file
+.tern-port
+
+# Stores VSCode versions used for testing VSCode extensions
+.vscode-test
+
+# yarn v2
+.yarn/cache
+.yarn/unplugged
+.yarn/build-state.yml
+.yarn/install-state.gz
+.pnp.*
+links.sqlite
diff --git a/.vscode/launch.json b/.vscode/launch.json
@@ -0,0 +1,25 @@
+{
+  "version": "0.2.0",
+  "configurations": [
+    {
+      "name": "Attach by Process ID",
+      "processId": "${command:PickProcess}",
+      "request": "attach",
+      "skipFiles": [
+        "<node_internals>/**"
+      ],
+      "type": "node"
+    },
+    {
+      "type": "node",
+      "request": "launch",
+      "name": "Launch Program",
+      "preLaunchTask": "tsc: build - tsconfig.json",
+      "program": "${workspaceFolder}/out/index.js",
+      "console": "internalConsole",
+      "outFiles": ["${workspaceFolder}/out/*.js"],
+      "sourceMaps": true,
+      "internalConsoleOptions": "openOnSessionStart" // automatically switch to the debug console
+    }
+  ]
+}
diff --git a/LICENSE b/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2024 Andrew Moss
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/README.md b/README.md
@@ -0,0 +1,80 @@
+# google-shared-photos-cleanup
+
+Organize Google Photos originating from shared albums and WhatsApp. It processes a Google Takeout and adds all media (photos & videos) that are saved to your Photos feed from Partner Sharing and WhatsApp to new albums. You can then review the albums and delete the photos if everything looks good.
+
+NB: Adding Photos to albums alone does not remove them from your main feed.
+
+NB: Partner photos could probably be deleted more easily by just removing the partner. I'm not aware of an easier way to organize WhatsApp photos though.
+
+## How this tool works
+
+1. Traverses a Photos Takeout (code in `takeout.ts`), building separate lists of the files that come from Partner Sharing and from WhatsApp.
+1. Using the Photos API (`photosApi.ts`), creates albums called `Cleanup - Partner Photos` and `Cleanup - WhatsApp Photos`.
+1. Fetches the Takeout private item URLs and parses the redirect targets to get the media item IDs for the Takeout items, caching to a sqlite database `links.sqlite`.
+    - Cookies are loaded from a Firefox profile.
+1. Uses the Photos UI API (`photosWebApi.ts`) to add each item to its corresponding album, skipping items that are already in it.
+    - The lists of existing items in the albums are fetched with the Photos API.
+
+*NB*: This tool is pretty flaky due to opaque rate-limits on the Photos UI API. You'll probably need to re-run it several times - it should be idempotent.
+
+### Why are you using Takeout, Google Photos API *and* the Photos UI API?!
+
+The official Google Photos API is not very useful:
+- API 'apps' can only add photos created by the app to albums, so you can't use the API to organize an existing set of photos.
+- There is no access to the partner sharing / WhatsApp source metadata.
+- The 'Media Item' IDs it exposes are not included in the Takeout data, nor are they readily mappable to the Takeout image URLs/identifiers.
+
+So we use a Frankenstein combination of the Google Photos API, the undocumented & obfuscated Photos UI API, and Takeout data.
+
+NB: It might be possible to cut Takeout out of the process if the Photos UI API has the relevant info - I'm not sure if it does or not.
+
+## How to use
+
+### Requirements
+
+- Google account
+- A Firefox installation logged in to that Google account (for filling the cookie jar in `photosWebApi.ts`).
+    - The script could be adapted to use hardcoded cookies or another source.
+
+### Checkout
+1. `git clone https://github.com/inversion/google-shared-photos-cleanup`
+1. `cd google-shared-photos-cleanup`
+
+### Takeout download
+1. Generate a [Google Takeout](https://takeout.google.com/) - you can select 'Google Photos' only to save time generating it. I used `tgz` with 50GB segments.
+1. Download the `tgz` files to the current directory.
+1. Extract the metadata JSON from all of them: `for file in *.tgz; do tar -zxvf "$file" --wildcards --no-anchored '*/*.json'; done`
+   - Extracting only the JSON saves time and disk space, but this might take a long time anyway - about 15-20 minutes with my ~150GB library.
+1. There should now be a 'Takeout' directory in the current directory.
+
+### Google Photos API setup
+1. Create a project at console.cloud.google.com
+1. Activate the 'Photos Library API' from the search box.
+1. Hit 'Create Credentials'
+1. Select 'User data'.
+1. 'Add or Remove Scopes'
+1. Select `auth/photoslibrary` and `auth/photoslibrary.sharing`.
+1. Application type 'Desktop App'
+1. Download the client secret JSON and save it as `client_secret.json` in the current directory.
+
+### Configure & run the tool
+1. `cp .env{.example,}`
+1. Open `.env`:
+    1. Open photos.google.com with the logged-in Firefox.
+    1. Set `WIZ_*` values in .env (based on https://kovatch.medium.com/deciphering-google-batchexecute-74991e4e446c):
+        1. Open Firefox dev tools.
+        1. `copy(window.WIZ_global_data.FdrFJe)` -> WIZ_SID
+        1. `copy(window.WIZ_global_data.SNlM0e)` -> WIZ_AT
+    1. Set `FIREFOX_COOKIES_DB_PATH` to your real profile's path.
+1. `npm install`
+1. `npm run main`
+1. (First-run only) Follow the OAuth instructions to generate auth credentials for your Photos API app, which will be saved to `oauth_tokens.json`.
+1. Watch the output and re-run it if it fails. You will probably need to update WIZ_SID and WIZ_AT if it starts consistently failing.
+
+## References/Credits
+
+- https://webapps.stackexchange.com/a/172517 ([jjspierx](https://webapps.stackexchange.com/users/318072/jjspierx))
+- https://kovatch.medium.com/deciphering-google-batchexecute-74991e4e446c
+- https://stackoverflow.com/questions/70616324/what-is-mediaitemid-in-google-photos-api
+- https://github.com/wong2/batchexecute
+
diff --git a/auth.ts b/auth.ts
@@ -0,0 +1,65 @@
+import { google } from "googleapis";
+import { OAuth2Client } from "google-auth-library";
+import * as readline from "readline";
+import * as fs from "fs";
+
+const OAUTH_TOKENS_PATH = "oauth_tokens.json";
+/**
+ * Builds an OAuth2 client authorised for the Google Photos scopes.
+ *
+ * Client credentials are read from `client_secret.json`. If the token file at
+ * `OAUTH_TOKENS_PATH` exists, its tokens are loaded into the client; when their
+ * `expiry_date` is in the past they are refreshed and the refreshed
+ * credentials are written back to the same file. If the token file does not
+ * exist, a consent URL is printed, the authorisation code is read from stdin,
+ * exchanged for tokens, and the tokens are saved for later runs.
+ *
+ * @returns The OAuth2 client with credentials set.
+ */
+export async function googleAuth(): Promise<OAuth2Client> {
+  const secrets = JSON.parse(fs.readFileSync("client_secret.json", "utf8"));
+  const { client_id: clientId, client_secret: clientSecret } =
+    secrets.installed;
+
+  const oauth2Client = new google.auth.OAuth2(
+    clientId,
+    clientSecret,
+    "http://localhost"
+  );
+
+  // Fast path: reuse tokens saved by an earlier run.
+  if (fs.existsSync(OAUTH_TOKENS_PATH)) {
+    const saved = JSON.parse(fs.readFileSync(OAUTH_TOKENS_PATH, "utf8"));
+    oauth2Client.setCredentials(saved);
+
+    const expired = saved.expiry_date < Date.now();
+    if (expired) {
+      // Refresh using the saved credentials, then persist the new set.
+      const refreshed = await oauth2Client.refreshAccessToken();
+      fs.writeFileSync(
+        OAUTH_TOKENS_PATH,
+        JSON.stringify(refreshed.credentials)
+      );
+      oauth2Client.setCredentials(refreshed.credentials);
+    }
+
+    return oauth2Client;
+  }
+
+  // First run: ask the user to grant the Google Photos scopes.
+  const consentUrl = oauth2Client.generateAuthUrl({
+    access_type: "offline",
+    scope: [
+      "https://www.googleapis.com/auth/photoslibrary",
+      "https://www.googleapis.com/auth/photoslibrary.sharing",
+    ],
+  });
+  console.log("Authorize this app by visiting this url:", consentUrl);
+
+  // Read the authorisation code typed by the user on the command line.
+  const prompt = readline.createInterface({
+    input: process.stdin,
+    output: process.stdout,
+  });
+  const code = await new Promise<string>((resolve) => {
+    prompt.question("Enter the code from that page here: ", (answer) => {
+      resolve(answer);
+      prompt.close();
+    });
+  });
+
+  // Exchange the code for tokens and keep them for subsequent runs.
+  const exchange = await oauth2Client.getToken(code);
+  oauth2Client.setCredentials(exchange.tokens);
+  fs.writeFileSync(OAUTH_TOKENS_PATH, JSON.stringify(exchange.tokens));
+
+  return oauth2Client;
+}
diff --git a/batchexecute/README.md b/batchexecute/README.md
@@ -0,0 +1 @@
+Adapted from https://github.com/wong2/batchexecute/tree/main
diff --git a/batchexecute/decode.ts b/batchexecute/decode.ts
@@ -0,0 +1,43 @@
+/** One decoded RPC response extracted from a batchexecute reply. */
+interface Result {
+  /**
+   * 1-based position of the response within the batch; 1 when the envelope
+   * is tagged "generic" (a reply containing a single envelope).
+   */
+  index: number;
+  /**
+   * Identifier of the RPC, copied verbatim from envelope slot [1].
+   * batchexecute RPC ids are short strings, not numbers.
+   */
+  rpcId: string;
+  /** The RPC's payload: envelope slot [2] after JSON-decoding. */
+  data: any;
+}
+
+/**
+ * Decodes the raw text of a batchexecute response into its RPC results.
+ *
+ * The first two lines of the body (the ")]}'" guard and the empty line after
+ * it) are discarded; the remaining lines are concatenated and parsed as a JSON
+ * list of envelopes. Each envelope whose first element is "wrb.fr" yields one
+ * result; all other envelopes (analytics etc.) are skipped.
+ *
+ * @param raw - The response body as received.
+ * @returns One entry per "wrb.fr" envelope, in order of appearance.
+ * @throws SyntaxError if the envelope list or an envelope's payload is not
+ *   valid JSON.
+ */
+export function parseBatchExecuteResponse(raw: string): Result[] {
+  // Strip the two-line preamble, then glue the rest back into one JSON text.
+  const lines = raw.split("\n");
+  const envelopes: any[] = JSON.parse(lines.slice(2).join(""));
+
+  const decoded: Result[] = [];
+  for (const envelope of envelopes) {
+    // Only "wrb.fr" envelopes carry RPC responses.
+    if (envelope[0] === "wrb.fr") {
+      // Slot [6] holds the 1-based index as a string, or "generic" when the
+      // response contains a single envelope.
+      const position = envelope[6];
+      decoded.push({
+        index: position === "generic" ? 1 : parseInt(position, 10),
+        // Slot [1] is the rpcid; slot [2] is its response as a JSON string.
+        rpcId: envelope[1],
+        data: JSON.parse(envelope[2]),
+      });
+    }
+  }
+
+  return decoded;
+}
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		Adapted from https://github.com/wong2/batchexecute/tree/main