Add option to only retry non-tagged files in classify #109

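# CI workflow: runs the recognize classifier and face-clustering commands end to end on the IMDb-Face dataset and fails if the resulting cluster quality drops below a threshold.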
name: Cluster-faces command test
on:
pull_request:
paths:
- 'lib/**'
- 'src/**'
push:
branches:
- master
paths:
- 'lib/**'
- 'src/**'
env:
APP_NAME: recognize
jobs:
php:
runs-on: ubuntu-20.04
strategy:
# do not stop on another job's failure
fail-fast: false
matrix:
php-versions: ['8.2']
databases: ['sqlite']
server-versions: ['master']
pure-js-mode: ['false']
name: Test cluster-faces command on ${{ matrix.server-versions }} wasm:${{ matrix.pure-js-mode }}
env:
MYSQL_PORT: 4444
PGSQL_PORT: 4445
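# MYSQL_PORT and PGSQL_PORT must match the host-side port mappings of the database service containers below.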
services:
mysql:
image: mariadb:10.5
ports:
- 4444:3306/tcp
env:
MYSQL_ROOT_PASSWORD: rootpassword
options: --health-cmd="mysqladmin ping" --health-interval 5s --health-timeout 2s --health-retries 5
postgres:
image: postgres
ports:
- 4445:5432/tcp
env:
POSTGRES_USER: root
POSTGRES_PASSWORD: rootpassword
POSTGRES_DB: nextcloud
options: --health-cmd pg_isready --health-interval 5s --health-timeout 2s --health-retries 5
steps:
- name: Checkout server
uses: actions/checkout@v2
with:
repository: nextcloud/server
ref: ${{ matrix.server-versions }}
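# Reuse the auth header written by actions/checkout so the server's submodules can be fetched over HTTPS.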
- name: Checkout submodules
shell: bash
run: |
auth_header="$(git config --local --get http.https://github.com/.extraheader)"
git submodule sync --recursive
git -c "http.extraheader=$auth_header" -c protocol.version=2 submodule update --init --force --recursive --depth=1
- name: install ssl-cert
if: env.ACT # Skip this on normal GitHub Actions
run: sudo apt update && sudo apt install -y ssl-cert
- name: Set up php ${{ matrix.php-versions }}
uses: shivammathur/setup-php@v2
with:
php-version: ${{ matrix.php-versions }}
tools: phpunit
extensions: mbstring, iconv, fileinfo, intl, sqlite, pdo_mysql, pdo_sqlite, pgsql, pdo_pgsql, gd, zip
- name: Checkout app
uses: actions/checkout@v2
with:
path: apps/${{ env.APP_NAME }}
- name: Read package.json node and npm engines version
uses: skjnldsv/read-package-engines-version-actions@v1.2 # exact version tag assumed; the original ref was obscured by e-mail obfuscation
id: versions
with:
path: apps/${{ env.APP_NAME }}
fallbackNode: '^12'
fallbackNpm: '^6'
- name: Set up node ${{ steps.versions.outputs.nodeVersion }}
uses: actions/setup-node@v2
with:
node-version: ${{ steps.versions.outputs.nodeVersion }}
- name: Set up npm ${{ steps.versions.outputs.npmVersion }}
run: npm i -g npm@"${{ steps.versions.outputs.npmVersion }}"
- name: install make wget unzip
if: env.ACT # Skip this on normal GitHub Actions
run: sudo apt update && sudo apt install -y make wget unzip
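# Build the app; the remove-binaries and remove-devdeps targets presumably strip the bundled node binaries and dev dependencies to free disk space on the runner.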
- name: Install app
working-directory: apps/${{ env.APP_NAME }}
run: |
composer install --no-dev
make all
make remove-binaries
make remove-devdeps
- name: Set up Nextcloud and install app
if: ${{ matrix.databases != 'pgsql'}}
run: |
sleep 25
mkdir data
./occ maintenance:install --verbose --database=${{ matrix.databases }} --database-name=nextcloud --database-host=127.0.0.1 --database-port=$MYSQL_PORT --database-user=root --database-pass=rootpassword --admin-user admin --admin-pass password
./occ app:enable -vvv -f ${{ env.APP_NAME }}
php -S localhost:8080 &
- name: Set up Nextcloud and install app
if: ${{ matrix.databases == 'pgsql'}}
run: |
sleep 25
mkdir data
./occ maintenance:install --verbose --database=${{ matrix.databases }} --database-name=nextcloud --database-host=127.0.0.1 --database-port=$PGSQL_PORT --database-user=root --database-pass=rootpassword --admin-user admin --admin-pass password
./occ app:enable -vvv -f ${{ env.APP_NAME }}
php -S localhost:8080 &
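# The two install steps above differ only in the database port; the built-in PHP server serves the WebDAV requests made further down.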
- name: Install
run: |
./occ app:enable -vvv ${{ env.APP_NAME }}
- name: Remove unnecessary models to make space
run: |
rm -rf apps/recognize/models
- uses: actions/cache/restore@v3
id: photos-cache
with:
path: data/admin/files/
key: https://cloud.marcelklehr.de/s/PkNYbmKnwMiQMFD/download/IMDb-Face.zip
- name: Upload photos
if: steps.photos-cache.outputs.cache-hit != 'true'
run: |
mkdir -p data/admin/files/
cd data/admin/files
wget https://cloud.marcelklehr.de/s/PkNYbmKnwMiQMFD/download/IMDb-Face.zip
unzip IMDb-Face.zip
rm IMDb-Face.zip
- uses: actions/cache/save@v3
with:
path: data/admin/files/
key: https://cloud.marcelklehr.de/s/PkNYbmKnwMiQMFD/download/IMDb-Face.zip
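# Configure recognize: toggle pure-JS (WASM) mode per the matrix, enable face recognition, and limit TensorFlow to one core.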
- name: Set config
run: |
./occ config:app:set --value ${{ matrix.pure-js-mode }} recognize tensorflow.purejs
./occ config:app:set --value true recognize faces.enabled
# only use one core. GH actions has 2
./occ config:app:set --value 1 recognize tensorflow.cores
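# Cache the classified SQLite database, keyed on the photo set and the classifier sources, so the slow classification run is skipped when nothing relevant changed.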
- uses: actions/cache/restore@v3
id: db-cache
with:
path: data/nextcloud.db
key: ${{ runner.os }}-${{ matrix.server-versions }}-${{ hashFiles('data/admin/files/**', 'apps/recognize/src/classifier_faces.js', 'apps/recognize/lib/Classifiers/Classifier.php', 'apps/recognize/lib/Classifiers/Images/ClusteringFaceClassifier.php') }}-${{ matrix.pure-js-mode }}
- name: Run classifier
if: steps.db-cache.outputs.cache-hit != 'true'
env:
GITHUB_REF: ${{ github.ref }}
run: |
./occ files:scan admin
./occ recognize:classify
- uses: actions/cache/save@v3
with:
path: data/nextcloud.db
key: ${{ steps.db-cache.outputs.cache-primary-key }}
- name: Reduce space
run: |
# Truncate the photos to zero bytes to free disk space (their contents are no longer needed after classification); the glob must sit outside the quotes to expand.
for dirname in data/admin/files/IMDb-Face/*; do truncate -s 0 "${dirname}"/*; done
- name: install sqlite3
if: env.ACT # Skip this on normal GitHub Actions
run: sudo apt update && sudo apt install -y sqlite3
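# Dump every detected face as a pipe-separated x|y|path row (sqlite3's default separator) into out.txt for the analysis step below.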
- name: Create detection summary
run: |
sqlite3 data/nextcloud.db "select x, y, path from oc_recognize_face_detections d LEFT JOIN oc_filecache c ON c.fileid = d.file_id where user_id = 'admin' ORDER BY path;" > out.txt
- uses: actions/cache/restore@v3
id: clustering-cache
with:
path: out.json
key: ${{ runner.os }}-${{ hashFiles('out.txt', 'apps/recognize/src/classifier_faces.js', 'apps/recognize/lib/Classifiers/Classifier.php', 'apps/recognize/lib/Classifiers/Images/ClusteringFaceClassifier.php', 'apps/recognize/lib/Clustering/**', 'apps/recognize/lib/Dav/**', 'apps/recognize/lib/Service/FaceClusterAnalyzer.php', 'apps/recognize/lib/Command/ClusterFaces.php') }}-${{ matrix.pure-js-mode }}
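# Run the clustering repeatedly; the -b 10000 option appears to limit each invocation to a batch of 10000 faces, so several passes are needed to cover the whole detection set.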
- name: Run clustering
if: steps.clustering-cache.outputs.cache-hit != 'true'
run: |
./occ recognize:cluster-faces -b 10000
./occ recognize:cluster-faces -b 10000
./occ recognize:cluster-faces -b 10000
./occ recognize:cluster-faces -b 10000
./occ recognize:cluster-faces -b 10000
./occ recognize:cluster-faces -b 10000
- name: install python3 python3-pip jq curl
if: steps.clustering-cache.outputs.cache-hit != 'true' && env.ACT # Skip this on normal GitHub Actions
run: sudo apt update && sudo apt install -y python3 python3-pip jq curl
- name: Install xq
if: steps.clustering-cache.outputs.cache-hit != 'true'
run: |
pip install yq
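# Query the recognize DAV endpoint for the generated clusters; each face node carries its real file path and its detection rectangles.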
- name: Download face assignments
if: steps.clustering-cache.outputs.cache-hit != 'true'
run: |
curl -u 'admin:password' --request PROPFIND 'http://localhost:8080/remote.php/dav/recognize/admin/faces/' --header 'Depth: 2' --data '<?xml version="1.0"?>
<d:propfind xmlns:d="DAV:"
xmlns:oc="http://owncloud.org/ns"
xmlns:nc="http://nextcloud.org/ns"
xmlns:ocs="http://open-collaboration-services.org/ns">
<d:prop>
<d:getcontentlength />
<d:getcontenttype />
<d:getetag />
<d:getlastmodified />
<d:resourcetype />
<nc:face-detections />
<nc:file-metadata-size />
<nc:has-preview />
<nc:realpath />
<oc:favorite />
<oc:fileid />
<oc:permissions />
<nc:nbItems />
</d:prop>
</d:propfind>' > out.xml
cat out.xml
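# xq converts the DAV multistatus XML into JSON, keeping href, realpath and the parsed face-detections for every face below faces/<cluster>/<file>.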
- name: Parse face assignments
if: steps.clustering-cache.outputs.cache-hit != 'true'
run: |
cat out.xml | xq '.["d:multistatus"]["d:response"] | map(select(.["d:href"] | test("faces/.+?/.+?"))) | map({"href": .["d:href"], "realpath": .["d:propstat"][0]["d:prop"]["nc:realpath"], "face-detections": .["d:propstat"][0]["d:prop"]["nc:face-detections"] | fromjson | map({userId, x, y, height, width, clusterId}) })' > out.json
cat out.json
- uses: actions/cache/save@v3
with:
path: out.json
key: ${{ steps.clustering-cache.outputs.cache-primary-key }}
- name: Download IMDb-Face.csv
working-directory: apps/${{ env.APP_NAME }}/tests/res
run: |
wget https://cloud.marcelklehr.de/s/ZKe7MY7gZRRxBPq/download/IMDb-Face-csv.zip
unzip IMDb-Face-csv.zip
rm IMDb-Face-csv.zip
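# The inline Node script below compares the clusters in out.json against the IMDb-Face ground truth and computes a combined quality score.
# The $(...) substitutions inside the script are expanded by the shell before node is invoked.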
- name: Analyse face assignments
run: |
node -e "
// Explicit import for clarity (node -e also exposes core modules as lazily loaded globals).
const fs = require('fs')
// Column indices of the IMDb-Face ground-truth CSV.
const COLUMN_NAME = 0
const COLUMN_URL = 5
const COLUMN_RECT = 3
const COLUMN_DIMS = 4
const csv = fs.readFileSync(__dirname + '/apps/recognize/tests/res/IMDb-Face.csv')
.toString('utf8')
.split('\n')
.map(line => line.split(','))
// remove csv header
csv.shift()
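// Restrict the ground truth to the images of the first 2000 identities in the CSV.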
const names = [...new Set(csv.map(image => image[COLUMN_NAME])).values()]
const selectedNames = names.slice(0, 2000)
const limitedCsv = selectedNames.flatMap(name => {
return csv.filter(line => line[COLUMN_NAME] === name)
})
const allDetections = fs.readFileSync(__dirname + '/out.txt').toString('utf8').trim().split('\n').map(line => line.split('|'))
const json = require(__dirname + '/out.json');
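// Group all clustered faces by cluster id (taken from the DAV href) to get the identity names present in each cluster.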
const facesByCluster = json
.reduce((acc, face) => {
const clusterId = parseInt(face.href.split('/')[6]);
acc[clusterId] = [...(acc[clusterId] ?? []), face.realpath.split('/')[4]];
return acc
}, {});
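// A face counts as a 'target' face when its detection matches a ground-truth rectangle of the same identity within a 5% tolerance.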
const targetFaces = json
.filter(face => {
return limitedCsv
.some(entry => {
if (entry[COLUMN_NAME] === face.realpath.split('/')[4] && entry[COLUMN_URL].split('/').pop() === face.realpath.split('/').pop()) {
let dims = entry[COLUMN_DIMS].split(' ').map(i => parseInt(i))
dims = {x: dims[1], y: dims[0]}
const rect = entry[COLUMN_RECT].split(' ').map(i => parseInt(i))
return Math.abs(face['face-detections'][0].x - rect[0] / dims.x) < 0.05 && Math.abs(face['face-detections'][0].y - rect[1] / dims.y) < 0.05
}
return false
})
})
const targetFacesPerIdentity = targetFaces.reduce((acc, face) => {
const name = face.realpath.split('/')[4]
acc[name] = acc[name] ?? []
acc[name].push(face)
return acc
},{})
const targetFacesByCluster = targetFaces
.reduce((acc, face) => {
const clusterId = parseInt(face.href.split('/')[6]);
acc[clusterId] = [...(acc[clusterId] ?? []), face.realpath.split('/')[4]];
return acc
}, {});
console.log(facesByCluster);
console.log(targetFacesByCluster);
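// Cluster accuracy is the share of the dominant identity in a cluster, e.g. a cluster labelled A, A, B scores 2/3.
// It is computed over target faces (for clusters with more than one target face) and over all faces.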
const clusterTargetAccuracies = Object.entries(targetFacesByCluster)
.filter(([clusterId, names]) => names.length > 1)
.map(([clusterId, names]) =>
[...new Set(names).values()]
.map(name1 =>
names.filter(name2 => name1 === name2).length
).sort().reverse()[0] / names.length
);
const clusterAccuracies = Object.entries(facesByCluster)
.map(([clusterId, names]) =>
[...new Set(names).values()]
.map(name1 =>
names.filter(name2 => name1 === name2).length
).sort().reverse()[0] / names.length
);
const clusteredFaces = Object.entries(facesByCluster)
.map(([clusterId, names]) => names.length)
.reduce((acc, val) => acc+val, 0)
const clusteredTargetFaces = Object.entries(targetFacesByCluster)
.map(([clusterId, names]) => names.length)
.reduce((acc, val) => acc+val, 0)
const clusteredTargetFacesByIdentity = Object.entries(targetFacesByCluster)
.map(([clusterId, names]) =>
[...new Set(names).values()]
.map(name1 =>
[name1, names.filter(name2 => name1 === name2).length]
).sort(([name1, size1], [name2, size2]) => size1 - size2).reverse()[0]
)
.filter(([name,size]) => size > 1)
.reduce((acc, [name, size]) => {
acc[name] = (acc[name] ?? 0) + size
return acc
}, Object.fromEntries(Object.entries(targetFacesPerIdentity).map(([key]) => [key, 0])))
console.log(targetFacesPerIdentity)
console.log(clusteredTargetFacesByIdentity)
const averageTargetFacesPerIdentity = Object.entries(targetFacesPerIdentity).reduce((acc, [name, detections]) => acc+detections.length, 0) / Object.entries(targetFacesPerIdentity).length
const averageClusteredTargetFacesByIdentity = Object.entries(clusteredTargetFacesByIdentity).reduce((acc, [name, size]) => acc+size, 0) / Object.entries(clusteredTargetFacesByIdentity).length
const clusteredTargetFacesByIdentityRate = Object.entries(clusteredTargetFacesByIdentity)
.reduce((acc, [name, size]) => acc + size / targetFacesPerIdentity[name].length, 0) / Object.entries(clusteredTargetFacesByIdentity).length
const identitiesWithPhotos = $(find data/admin/files/IMDb-Face -type d ! -empty | wc -l)
const identitiesWithDetections = Object.entries(targetFacesPerIdentity).length
const identitiesWithEnoughDetections = Object.entries(targetFacesPerIdentity).filter(([name, detections]) => detections.length > 1).length
const identitiesWithClusters = Object.entries(clusteredTargetFacesByIdentity).filter(([name, size]) => size > 1).length
const identitiesWithClustersRate = identitiesWithClusters / identitiesWithEnoughDetections
const detectedFaces = $(sqlite3 data/nextcloud.db "select count(*) from oc_recognize_face_detections where user_id = 'admin';")
const detectedTargetFaces = allDetections.filter(detection => {
if(detection.length < 3) return false
const x = Number(detection[0])
const y = Number(detection[1])
const path = detection[2]
return limitedCsv
.some(entry => {
if (entry[COLUMN_NAME] === path.split('/')[2] && entry[COLUMN_URL].split('/').pop().split('.jpg')[0] === path.split('/').pop().split('.jpg')[0]) {
let dims = entry[COLUMN_DIMS].split(' ').map(i => parseInt(i))
dims = {x: dims[1], y: dims[0]}
const rect = entry[COLUMN_RECT].split(' ').map(i => parseInt(i))
return Math.abs(x - rect[0] / dims.x) < 0.05 && Math.abs(y - rect[1] / dims.y) < 0.05
}
return false
})
}).length
const totalPhotos = $(ls data/admin/files/IMDb-Face/* | wc -l)
const detectedFacesRate = detectedFaces / totalPhotos
const clusteredTargetFacesRate = clusteredTargetFaces / detectedTargetFaces
const clusteredFacesRate = clusteredFaces / detectedFaces
const averageClusterAccuracy = clusterAccuracies.reduce((acc, val) => acc+val, 0)/clusterAccuracies.length
const averageClusterTargetAccuracy = clusterTargetAccuracies.reduce((acc, val) => acc+val, 0)/clusterTargetAccuracies.length
const targettedShitClusterRate = clusterTargetAccuracies.filter((val) => val < 0.5).length/clusterTargetAccuracies.length
const shitClusterRate = clusterAccuracies.filter((val) => val < 0.5).length/clusterAccuracies.length
console.log({ clusterAccuracies });
console.log({ clusterTargetAccuracies });
console.log({ totalPhotos });
console.log({ detectedFaces });
console.log({ detectedFacesRate });
console.log({ detectedTargetFaces });
console.log({ clusteredFaces });
console.log({ clusteredFacesRate })
console.log({ clusteredTargetFaces })
console.log({ clusteredTargetFacesRate })
console.log({ averageTargetFacesPerIdentity })
console.log({ averageClusteredTargetFacesByIdentity })
console.log({ clusteredTargetFacesByIdentityRate })
console.log({ identitiesWithPhotos })
console.log({ identitiesWithDetections })
console.log({ identitiesWithEnoughDetections })
console.log({ identitiesWithClusters })
console.log({ identitiesWithClustersRate })
console.log({ shitClusterRate })
console.log({ targettedShitClusterRate })
console.log({ averageClusterAccuracy })
console.log({ averageClusterTargetAccuracy })
console.log({ weightedAccuracy: averageClusterAccuracy * clusteredFacesRate })
console.log({ weightedTargetAccuracy: averageClusterTargetAccuracy * clusteredTargetFacesRate })
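// Combined score: geometric mean of four rates; the benchmark fails when it falls below 0.6.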
const combinedScore = (averageClusterTargetAccuracy * identitiesWithClustersRate * clusteredTargetFacesByIdentityRate * clusteredTargetFacesRate) ** (1/4)
console.log({ combinedScore, minCombinedScore: 0.6 })
if (combinedScore < 0.6 || combinedScore > 1.0) {
console.log('Benchmark result: Bad')
process.exit(1)
} else {
console.log('Benchmark result: Good')
}
"