Skip to content

Commit

Permalink
More efficient matching (#159)
Browse files Browse the repository at this point in the history
- Avoid the recomputation of pairs in self-matching: this should speed up SfM matching for pairs from retrieval or poses
- Change the pair format from `{name0}_{name1}` to `{name0}/{name1}` to speed up HDF5 hashing, but keep backward compatibility

Co-authored-by: Mihai Dusmanu <[email protected]>
  • Loading branch information
skydes and mihaidusmanu authored Mar 6, 2022
1 parent d24621c commit 653d3e2
Show file tree
Hide file tree
Showing 5 changed files with 90 additions and 49 deletions.
21 changes: 8 additions & 13 deletions hloc/localize_sfm.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,13 @@
from pathlib import Path
from collections import defaultdict
from typing import Dict, List, Union
import h5py
from tqdm import tqdm
import pickle
import pycolmap

from . import logger
from .utils.parsers import parse_image_lists, parse_retrieval, names_to_pair
from .utils.io import get_keypoints, get_matches
from .utils.parsers import parse_image_lists, parse_retrieval


def do_covisibility_clustering(frame_ids: List[int],
Expand Down Expand Up @@ -73,8 +73,7 @@ def pose_from_cluster(
matches_path: Path,
**kwargs):

with h5py.File(features_path, 'r') as f:
kpq = f[qname]['keypoints'].__array__()
kpq = get_keypoints(features_path, qname)
kpq += 0.5 # COLMAP coordinates

kp_idx_to_3D = defaultdict(list)
Expand All @@ -88,15 +87,11 @@ def pose_from_cluster(
points3D_ids = np.array([p.point3D_id if p.has_point3D() else -1
for p in image.points2D])

pair = names_to_pair(qname, image.name)
with h5py.File(matches_path, 'r') as f:
matches = f[pair]['matches0'].__array__()
valid = np.where(matches > -1)[0]
valid = valid[points3D_ids[matches[valid]] != -1]
num_matches += len(valid)

for idx in valid:
id_3D = points3D_ids[matches[idx]]
matches, _ = get_matches(matches_path, qname, image.name)
matches = matches[points3D_ids[matches[:, 1]] != -1]
num_matches += len(matches)
for idx, m in matches:
id_3D = points3D_ids[m]
kp_idx_to_3D_to_db[idx][id_3D].append(i)
# avoid duplicate observations
if id_3D not in kp_idx_to_3D[idx]:
Expand Down
42 changes: 29 additions & 13 deletions hloc/match_features.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import argparse
from typing import Union, Optional, Dict
from typing import Union, Optional, Dict, List, Tuple
from pathlib import Path
import pprint
import collections.abc as collections
Expand All @@ -9,7 +9,7 @@

from . import matchers, logger
from .utils.base_model import dynamic_load
from .utils.parsers import names_to_pair, parse_retrieval
from .utils.parsers import names_to_pair, names_to_pair_old, parse_retrieval
from .utils.io import list_h5_names


Expand Down Expand Up @@ -95,6 +95,27 @@ def main(conf: Dict,
return matches


def find_unique_new_pairs(pairs_all: List[Tuple[str]],
                          match_path: Optional[Path] = None):
    """Filter a pair list down to the pairs that still need matching.

    Removes symmetric duplicates — if (i, j) was seen, a later (j, i) is
    dropped — and, when a match file is given, pairs already present in it
    under either the current '/'-separated key format or the legacy
    '_'-separated one, in either order.

    Args:
        pairs_all: iterable of (name0, name1) image-name pairs.
        match_path: optional HDF5 match file; ignored if None or missing.

    Returns:
        The unique, not-yet-matched pairs, in first-seen input order
        (a plain set would make the order nondeterministic across runs).
    """
    seen = set()
    pairs = []
    for i, j in pairs_all:
        if (i, j) not in seen and (j, i) not in seen:
            seen.add((i, j))
            pairs.append((i, j))
    if match_path is not None and match_path.exists():
        with h5py.File(str(match_path), 'r') as fd:
            kept = []
            for i, j in pairs:
                # Check both orders and both key formats for an existing match.
                if (names_to_pair(i, j) in fd or
                        names_to_pair(j, i) in fd or
                        names_to_pair_old(i, j) in fd or
                        names_to_pair_old(j, i) in fd):
                    continue
                kept.append((i, j))
            pairs = kept
    return pairs


@torch.no_grad()
def match_from_paths(conf: Dict,
pairs_path: Path,
Expand All @@ -112,25 +133,21 @@ def match_from_paths(conf: Dict,
raise FileNotFoundError(f'Reference feature file {path}.')
name2ref = {n: i for i, p in enumerate(feature_paths_refs)
for n in list_h5_names(p)}
match_path.parent.mkdir(exist_ok=True, parents=True)

assert pairs_path.exists(), pairs_path
pairs = parse_retrieval(pairs_path)
pairs = [(q, r) for q, rs in pairs.items() for r in rs]
pairs = find_unique_new_pairs(pairs, None if overwrite else match_path)
if len(pairs) == 0:
logger.info('Skipping the matching.')
return

device = 'cuda' if torch.cuda.is_available() else 'cpu'
Model = dynamic_load(matchers, conf['model']['name'])
model = Model(conf['model']).eval().to(device)

match_path.parent.mkdir(exist_ok=True, parents=True)
skip_pairs = set(list_h5_names(match_path)
if match_path.exists() and not overwrite else ())

for (name0, name1) in tqdm(pairs, smoothing=.1):
pair = names_to_pair(name0, name1)
# Avoid to recompute duplicates to save time
if pair in skip_pairs or names_to_pair(name0, name1) in skip_pairs:
continue

data = {}
with h5py.File(str(feature_path_q), 'r') as fd:
grp = fd[name0]
Expand All @@ -146,6 +163,7 @@ def match_from_paths(conf: Dict,
data = {k: v[None] for k, v in data.items()}

pred = model(data)
pair = names_to_pair(name0, name1)
with h5py.File(str(match_path), 'a') as fd:
if pair in fd:
del fd[pair]
Expand All @@ -157,8 +175,6 @@ def match_from_paths(conf: Dict,
scores = pred['matching_scores0'][0].cpu().half().numpy()
grp.create_dataset('matching_scores0', data=scores)

skip_pairs.add(pair)

logger.info('Finished exporting matches.')


Expand Down
25 changes: 4 additions & 21 deletions hloc/triangulation.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,11 @@
import sys
from pathlib import Path
from tqdm import tqdm
import h5py
import numpy as np
import pycolmap

from . import logger
from .utils.database import COLMAPDatabase
from .utils.parsers import names_to_pair
from .utils.io import get_keypoints, get_matches


class OutputCapture:
Expand Down Expand Up @@ -53,15 +51,13 @@ def create_db_from_model(reconstruction, database_path):

def import_features(image_ids, database_path, features_path):
logger.info('Importing features into the database...')
hfile = h5py.File(str(features_path), 'r')
db = COLMAPDatabase.connect(database_path)

for image_name, image_id in tqdm(image_ids.items()):
keypoints = hfile[image_name]['keypoints'].__array__()
keypoints = get_keypoints(features_path, image_name)
keypoints += 0.5 # COLMAP origin
db.add_keypoints(image_id, keypoints)

hfile.close()
db.commit()
db.close()

Expand All @@ -73,35 +69,22 @@ def import_matches(image_ids, database_path, pairs_path, matches_path,
with open(str(pairs_path), 'r') as f:
pairs = [p.split() for p in f.readlines()]

hfile = h5py.File(str(matches_path), 'r')
db = COLMAPDatabase.connect(database_path)

matched = set()
for name0, name1 in tqdm(pairs):
id0, id1 = image_ids[name0], image_ids[name1]
if len({(id0, id1), (id1, id0)} & matched) > 0:
continue
pair = names_to_pair(name0, name1)
if pair not in hfile:
raise ValueError(
f'Could not find pair {(name0, name1)}... '
'Maybe you matched with a different list of pairs? '
f'Reverse in file: {names_to_pair(name0, name1) in hfile}.')

matches = hfile[pair]['matches0'].__array__()
valid = matches > -1
matches, scores = get_matches(matches_path, name0, name1)
if min_match_score:
scores = hfile[pair]['matching_scores0'].__array__()
valid = valid & (scores > min_match_score)
matches = np.stack([np.where(valid)[0], matches[valid]], -1)

matches = matches[scores > min_match_score]
db.add_matches(id0, id1, matches)
matched |= {(id0, id1), (id1, id0)}

if skip_geometric_verification:
db.add_two_view_geometry(id0, id1, matches)

hfile.close()
db.commit()
db.close()

Expand Down
43 changes: 43 additions & 0 deletions hloc/utils/io.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
from typing import Tuple
from pathlib import Path
import numpy as np
import cv2
import h5py

from .parsers import names_to_pair, names_to_pair_old


def read_image(path, grayscale=False):
if grayscale:
Expand All @@ -23,3 +28,41 @@ def visit_fn(_, obj):
names.append(obj.parent.name.strip('/'))
fd.visititems(visit_fn)
return list(set(names))


def get_keypoints(path: Path, name: str) -> np.ndarray:
    """Load the keypoint array of image `name` from an HDF5 feature file."""
    with h5py.File(str(path), 'r') as hfile:
        return hfile[name]['keypoints'].__array__()


def find_pair(hfile: h5py.File, name0: str, name1: str):
    """Locate the HDF5 group key of an image pair.

    Tries the current '/'-separated key format first, then the older
    '_'-separated one, in both name orders. Returns (key, reversed),
    where `reversed` is True when the pair is stored as (name1, name0).
    Raises ValueError if no candidate key exists in the file.
    """
    candidates = (
        (names_to_pair(name0, name1), False),
        (names_to_pair(name1, name0), True),
        # older, less efficient format
        (names_to_pair_old(name0, name1), False),
        (names_to_pair_old(name1, name0), True),
    )
    for pair, is_reversed in candidates:
        if pair in hfile:
            return pair, is_reversed
    raise ValueError(
        f'Could not find pair {(name0, name1)}... '
        'Maybe you matched with a different list of pairs? ')


def get_matches(path: Path, name0: str, name1: str) -> Tuple[np.ndarray]:
    """Load the valid matches and their scores for an image pair.

    Returns an (N, 2) array of keypoint index pairs (columns swapped when
    the pair is stored in reverse order in the file) and the corresponding
    (N,) array of matching scores.
    """
    with h5py.File(str(path), 'r') as hfile:
        pair, reverse = find_pair(hfile, name0, name1)
        matches0 = hfile[pair]['matches0'].__array__()
        scores0 = hfile[pair]['matching_scores0'].__array__()
    # Keep only keypoints that have a match (-1 marks "unmatched").
    idx0 = np.flatnonzero(matches0 != -1)
    matches = np.stack([idx0, matches0[idx0]], -1)
    if reverse:
        matches = np.flip(matches, -1)
    scores = scores0[idx0]
    return matches, scores
8 changes: 6 additions & 2 deletions hloc/utils/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,5 +48,9 @@ def parse_retrieval(path):
return dict(retrieval)


def names_to_pair(name0, name1):
return '_'.join((name0.replace('/', '-'), name1.replace('/', '-')))
def names_to_pair(name0, name1, separator='/'):
    """Build the HDF5 key of an image pair, sanitizing '/' in each name."""
    sanitized = (name.replace('/', '-') for name in (name0, name1))
    return separator.join(sanitized)


def names_to_pair_old(name0, name1):
    """Legacy pair key using the '_' separator (kept for backward compatibility)."""
    return names_to_pair(name0, name1, '_')

0 comments on commit 653d3e2

Please sign in to comment.