Skip to content

Commit

Permalink
Do not match album title for single candidates
Browse files Browse the repository at this point in the history
See the comment added for the rationale. This helps quite a bit. Also
strip "ov" (original version) which helps for only ~20 or so listens.

Before:

    Matched 153052 of 162052 (94.4%).
     - 107907 of 162052 (66.6%) SearchExact
     -  30171 of 162052 (18.6%) MbidTitle
     -   3029 of 162052 ( 1.9%) SearchAlbumPrefix
     -   8519 of 162052 ( 5.3%) SearchNormalized
     -   6040 of 162052 ( 3.7%) Miss
     -    477 of 162052 ( 0.3%) Ambiguous
     -   2483 of 162052 ( 1.5%) SearchFail
     -   3426 of 162052 ( 2.1%) SearchFuzzy

After:

    Matched 154913 of 162052 (95.6%).
     - 107907 of 162052 (66.6%) SearchExact
     -  30171 of 162052 (18.6%) MbidTitle
     -   3029 of 162052 ( 1.9%) SearchAlbumPrefix
     -   8519 of 162052 ( 5.3%) SearchNormalized
     -   5287 of 162052 ( 3.3%) SearchFuzzy
     -    637 of 162052 ( 0.4%) SearchFail
     -    477 of 162052 ( 0.3%) Ambiguous
     -   6025 of 162052 ( 3.7%) Miss
  • Loading branch information
ruuda committed May 26, 2024
1 parent 3388226 commit 98386ba
Showing 1 changed file with 27 additions and 21 deletions.
48 changes: 27 additions & 21 deletions src/matcher.rs
Original file line number Diff line number Diff line change
Expand Up @@ -171,23 +171,28 @@ fn match_listen(
&words_track_title[..] == &words[..title_words_len]
};

let mut words_album_entry = Vec::new();
normalize_words(album_title, &mut words_album_entry);
simplify_normalized_words(&mut words_album_entry);

let mut words_album_listen = Vec::new();
normalize_words(&listen.album, &mut words_album_listen);
simplify_normalized_words(&mut words_album_listen);
let album_match = words_album_entry == words_album_listen;
// We searched for track title and artist. If we got only a single
// candidate, then even if the album is not a match, it's either that
// track, or not a match at all. So in that case we might as well ignore
// the album and attribute a few more listens to something in the
// library, even if it is not the exact version. Maybe the listen is
// from a compilation album, but the library now contains the studio
// album, or something like this. If there is no ambiguity, go for it.
let album_match = if n_candidates == 1 {
true
} else {
let mut words_album_entry = Vec::new();
normalize_words(album_title, &mut words_album_entry);
simplify_normalized_words(&mut words_album_entry);

let mut words_album_listen = Vec::new();
normalize_words(&listen.album, &mut words_album_listen);
simplify_normalized_words(&mut words_album_listen);
words_album_entry == words_album_listen
};

if track_match && album_match {
results.push(Match::SearchFuzzy(track_id));
} else {
println!("MISMATCH: {listen:?}");
println!(" Title L: {:?}", &words[..title_words_len]);
println!(" Title R: {:?}", words_track_title);
println!(" Album L: {:?}", words_album_listen);
println!(" Album R: {:?}", words_album_entry);
}
}

Expand All @@ -203,13 +208,14 @@ fn match_listen(
fn simplify_normalized_words(words: &mut Vec<String>) {
// Drop uninformative words and punctuation.
words.retain(|w| match w.as_ref() {
"the" => false,
"!" => false,
"&" => false,
"a" => false,
"and" => false,
"ov" => false,
"part" => false,
"pt" => false,
"a" => false,
"&" => false,
"!" => false,
"the" => false,
_ => true,
});

Expand Down Expand Up @@ -287,10 +293,10 @@ pub fn match_listens(
println!(" - {:6} of {:6} ({:4.1}%) MbidTitle", match_mbid_title, total, (match_mbid_title as f32 * 100.0) / total as f32);
println!(" - {:6} of {:6} ({:4.1}%) SearchAlbumPrefix", match_search_album_prefix, total, (match_search_album_prefix as f32 * 100.0) / total as f32);
println!(" - {:6} of {:6} ({:4.1}%) SearchNormalized", match_search_normalized, total, (match_search_normalized as f32 * 100.0) / total as f32);
println!(" - {:6} of {:6} ({:4.1}%) Miss", misses, total, (misses as f32 * 100.0) / total as f32);
println!(" - {:6} of {:6} ({:4.1}%) Ambiguous", ambiguous, total, (ambiguous as f32 * 100.0) / total as f32);
println!(" - {:6} of {:6} ({:4.1}%) SearchFail", search_fail, total, (search_fail as f32 * 100.0) / total as f32);
println!(" - {:6} of {:6} ({:4.1}%) SearchFuzzy", match_search_fuzzy, total, (match_search_fuzzy as f32 * 100.0) / total as f32);
println!(" - {:6} of {:6} ({:4.1}%) SearchFail", search_fail, total, (search_fail as f32 * 100.0) / total as f32);
println!(" - {:6} of {:6} ({:4.1}%) Ambiguous", ambiguous, total, (ambiguous as f32 * 100.0) / total as f32);
println!(" - {:6} of {:6} ({:4.1}%) Miss", misses, total, (misses as f32 * 100.0) / total as f32);

Ok(())
}

0 comments on commit 98386ba

Please sign in to comment.