Commit f286f9eb authored by Lukáš Lalinský

Limit the alignment offset of tracks

parent b0e86516
......@@ -7,6 +7,9 @@ TRACK_MERGE_THRESHOLD = 0.7
# minimum similarity with the worst matching fingerprint
TRACK_GROUP_MERGE_THRESHOLD = 0.3
# maximum alignment difference between fingerprints in a track
TRACK_MAX_OFFSET = 80
# minimum similarity for two fingerprints to be considered the same
FINGERPRINT_MERGE_THRESHOLD = 0.95
......
......@@ -15,7 +15,7 @@ MAX_LENGTH_DIFF = 7
PARTS = ((1, 20), (21, 100))
PART_SEARCH_SQL = """
SELECT id, track_id, score FROM (
SELECT id, track_id, acoustid_compare2(fingerprint, query) AS score
SELECT id, track_id, acoustid_compare2(fingerprint, query, %(max_offset)s) AS score
FROM fingerprint, (SELECT %(fp)s::int4[] AS query) q
WHERE
length BETWEEN %(length)s - %(max_length_diff)s AND %(length)s + %(max_length_diff)s AND
......@@ -31,14 +31,14 @@ def decode_fingerprint(fingerprint_string):
return fingerprint
def lookup_fingerprint(conn, fp, length, good_enough_score, min_score, fast=False):
def lookup_fingerprint(conn, fp, length, good_enough_score, min_score, fast=False, max_offset=0):
"""Search for a fingerprint in the database"""
matched = []
best_score = 0.0
for part_start, part_length in PARTS:
params = dict(fp=fp, length=length, part_start=part_start,
part_length=part_length, max_length_diff=MAX_LENGTH_DIFF,
min_score=min_score)
min_score=min_score, max_offset=max_offset)
with closing(conn.execute(PART_SEARCH_SQL, params)) as result:
for row in result:
matched.append(row)
......
......@@ -55,7 +55,7 @@ def import_submission(conn, submission):
matches = lookup_fingerprint(conn,
submission['fingerprint'], submission['length'],
const.FINGERPRINT_MERGE_THRESHOLD,
const.TRACK_MERGE_THRESHOLD, fast=True)
const.TRACK_MERGE_THRESHOLD, fast=True, max_offset=const.TRACK_MAX_OFFSET)
fingerprint = {
'id': None,
'track_id': None,
......
......@@ -210,7 +210,7 @@ def can_merge_tracks(conn, track_ids):
cond = sql.and_(fp1.c.track_id.in_(track_ids), fp2.c.track_id.in_(track_ids))
query = sql.select([
fp1.c.track_id, fp2.c.track_id,
sql.func.min(sql.func.acoustid_compare2(fp1.c.fingerprint, fp2.c.fingerprint)),
sql.func.min(sql.func.acoustid_compare2(fp1.c.fingerprint, fp2.c.fingerprint, const.TRACK_MAX_OFFSET)),
], cond, from_obj=src).group_by(fp1.c.track_id, fp2.c.track_id).order_by(fp1.c.track_id, fp2.c.track_id)
rows = conn.execute(query)
merges = {}
......@@ -230,7 +230,7 @@ def can_merge_tracks(conn, track_ids):
def can_add_fp_to_track(conn, track_id, fingerprint):
cond = schema.fingerprint.c.track_id == track_id
query = sql.select([
sql.func.min(sql.func.acoustid_compare2(schema.fingerprint.c.fingerprint, fingerprint)),
sql.func.min(sql.func.acoustid_compare2(schema.fingerprint.c.fingerprint, fingerprint, const.TRACK_MAX_OFFSET)),
], cond, from_obj=schema.fingerprint)
score = conn.execute(query).scalar()
if score < const.TRACK_GROUP_MERGE_THRESHOLD:
......
-- Adjust this setting to control where the objects get created.
SET search_path = public;
-- Declare the C-language function acoustid_compare(int4[], int4[]), which returns a float4.
-- STRICT: PostgreSQL returns NULL without calling the function if any argument is NULL.
-- COST 1000: planner's estimated execution cost, in units of cpu_operator_cost.
CREATE OR REPLACE FUNCTION acoustid_compare(int4[], int4[]) RETURNS float4
AS 'MODULE_PATHNAME'
LANGUAGE C VOLATILE STRICT -- marked as VOLATILE to avoid multiple calls, even though the results are immutable
COST 1000;
CREATE OR REPLACE FUNCTION acoustid_compare2(int4[], int4[], int DEFAULT 0) RETURNS float4
AS 'MODULE_PATHNAME'
LANGUAGE C VOLATILE STRICT -- marked as VOLATILE to avoid multiple calls, even though the results are immutable
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment