# Copyright (C) 2011 Lukas Lalinsky
# Distributed under the MIT license, see the LICENSE file for details.

import logging
from sqlalchemy import sql
from acoustid import tables as schema, const
from acoustid.data.fingerprint import insert_fingerprint, inc_fingerprint_submission_count, FingerprintSearcher
from acoustid.data.musicbrainz import resolve_mbid_redirect
from acoustid.data.track import (
    insert_track, insert_mbid, insert_puid, merge_tracks, insert_track_meta,
    can_add_fp_to_track, can_merge_tracks, insert_track_foreignid,
)

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


def insert_submission(conn, data):
    """
    Insert a new submission into the database.

    ``data`` is a mapping that must contain the ``fingerprint`` and
    ``length`` keys. The keys ``bitrate``, ``mbid``, ``puid``,
    ``source_id``, ``format_id``, ``meta_id``, ``foreignid`` and
    ``foreignid_id`` are optional and are passed as ``None`` when missing.

    The insert runs inside its own ``conn.begin()`` block.

    Returns the primary key of the newly inserted submission row.
    """
    with conn.begin():
        insert_stmt = schema.submission.insert().values({
            'fingerprint': data['fingerprint'],
            'length': data['length'],
            'bitrate': data.get('bitrate'),
            'mbid': data.get('mbid'),
            'puid': data.get('puid'),
            'source_id': data.get('source_id'),
            'format_id': data.get('format_id'),
            'meta_id': data.get('meta_id'),
            'foreignid': data.get('foreignid'),
            'foreignid_id': data.get('foreignid_id'),
        })
        # Named submission_id rather than id to avoid shadowing the builtin.
        submission_id = conn.execute(insert_stmt).inserted_primary_key[0]
    logger.debug("Inserted submission %r with data %r", submission_id, data)
    return submission_id


def import_submission(conn, submission, index=None):
    """
    Import the given submission into the main fingerprint database.

    ``submission`` is a submission row (indexable by column name) and
    ``index`` is handed to ``FingerprintSearcher`` unchanged.

    The UPDATE flagging the submission as handled is issued before any of
    the checks below, so it is also issued for submissions that end up
    being skipped.

    Returns the fingerprint dict with ``id`` and ``track_id`` filled in, or
    ``None`` when the submission was skipped (too few unique items, or
    nothing to index).
    """
    with conn.begin():
        update_stmt = schema.submission.update().where(
            schema.submission.c.id == submission['id'])
        conn.execute(update_stmt.values(handled=True))

        mbids = []
        if submission['mbid']:
            mbids.append(resolve_mbid_redirect(conn, submission['mbid']))
        logger.info("Importing submission %d with MBIDs %s",
                    submission['id'], ', '.join(mbids))

        num_unique_items = len(set(submission['fingerprint']))
        if num_unique_items < const.FINGERPRINT_MIN_UNIQUE_ITEMS:
            logger.info("Skipping, has only %d unique items", num_unique_items)
            return

        # execute() returns a result object, which is always truthy; the
        # count itself has to be pulled out with scalar() for the check
        # below to be able to trigger.
        num_query_items = conn.execute(
            "SELECT icount(acoustid_extract_query(%(fp)s))",
            dict(fp=submission['fingerprint'])).scalar()
        if not num_query_items:
            logger.info("Skipping, no data to index")
            return

        searcher = FingerprintSearcher(conn, index, fast=False)
        searcher.min_score = const.TRACK_MERGE_THRESHOLD
        matches = searcher.search(submission['fingerprint'], submission['length'])

        fingerprint = {
            'id': None,
            'track_id': None,
            'fingerprint': submission['fingerprint'],
            'length': submission['length'],
            'bitrate': submission['bitrate'],
            'format_id': submission['format_id'],
        }

        if matches:
            all_track_ids = set()
            possible_track_ids = set()
            for m in matches:
                # Only the first match of every track is considered.
                if m['track_id'] in all_track_ids:
                    continue
                all_track_ids.add(m['track_id'])
                logger.debug("Fingerprint %d with track %d is %d%% similar", m['id'], m['track_id'], m['score'] * 100)
                if can_add_fp_to_track(conn, m['track_id'], submission['fingerprint'], submission['length']):
                    possible_track_ids.add(m['track_id'])
                    # The first track that accepts the fingerprint wins.
                    if not fingerprint['track_id']:
                        fingerprint['track_id'] = m['track_id']
                        # Above the merge threshold the existing fingerprint
                        # is reused instead of inserting a new one.
                        if m['score'] > const.FINGERPRINT_MERGE_THRESHOLD:
                            fingerprint['id'] = m['id']
            if len(possible_track_ids) > 1:
                for group in can_merge_tracks(conn, possible_track_ids):
                    if fingerprint['track_id'] in group and len(group) > 1:
                        # Merge the group into its lowest track id and attach
                        # the fingerprint to that track.
                        fingerprint['track_id'] = min(group)
                        group.remove(fingerprint['track_id'])
                        merge_tracks(conn, fingerprint['track_id'], list(group))
                        break

        if not fingerprint['track_id']:
            fingerprint['track_id'] = insert_track(conn)
        if not fingerprint['id']:
            fingerprint['id'] = insert_fingerprint(conn, fingerprint, submission['id'], submission['source_id'])
        else:
            inc_fingerprint_submission_count(conn, fingerprint['id'], submission['id'], submission['source_id'])

        for mbid in mbids:
            insert_mbid(conn, fingerprint['track_id'], mbid, submission['id'], submission['source_id'])
        # An all-zero PUID is treated the same as a missing one.
        if submission['puid'] and submission['puid'] != '00000000-0000-0000-0000-000000000000':
            insert_puid(conn, fingerprint['track_id'], submission['puid'], submission['id'], submission['source_id'])
        if submission['meta_id']:
            insert_track_meta(conn, fingerprint['track_id'], submission['meta_id'], submission['id'], submission['source_id'])
        if submission['foreignid_id']:
            insert_track_foreignid(conn, fingerprint['track_id'], submission['foreignid_id'], submission['id'], submission['source_id'])
        return fingerprint


def import_queued_submissions(conn, index=None, limit=100, ids=None):
    """
    Import queued submissions into the main fingerprint database.

    Selects submissions whose ``handled`` flag is false, ordered by MBID
    with NULL MBIDs last and then by descending id, and passes each one to
    ``import_submission``.

    ``ids``, when not ``None``, restricts the selection to those submission
    ids. ``limit`` caps the number of selected rows; pass ``None`` for no
    cap. ``index`` is forwarded to ``import_submission``.

    Returns the number of submissions passed to ``import_submission``.
    """
    query = (
        schema.submission.select(schema.submission.c.handled == False)  # noqa: F712
        .order_by(schema.submission.c.mbid.nullslast(), schema.submission.c.id.desc())
    )
    if ids is not None:
        query = query.where(schema.submission.c.id.in_(ids))
    if limit is not None:
        query = query.limit(limit)
    count = 0
    for submission in conn.execute(query):
        import_submission(conn, submission, index=index)
        count += 1
    logger.debug("Imported %d submissions", count)
    return count


def lookup_submission_status(db, ids):
    """
    Look up the tracks that the given submissions ended up in.

    Joins ``fingerprint_source`` with ``fingerprint`` and ``track`` and
    selects the rows whose ``submission_id`` is in ``ids``.

    Returns a dict mapping submission id to track GID. Submission ids with
    no matching row are absent from the result. When ``ids`` is empty, an
    empty dict is returned without touching the database.
    """
    if not ids:
        return {}
    source = (
        schema.fingerprint_source
        .join(schema.fingerprint)
        .join(schema.track)
    )
    query = (
        sql.select([schema.fingerprint_source.c.submission_id, schema.track.c.gid], from_obj=source)
        .where(schema.fingerprint_source.c.submission_id.in_(ids))
    )
    # If a submission id occurs in several rows, the last row wins.
    return {
        submission_id: track_gid
        for submission_id, track_gid in db.execute(query)
    }