Commit 75b4a67c authored by Lukáš Lalinský's avatar Lukáš Lalinský

Multi-DB setup

parent bb009801
[database]
two_phase_commit=yes
[database:default]
host=127.0.0.1
port=5432
port=15432
user=acoustid
name=acoustid_test
password=acoustid
[database:slow]
host=127.0.0.1
port=15432
user=acoustid
name=acoustid_slow_test
password=acoustid
[logging]
level=WARNING
level.sqlalchemy=WARNING
syslog=yes
syslog_facility=local1
[index]
host=127.0.0.1
port=16080
[redis]
host=127.0.0.1
port=6379
port=16379
[website]
base_url=http://acoustid.org/
......
[database]
name=acoustid
user=acoustid
password=XXX
superuser=postgres
host=localhost
port=5432
[database_slow]
name=acoustid
user=acoustid
password=XXX
host=localhost
port=5432
......
This diff is collapsed.
......@@ -18,8 +18,10 @@ def read_env_item(obj, key, name, convert=None):
value = None
if name in os.environ:
value = os.environ[name]
if name + '_FILE' in os.environ:
logger.info('Reading config value from environment variable %s', name)
elif name + '_FILE' in os.environ:
value = open(os.environ[name + '_FILE']).read().strip()
logger.info('Reading config value from environment variable %s', name + '_FILE')
if value is not None:
if convert is not None:
value = convert(value)
......@@ -39,6 +41,35 @@ class BaseConfig(object):
pass
class DatabasesConfig(BaseConfig):
    """Settings for the group of named databases (``default`` and ``slow``).

    Each entry in ``databases`` is an independent ``DatabaseConfig``;
    ``use_two_phase_commit`` is a single flag shared by the whole group.
    """

    def __init__(self):
        self.use_two_phase_commit = False
        self.databases = {
            'default': DatabaseConfig(),
            'slow': DatabaseConfig(),
        }

    def create_engines(self, **kwargs):
        """Return a dict mapping each database name to a newly created engine.

        All keyword arguments are forwarded unchanged to every
        ``DatabaseConfig.create_engine`` call.
        """
        return dict(
            (db_name, db_config.create_engine(**kwargs))
            for db_name, db_config in self.databases.items()
        )

    def read_section(self, parser, section):
        """Load the group flag from ``section`` and each database from ``section:<name>``."""
        option = 'two_phase_commit'
        if parser.has_option(section, option):
            self.use_two_phase_commit = parser.getboolean(section, option)
        for db_name, db_config in self.databases.items():
            db_config.read_section(parser, '{}:{}'.format(section, db_name))

    def read_env(self, prefix):
        """Load the group flag and every database's settings from the environment.

        The flag is read from ``<prefix>TWO_PHASE_COMMIT``; each database
        reads its own variables using the prefix ``<prefix><NAME>_``, where
        ``<NAME>`` is the upper-cased database name.
        """
        read_env_item(self, 'use_two_phase_commit', prefix + 'TWO_PHASE_COMMIT', convert=str_to_bool)
        for db_name, db_config in self.databases.items():
            db_config.read_env(prefix + db_name.upper() + '_')
class DatabaseConfig(BaseConfig):
def __init__(self):
......@@ -103,21 +134,21 @@ class DatabaseConfig(BaseConfig):
if parser.has_option(section, 'password'):
self.password = parser.get(section, 'password')
if parser.has_option(section, 'pool_size'):
self.password = parser.getint(section, 'pool_size')
self.pool_size = parser.getint(section, 'pool_size')
if parser.has_option(section, 'pool_recycle'):
self.password = parser.getint(section, 'pool_recycle')
self.pool_recycle = parser.getint(section, 'pool_recycle')
if parser.has_option(section, 'pool_pre_ping'):
self.password = parser.getboolean(section, 'pool_pre_ping')
self.pool_pre_ping = parser.getboolean(section, 'pool_pre_ping')
# Populate this database's settings from environment variables whose names
# start with ``prefix``. Integer settings are converted with ``int`` and the
# boolean setting with ``str_to_bool`` through the ``convert`` argument.
def read_env(self, prefix):
# NOTE(review): every setting is looked up twice below -- first under a
# ``POSTGRES_``-qualified name (e.g. ``<prefix>POSTGRES_HOST``) and then under
# the bare name (e.g. ``<prefix>HOST``). Presumably the later lookup wins when
# both variables are set; confirm against read_env_item, and confirm whether
# the ``POSTGRES_*`` lookups are still meant to be kept.
read_env_item(self, 'name', prefix + 'POSTGRES_DB')
read_env_item(self, 'host', prefix + 'POSTGRES_HOST')
read_env_item(self, 'port', prefix + 'POSTGRES_PORT', convert=int)
read_env_item(self, 'user', prefix + 'POSTGRES_USER')
read_env_item(self, 'password', prefix + 'POSTGRES_PASSWORD')
read_env_item(self, 'pool_size', prefix + 'POSTGRES_POOL_SIZE', convert=int)
read_env_item(self, 'pool_recycle', prefix + 'POSTGRES_POOL_RECYCLE', convert=int)
read_env_item(self, 'pool_pre_ping', prefix + 'POSTGRES_POOL_PRE_PING', convert=str_to_bool)
# Bare-name variants of the same eight settings.
read_env_item(self, 'name', prefix + 'NAME')
read_env_item(self, 'host', prefix + 'HOST')
read_env_item(self, 'port', prefix + 'PORT', convert=int)
read_env_item(self, 'user', prefix + 'USER')
read_env_item(self, 'password', prefix + 'PASSWORD')
read_env_item(self, 'pool_size', prefix + 'POOL_SIZE', convert=int)
read_env_item(self, 'pool_recycle', prefix + 'POOL_RECYCLE', convert=int)
read_env_item(self, 'pool_pre_ping', prefix + 'POOL_PRE_PING', convert=str_to_bool)
class IndexConfig(BaseConfig):
......@@ -342,7 +373,7 @@ class RateLimiterConfig(BaseConfig):
class Config(object):
def __init__(self):
self.database = DatabaseConfig()
self.databases = DatabasesConfig()
self.logging = LoggingConfig()
self.website = WebSiteConfig()
self.index = IndexConfig()
......@@ -357,7 +388,7 @@ class Config(object):
logger.info("Loading configuration file %s", path)
parser = ConfigParser.RawConfigParser()
parser.read(path)
self.database.read(parser, 'database')
self.databases.read(parser, 'database')
self.logging.read(parser, 'logging')
self.website.read(parser, 'website')
self.index.read(parser, 'index')
......@@ -373,7 +404,7 @@ class Config(object):
prefix = 'ACOUSTID_TEST_'
else:
prefix = 'ACOUSTID_'
self.database.read_env(prefix)
self.databases.read_env(prefix)
self.logging.read_env(prefix)
self.website.read_env(prefix)
self.index.read_env(prefix)
......
......@@ -21,3 +21,6 @@ FINGERPRINT_MAX_LENGTH_DIFF = 7
FINGERPRINT_MAX_ALLOWED_LENGTH_DIFF = 30
MAX_REQUESTS_PER_SECOND = 3
MAX_FOREIGNID_NAMESPACE_LENGTH = 10
MAX_FOREIGNID_VALUE_LENGTH = 64
......@@ -28,6 +28,7 @@ def insert_submission(conn, data):
'source_id': data.get('source_id'),
'format_id': data.get('format_id'),
'meta_id': data.get('meta_id'),
'foreignid': data.get('foreignid'),
'foreignid_id': data.get('foreignid_id'),
})
id = conn.execute(insert_stmt).inserted_primary_key[0]
......
from sqlalchemy.orm import sessionmaker
from acoustid.tables import metadata
Session = sessionmaker()
def get_bind_args(engines):
    """Build the ``bind`` / ``binds`` keyword arguments for a session.

    :param engines: dict mapping a bind key (``'default'``, ``'slow'``, ...)
        to an engine
    :returns: dict with ``'bind'`` set to the default engine and ``'binds'``
        mapping every table whose ``info['bind_key']`` is not ``'default'``
        to the engine registered under that key
    """
    per_table = {}
    for table in metadata.sorted_tables:
        key = table.info.get('bind_key', 'default')
        if key == 'default':
            # Tables without an explicit key are served by the fallback ``bind``.
            continue
        per_table[table] = engines[key]
    return dict(bind=engines['default'], binds=per_table)
def get_session_args(script):
    """Return the keyword arguments used to configure a session for ``script``.

    The result carries ``twophase`` (taken from
    ``script.config.databases.use_two_phase_commit``) plus whatever
    ``get_bind_args`` produces for ``script.db_engines``.
    """
    session_args = dict(twophase=script.config.databases.use_two_phase_commit)
    for option, value in get_bind_args(script.db_engines).items():
        session_args[option] = value
    return session_args
class DatabaseContext(object):
def __init__(self, bind):
self.session = Session(bind=bind)
def __init__(self, script):
self.session = Session(**get_session_args(script))
def __enter__(self):
return self
......
......@@ -79,3 +79,11 @@ class StatsLookups(Base):
__table__ = tables.stats_lookups
application = relationship('Application')
# Mapped class for the ``tables.submission`` table object.
class Submission(Base):
__table__ = tables.submission
# Mapped class for the ``tables.submission_result`` table object.
class SubmissionResult(Base):
__table__ = tables.submission_result
......@@ -11,11 +11,26 @@ from optparse import OptionParser
from acoustid.config import Config
from acoustid.indexclient import IndexClientPool
from acoustid.utils import LocalSysLogHandler
from acoustid.db import DatabaseContext
from acoustid._release import GIT_RELEASE
logger = logging.getLogger(__name__)
class ScriptContext(object):
    """Bundle of the ``db``, ``redis`` and ``index`` handles used by a script.

    Usable as a context manager: entering yields the context itself and
    leaving it calls ``close()`` on ``db``. ``redis`` and ``index`` are left
    untouched on exit.
    """

    def __init__(self, db, redis, index):
        self.db, self.redis, self.index = db, redis, index

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Nothing is returned, so an exception raised inside the ``with``
        # body keeps propagating after the database handle is closed.
        self.db.close()
class Script(object):
def __init__(self, config_path, tests=False):
......@@ -23,25 +38,30 @@ class Script(object):
if config_path:
self.config.read(config_path)
self.config.read_env(tests=tests)
if tests:
self.engine = sqlalchemy.create_engine(self.config.database.create_url(),
poolclass=sqlalchemy.pool.AssertionPool)
else:
self.engine = sqlalchemy.create_engine(self.config.database.create_url())
create_engine_kwargs = {'poolclass': sqlalchemy.pool.AssertionPool} if tests else {}
self.db_engines = self.config.databases.create_engines(**create_engine_kwargs)
if not self.config.index.host:
self.index = None
else:
self.index = IndexClientPool(host=self.config.index.host,
port=self.config.index.port,
recycle=60)
if not self.config.redis.host:
self.redis = None
else:
self.redis = Redis(host=self.config.redis.host,
port=self.config.redis.port)
self._console_logging_configured = False
self.setup_logging()
# Read-only shortcut for the engine stored under the 'default' key of
# ``self.db_engines``.
@property
def engine(self):
return self.db_engines['default']
def setup_logging(self):
for logger_name, level in sorted(self.config.logging.levels.items()):
logging.getLogger(logger_name).setLevel(level)
......@@ -66,6 +86,10 @@ class Script(object):
def setup_sentry(self):
sentry_sdk.init(self.config.sentry.script_dsn, release=GIT_RELEASE)
def context(self):
    """Create a ``ScriptContext`` for this script.

    The context's ``db`` is the session of a new ``DatabaseContext`` built
    from this script; ``redis`` and ``index`` are this script's own handles.
    """
    session = DatabaseContext(self).session
    return ScriptContext(session, self.redis, self.index)
def run_script(func, option_cb=None, master_only=False):
parser = OptionParser()
......
......@@ -9,7 +9,7 @@ from cStringIO import StringIO
from werkzeug.exceptions import HTTPException
from werkzeug.routing import Map, Rule, Submount
from werkzeug.wrappers import Request
from werkzeug.contrib.fixers import ProxyFix
from werkzeug.middleware.proxy_fix import ProxyFix
from acoustid.script import Script
from acoustid._release import GIT_RELEASE
import acoustid.api.v1
......@@ -53,12 +53,15 @@ admin_url_rules = [
class Server(Script):
def __init__(self, config_path):
super(Server, self).__init__(config_path)
def __init__(self, config_path, **kwargs):
super(Server, self).__init__(config_path, **kwargs)
url_rules = api_url_rules + admin_url_rules
self.url_map = Map(url_rules, strict_slashes=False)
# WSGI entry point: forwards the call to ``self.wsgi_app``. The attribute is
# looked up at call time, so a wrapper assigned to ``wsgi_app`` on the
# instance is what gets invoked.
def __call__(self, environ, start_response):
return self.wsgi_app(environ, start_response)
def wsgi_app(self, environ, start_response):
urls = self.url_map.bind_to_environ(environ)
handler = None
try:
......@@ -112,16 +115,16 @@ def add_cors_headers(app):
return wrapped_app
def make_application(config_path):
def make_application(config_path, **kwargs):
"""Construct a WSGI application for the AcoustID server
:param config_path: path to the server configuration file
"""
server = Server(config_path)
server = Server(config_path, **kwargs)
server.setup_sentry()
app = GzipRequestMiddleware(server)
app = ProxyFix(app)
app = SentryWsgiMiddleware(app)
app = replace_double_slashes(app)
app = add_cors_headers(app)
return server, app
server.wsgi_app = GzipRequestMiddleware(server.wsgi_app)
server.wsgi_app = ProxyFix(server.wsgi_app)
server.wsgi_app = SentryWsgiMiddleware(server.wsgi_app)
server.wsgi_app = replace_double_slashes(server.wsgi_app)
server.wsgi_app = add_cors_headers(server.wsgi_app)
return server
import sqlalchemy
import sqlalchemy.event
from sqlalchemy import (
MetaData, Table, Column, Index,
MetaData, Table, Column, Index, Sequence,
ForeignKey, CheckConstraint,
Integer, String, DateTime, Boolean, Date, Text, SmallInteger, BigInteger, CHAR,
DDL, sql,
......@@ -103,7 +104,7 @@ source = Table('source', metadata,
Index('source_idx_uniq', 'application_id', 'account_id', 'version', unique=True),
)
submission = Table('submission', metadata,
submission_old = Table('submission_old', metadata,
Column('id', Integer, primary_key=True),
Column('fingerprint', ARRAY(Integer), nullable=False),
Column('length', SmallInteger, CheckConstraint('length>0'), nullable=False),
......@@ -118,7 +119,55 @@ submission = Table('submission', metadata,
Column('foreignid_id', Integer, ForeignKey('foreignid.id')),
)
Index('submission_idx_handled', submission.c.id, postgresql_where=submission.c.handled == False) # noqa: E712
Index('submission_idx_handled', submission_old.c.id, postgresql_where=submission_old.c.handled == False) # noqa: E712
submission_id_seq = Sequence('submission_id_seq', metadata=metadata)
submission = Table('submission', metadata,
Column('id', Integer, submission_id_seq, server_default=submission_id_seq.next_value(), primary_key=True),
Column('created', DateTime(timezone=True), server_default=sql.func.current_timestamp(), nullable=False),
Column('handled', Boolean, default=False, server_default=sql.false()),
Column('account_id', Integer, nullable=False), # ForeignKey('account.id')
Column('application_id', Integer, nullable=False), # ForeignKey('application.id')
Column('application_version', String),
Column('fingerprint', ARRAY(Integer), nullable=False),
Column('duration', Integer, CheckConstraint('duration>0'), nullable=False),
Column('bitrate', Integer, CheckConstraint('bitrate>0')),
Column('format', String),
Column('mbid', UUID),
Column('puid', UUID),
Column('foreignid', String),
Column('track', String),
Column('artist', String),
Column('album', String),
Column('album_artist', String),
Column('track_no', Integer),
Column('disc_no', Integer),
Column('year', Integer),
info={'bind_key': 'slow'},
)
submission_result = Table('submission_result', metadata,
Column('submission_id', Integer, primary_key=True, autoincrement=False),
Column('created', DateTime(timezone=True), server_default=sql.func.current_timestamp(), nullable=False),
Column('account_id', Integer, nullable=False), # ForeignKey('account.id')
Column('application_id', Integer, nullable=False), # ForeignKey('application.id')
Column('application_version', String),
Column('fingerprint_id', Integer, nullable=False), # ForeignKey('fingerprint.id')
Column('track_id', Integer, nullable=False), # ForeignKey('track.id')
Column('meta_id', Integer), # ForeignKey('meta.id')
Column('mbid', UUID),
Column('puid', UUID),
Column('foreignid', String),
info={'bind_key': 'slow'},
)
stats = Table('stats', metadata,
Column('id', Integer, primary_key=True),
......@@ -193,6 +242,8 @@ fingerprint = Table('fingerprint', metadata,
Index('fingerprint_idx_track_id', 'track_id'),
)
fingerprint.add_is_dependent_on(track)
fingerprint_source = Table('fingerprint_source', metadata,
Column('id', Integer, primary_key=True),
Column('fingerprint_id', Integer, ForeignKey('fingerprint.id'), nullable=False),
......
......@@ -9,6 +9,7 @@ import datetime
import hmac
import base64
import six
from acoustid.const import MAX_FOREIGNID_NAMESPACE_LENGTH, MAX_FOREIGNID_VALUE_LENGTH
from six.moves.urllib.request import urlopen
from six.moves.urllib.parse import urlencode
from logging import Handler
......@@ -54,7 +55,15 @@ def is_int(s):
def is_foreignid(s):
    """Check whether ``s`` is a well-formed foreign ID (``namespace:value``).

    The namespace must be one or more characters from ``[0-9a-z]`` and no
    longer than ``MAX_FOREIGNID_NAMESPACE_LENGTH``; the value must be
    non-empty and no longer than ``MAX_FOREIGNID_VALUE_LENGTH``.

    :param s: candidate string
    :returns: ``True`` if ``s`` has the expected shape and both parts are
        within their length limits, ``False`` otherwise
    """
    match = re.match(r'^([0-9a-z]+):(.+)$', s)
    if match is None:
        return False
    namespace, value = match.groups()
    # The pattern only checks the shape; the length limits are enforced here.
    if len(namespace) > MAX_FOREIGNID_NAMESPACE_LENGTH:
        return False
    if len(value) > MAX_FOREIGNID_VALUE_LENGTH:
        return False
    return True
def singular(plural):
......
......@@ -5,9 +5,10 @@ import sentry_sdk
from sentry_sdk.integrations.flask import FlaskIntegration
from flask import Flask, request, session
from flask.sessions import SecureCookieSessionInterface
from werkzeug.contrib.fixers import ProxyFix
from werkzeug.middleware.proxy_fix import ProxyFix
from sqlalchemy.orm import scoped_session
from acoustid.script import Script
from acoustid.db import get_session_args
from acoustid.web import db
from acoustid.web.views.general import general_page
from acoustid.web.views.user import user_page
......@@ -36,6 +37,8 @@ def make_application(config_filename=None, tests=False):
GOOGLE_OAUTH_CLIENT_ID=config.website.google_oauth_client_id,
GOOGLE_OAUTH_CLIENT_SECRET=config.website.google_oauth_client_secret,
)
if tests:
app.config['TESTING'] = True
app.acoustid_script = script
app.acoustid_config = config
......@@ -88,7 +91,7 @@ def make_application(config_filename=None, tests=False):
from acoustid.api import get_health_response
return get_health_response(script, request)
db.session_factory.configure(bind=config.database.create_engine())
db.session_factory.configure(**get_session_args(script))
db.session = scoped_session(db.session_factory, scopefunc=get_flask_request_scope)
app.register_blueprint(general_page)
......
......@@ -33,7 +33,7 @@ def render_page(name, **context):
text = file.read().decode('utf8')
text = render_template_string(text, **context)
md = Markdown(extensions=['meta'])
md.treeprocessors["flask_links"] = MarkdownFlaskUrlProcessor(md)
md.treeprocessors.register(MarkdownFlaskUrlProcessor(md), 'flask_links', 50)
html = md.convert(text)
title = ' '.join(md.Meta.get('title', []))
return render_template('page.html', content=html, title=title)
......
CREATE DATABASE "acoustid";
CREATE DATABASE "acoustid_test";
CREATE DATABASE "acoustid_slow";
CREATE DATABASE "acoustid_slow_test";
\c acoustid
create extension intarray;
create extension pgcrypto;
......@@ -12,3 +15,15 @@ create extension intarray;
create extension pgcrypto;
create extension acoustid;
create extension cube;
\c acoustid_slow
create extension intarray;
create extension pgcrypto;
create extension acoustid;
create extension cube;
\c acoustid_slow_test
create extension intarray;
create extension pgcrypto;
create extension acoustid;
create extension cube;
......@@ -29,6 +29,8 @@ script_location = alembic
# are written from script.py.mako
# output_encoding = utf-8
databases = default, slow
# Logging configuration
[loggers]
keys = root,sqlalchemy,alembic
......
Generic single-database configuration.
\ No newline at end of file
AcoustID's multi-database configuration.
from __future__ import with_statement
import os
import logging
from alembic import context
from sqlalchemy import engine_from_config, pool
from logging.config import fileConfig
config = context.config
fileConfig(config.config_file_name)
logger = logging.getLogger("alembic.env")
import acoustid.tables
target_metadata = acoustid.tables.metadata
......@@ -17,12 +19,25 @@ acoustid_config = acoustid.config.Config()
acoustid_config.read(acoustid_config_filename)
acoustid_config.read_env()
def include_object(obj, name, type, reflected, compare_to):
if type == "table" and obj.schema == "musicbrainz":
return False
if type == "column" and not obj.table.schema == "musicbrainz":
return False
return True
use_two_phase_commit = acoustid_config.databases.use_two_phase_commit
def include_object(db_name):
    """Build an ``include_object`` filter callback for the database ``db_name``.

    The returned callable rejects tables (and columns of tables) that live
    in the ``musicbrainz`` schema or whose ``info['bind_key']`` (defaulting
    to ``'default'``) differs from ``db_name``. Objects of any other type
    are always accepted.
    """
    def _table_belongs(table):
        # A table is kept only if it is outside the musicbrainz schema and
        # is bound to the database this filter was built for.
        if table.schema == "musicbrainz":
            return False
        return table.info.get('bind_key', 'default') == db_name

    def inner(obj, name, obj_type, reflected, compare_to):
        if obj_type == "table":
            return _table_belongs(obj)
        if obj_type == "column":
            return _table_belongs(obj.table)
        return True

    return inner
def run_migrations_offline():
......@@ -37,13 +52,18 @@ def run_migrations_offline():
script output.
"""
url = acoustid_config.database.create_url()
context.configure(
url=url, target_metadata=target_metadata, literal_binds=True,
include_object=include_object)
for name, db_config in acoustid_config.databases.databases.items():
logger.info("Migrating database %s" % name)
context.configure(
url=db_config.create_url(),
target_metadata=target_metadata,
literal_binds=True,
include_object=include_object(name),
)
with context.begin_transaction():
context.run_migrations()
with context.begin_transaction():
context.run_migrations()
def run_migrations_online():
......@@ -53,17 +73,45 @@ def run_migrations_online():
and associate a connection with the context.
"""
connectable = acoustid_config.database.create_engine(poolclass=pool.NullPool)
with connectable.connect() as connection:
context.configure(
connection=connection,
target_metadata=target_metadata,
include_object=include_object,
)
with context.begin_transaction():
context.run_migrations()
engines = {}
for name, db_config in acoustid_config.databases.databases.items():
engines[name] = rec = {}
rec["engine"] = db_config.create_engine(poolclass=pool.NullPool)
for name, rec in engines.items():
engine = rec["engine"]
rec["connection"] = conn = engine.connect()
if use_two_phase_commit:
rec["transaction"] = conn.begin_twophase()
else:
rec["transaction"] = conn.begin()
try:
for name, rec in engines.items():
logger.info("Migrating database %s" % name)
context.configure(
connection=rec["connection"],
upgrade_token="%s_upgrades" % name,
downgrade_token="%s_downgrades" % name,
target_metadata=target_metadata,
include_object=include_object(name),
)
context.run_migrations(engine_name=name)
if use_two_phase_commit:
for rec in engines.values():
rec["transaction"].prepare()
for rec in engines.values():
rec["transaction"].commit()
except:
for rec in engines.values():
rec["transaction"].rollback()
raise
finally:
for rec in engines.values():
rec["connection"].close()
if context.is_offline_mode():
......
"""${message}
<%!
import re
%>"""${message}
Revision ID: ${up_revision}
Revises: ${down_revision | comma,n}
Create Date: ${create_date}
"""
from alembic import op
import sqlalchemy as sa
${imports if imports else ""}
# revision identifiers, used by Alembic.
revision = ${repr(up_revision)}
......@@ -12,13 +18,20 @@ down_revision = ${repr(down_revision)}
branch_labels = ${repr(branch_labels)}
depends_on = ${repr(depends_on)}
from alembic import op
import sqlalchemy as sa
${imports if imports else ""}
def upgrade():
${upgrades if upgrades else "pass"}
def upgrade(engine_name):
globals()["upgrade_%s" % engine_name]()
def downgrade(engine_name):
globals()["downgrade_%s" % engine_name]()
% for db_name in re.split(r',\s*', config.get_main_option("databases")):
def upgrade_${db_name}():
${context.get("%s_upgrades" % db_name, "pass")}
def downgrade():
${downgrades if downgrades else "pass"}
def downgrade_${db_name}():
${context.get("%s_downgrades" % db_name, "pass")}
% endfor
......@@ -16,7 +16,15 @@ from alembic import op
import sqlalchemy as sa
def upgrade():
def upgrade(engine_name):
globals()["upgrade_%s" % engine_name]()