Skip to content

Commit

Permalink
MBS-9253: List EP release groups above singles
Browse files Browse the repository at this point in the history
This has been requested forever, and it makes sense in all markets except
maybe Japan. Given Japan would want singles before albums,
which we don't do anyway, this is probably not too relevant.
  • Loading branch information
reosarevok committed Oct 17, 2024
1 parent 4abc0a5 commit 08bda14
Show file tree
Hide file tree
Showing 5 changed files with 211 additions and 3 deletions.
7 changes: 6 additions & 1 deletion admin/sql/CreateFunctions.sql
Original file line number Diff line number Diff line change
Expand Up @@ -1928,7 +1928,12 @@ BEGIN
a_rg.artist,
-- Withdrawn releases were once official by definition
bool_and(r.status IS NOT NULL AND r.status != 1 AND r.status != 5),
rg.type::SMALLINT,
(rg.type ORDER BY CASE
WHEN rg.type = 3 THEN 2 -- Sort EPs above singles
WHEN rg.type = 2 THEN 3 -- Sort singles below EPs
ELSE rg.type
END ASC
)::SMALLINT,
array_agg(
DISTINCT st.secondary_type ORDER BY st.secondary_type)
FILTER (WHERE st.secondary_type IS NOT NULL
Expand Down
93 changes: 93 additions & 0 deletions admin/sql/updates/20241017-mbs-9253.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
\set ON_ERROR_STOP 1

BEGIN;

CREATE OR REPLACE FUNCTION get_artist_release_group_rows(
release_group_id INTEGER
) RETURNS SETOF artist_release_group AS $$
BEGIN
-- PostgreSQL 12 generates a vastly more efficient plan when only
-- one release group ID is passed. A condition like
-- `rg.id = any(...)` can be over 200x slower, even with only one
-- release group ID in the array.
RETURN QUERY EXECUTE $SQL$
SELECT DISTINCT ON (a_rg.artist, rg.id)
a_rg.is_track_artist,
a_rg.artist,
-- Withdrawn releases were once official by definition
bool_and(r.status IS NOT NULL AND r.status != 1 AND r.status != 5),
(rg.type ORDER BY CASE
WHEN rg.type = 3 THEN 2 -- Sort EPs above singles
WHEN rg.type = 2 THEN 3 -- Sort singles below EPs
ELSE rg.type
END ASC
)::SMALLINT,
array_agg(
DISTINCT st.secondary_type ORDER BY st.secondary_type)
FILTER (WHERE st.secondary_type IS NOT NULL
)::SMALLINT[],
integer_date(
rgm.first_release_date_year,
rgm.first_release_date_month,
rgm.first_release_date_day
),
left(rg.name, 1)::CHAR(1),
rg.id
FROM (
SELECT FALSE AS is_track_artist, rgacn.artist, rg.id AS release_group
FROM release_group rg
JOIN artist_credit_name rgacn ON rgacn.artist_credit = rg.artist_credit
UNION ALL
SELECT TRUE AS is_track_artist, tacn.artist, r.release_group
FROM release r
JOIN medium m ON m.release = r.id
JOIN track t ON t.medium = m.id
JOIN artist_credit_name tacn ON tacn.artist_credit = t.artist_credit
) a_rg
JOIN release_group rg ON rg.id = a_rg.release_group
LEFT JOIN release r ON r.release_group = rg.id
JOIN release_group_meta rgm ON rgm.id = rg.id
LEFT JOIN release_group_secondary_type_join st ON st.release_group = rg.id
$SQL$ || (CASE WHEN release_group_id IS NULL THEN '' ELSE 'WHERE rg.id = $1' END) ||
$SQL$
GROUP BY a_rg.is_track_artist, a_rg.artist, rgm.id, rg.id
ORDER BY a_rg.artist, rg.id, a_rg.is_track_artist
$SQL$
USING release_group_id;
END;
$$ LANGUAGE plpgsql;

-- We update the table for any existing RGs of type Single or EP
-- with this one-off script; the updated function will keep it up after this.
DO $$
DECLARE
release_group_ids INTEGER[];
release_group_id INTEGER;
BEGIN
SELECT array_agg(DISTINCT rg.id)
INTO release_group_ids
FROM release_group rg
WHERE rg.type = 2 OR rg.type = 3; -- Single or EP

IF coalesce(array_length(release_group_ids, 1), 0) > 0 THEN
-- If the user hasn't generated `artist_release_group`, then we
-- shouldn't update or insert to it. MBS determines whether to
-- use this table based on it being non-empty, so a partial
-- table would manifest as partial data on the website and
-- webservice.
PERFORM 1 FROM artist_release_group LIMIT 1;
IF FOUND THEN
DELETE FROM artist_release_group WHERE release_group = any(release_group_ids);

FOREACH release_group_id IN ARRAY release_group_ids LOOP
-- We handle each release group ID separately because
-- the `get_artist_release_group_rows` query can be
-- planned much more efficiently that way.
INSERT INTO artist_release_group
SELECT * FROM get_artist_release_group_rows(release_group_id);
END LOOP;
END IF;
END IF;
END $$;

COMMIT;
99 changes: 99 additions & 0 deletions admin/sql/updates/schema-change/30.all.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
-- Generated by CompileSchemaScripts.pl from:
-- 20241017-mbs-9253.sql
\set ON_ERROR_STOP 1
BEGIN;
SET search_path = musicbrainz, public;
SET LOCAL statement_timeout = 0;
--------------------------------------------------------------------------------
SELECT '20241017-mbs-9253.sql';


CREATE OR REPLACE FUNCTION get_artist_release_group_rows(
release_group_id INTEGER
) RETURNS SETOF artist_release_group AS $$
BEGIN
-- PostgreSQL 12 generates a vastly more efficient plan when only
-- one release group ID is passed. A condition like
-- `rg.id = any(...)` can be over 200x slower, even with only one
-- release group ID in the array.
RETURN QUERY EXECUTE $SQL$
SELECT DISTINCT ON (a_rg.artist, rg.id)
a_rg.is_track_artist,
a_rg.artist,
-- Withdrawn releases were once official by definition
bool_and(r.status IS NOT NULL AND r.status != 1 AND r.status != 5),
(rg.type ORDER BY CASE
WHEN rg.type = 3 THEN 2 -- Sort EPs above singles
WHEN rg.type = 2 THEN 3 -- Sort singles below EPs
ELSE rg.type
END ASC
)::SMALLINT,
array_agg(
DISTINCT st.secondary_type ORDER BY st.secondary_type)
FILTER (WHERE st.secondary_type IS NOT NULL
)::SMALLINT[],
integer_date(
rgm.first_release_date_year,
rgm.first_release_date_month,
rgm.first_release_date_day
),
left(rg.name, 1)::CHAR(1),
rg.id
FROM (
SELECT FALSE AS is_track_artist, rgacn.artist, rg.id AS release_group
FROM release_group rg
JOIN artist_credit_name rgacn ON rgacn.artist_credit = rg.artist_credit
UNION ALL
SELECT TRUE AS is_track_artist, tacn.artist, r.release_group
FROM release r
JOIN medium m ON m.release = r.id
JOIN track t ON t.medium = m.id
JOIN artist_credit_name tacn ON tacn.artist_credit = t.artist_credit
) a_rg
JOIN release_group rg ON rg.id = a_rg.release_group
LEFT JOIN release r ON r.release_group = rg.id
JOIN release_group_meta rgm ON rgm.id = rg.id
LEFT JOIN release_group_secondary_type_join st ON st.release_group = rg.id
$SQL$ || (CASE WHEN release_group_id IS NULL THEN '' ELSE 'WHERE rg.id = $1' END) ||
$SQL$
GROUP BY a_rg.is_track_artist, a_rg.artist, rgm.id, rg.id
ORDER BY a_rg.artist, rg.id, a_rg.is_track_artist
$SQL$
USING release_group_id;
END;
$$ LANGUAGE plpgsql;

-- We update the table for any existing RGs of type Single or EP
-- with this one-off script; the updated function will keep it up after this.
DO $$
DECLARE
release_group_ids INTEGER[];
release_group_id INTEGER;
BEGIN
SELECT array_agg(DISTINCT rg.id)
INTO release_group_ids
FROM release_group rg
WHERE rg.type = 2 OR rg.type = 3; -- Single or EP

IF coalesce(array_length(release_group_ids, 1), 0) > 0 THEN
-- If the user hasn't generated `artist_release_group`, then we
-- shouldn't update or insert to it. MBS determines whether to
-- use this table based on it being non-empty, so a partial
-- table would manifest as partial data on the website and
-- webservice.
PERFORM 1 FROM artist_release_group LIMIT 1;
IF FOUND THEN
DELETE FROM artist_release_group WHERE release_group = any(release_group_ids);

FOREACH release_group_id IN ARRAY release_group_ids LOOP
-- We handle each release group ID separately because
-- the `get_artist_release_group_rows` query can be
-- planned much more efficiently that way.
INSERT INTO artist_release_group
SELECT * FROM get_artist_release_group_rows(release_group_id);
END LOOP;
END IF;
END IF;
END $$;

COMMIT;
10 changes: 8 additions & 2 deletions lib/MusicBrainz/Server/Data/ReleaseGroup.pm
Original file line number Diff line number Diff line change
Expand Up @@ -349,14 +349,20 @@ sub _find_by_artist_slow
ON rgstj.secondary_type = rgst.id
WHERE rgstj.release_group = rg.id
ORDER BY name ASC
) secondary_types
) secondary_types,
(CASE
WHEN rg.type = 3 THEN 2 -- Sort EPs above singles
WHEN rg.type = 2 THEN 3 -- Sort singles below EPs
ELSE rg.type
END
) as sorted_type
FROM ' . $self->_table . '
JOIN artist_credit_name acn
ON acn.artist_credit = rg.artist_credit
' . join(' ', @$extra_joins) . '
WHERE ' . join(' AND ', @$conditions) . '
ORDER BY
rg.type, secondary_types,
sorted_type, secondary_types,
rgm.first_release_date_year,
rgm.first_release_date_month,
rgm.first_release_date_day,
Expand Down
5 changes: 5 additions & 0 deletions upgrade.json
Original file line number Diff line number Diff line change
Expand Up @@ -230,5 +230,10 @@
"20240223-mbs-13421-fks.sql",
"20240319-mbs-13514.sql"
]
},
"30": {
"all": [
"20241017-mbs-9253.sql"
]
}
}

0 comments on commit 08bda14

Please sign in to comment.