`TodosDestroyer::EntityLeaveWorker` generates expensive SQL queries with multiple and unprotected namespace CTEs
Background
Today at 10:52 UTC, we received the following alert in production:
This was because as a percentage of all tuple-fetches, 80% taking place for an extended period were in the namespaces
table.
Reviewing the slowlogs on the primary, about 70% during this brief period were for TodosDestroyer::EntityLeaveWorker
, with the following gigantic SQL query being executed.
DELETE FROM "todos"
WHERE "todos"."group_id" IN (
SELECT
"id"
FROM (
SELECT
"namespaces".*
FROM
"namespaces"
INNER JOIN ( WITH RECURSIVE "base_and_descendants" AS ((
SELECT
$1 AS depth,
ARRAY[namespaces.id] AS tree_path,
$2 AS tree_cycle,
"namespaces".*
FROM
"namespaces"
WHERE
"namespaces"."type" = $3
AND "namespaces"."id" = $4)
UNION (
SELECT
("base_and_descendants"."depth" + $5),
tree_path || "namespaces".id,
"namespaces".id = ANY (tree_path),
"namespaces".*
FROM
"namespaces",
"base_and_descendants"
WHERE
"namespaces"."type" = $6
AND "namespaces"."parent_id" = "base_and_descendants"."id"
AND "base_and_descendants"."tree_cycle" = $7))
SELECT DISTINCT
"id",
"depth"
FROM
"base_and_descendants" AS "namespaces") namespaces_join_table ON namespaces_join_table.id = namespaces.id
WHERE
"namespaces"."type" = $8
ORDER BY
"namespaces_join_table"."depth" ASC) AS "namespaces"
WHERE
"namespaces"."type" = $9
AND "namespaces"."id" NOT IN (
SELECT
"namespaces"."id"
FROM (( WITH RECURSIVE "base_and_ancestors" AS ((
SELECT
"namespaces".*
FROM (( WITH "direct_groups" AS (
SELECT
"namespaces".*
FROM ((
SELECT
"namespaces".*
FROM
"namespaces"
INNER JOIN "members" ON "namespaces"."id" = "members"."source_id"
WHERE
"members"."type" = $10
AND "members"."source_type" = $11
AND "namespaces"."type" = $12
AND "members"."user_id" = $13
AND "members"."requested_at" IS NULL
AND (access_level >= $14))
UNION (
SELECT
namespaces.*
FROM
"projects"
INNER JOIN "project_authorizations" ON "projects"."id" = "project_authorizations"."project_id"
INNER JOIN "namespaces" ON "namespaces"."id" = "projects"."namespace_id"
WHERE
"project_authorizations"."user_id" = $15)) namespaces
WHERE
"namespaces"."type" = $16
)
SELECT
"namespaces".*
FROM ((
SELECT
"namespaces".*
FROM
"direct_groups" "namespaces"
WHERE
"namespaces"."type" = $17)
UNION (
SELECT
"namespaces".*
FROM
"namespaces"
INNER JOIN "group_group_links" ON "group_group_links"."shared_group_id" = "namespaces"."id"
WHERE
"namespaces"."type" = $18
AND "group_group_links"."shared_with_group_id" IN (
SELECT
"namespaces"."id"
FROM
"direct_groups" "namespaces"
WHERE
"namespaces"."type" = $19))) namespaces
WHERE
"namespaces"."type" = $20)
UNION (
SELECT
"namespaces".*
FROM
"namespaces"
INNER JOIN "members" ON "namespaces"."id" = "members"."source_id"
WHERE
"members"."type" = $21
AND "members"."source_type" = $22
AND "namespaces"."type" = $23
AND "members"."user_id" = $24
AND "members"."access_level" = $25
AND (EXISTS (
SELECT
$26
FROM
"plans"
INNER JOIN "gitlab_subscriptions" ON "gitlab_subscriptions"."hosted_plan_id" = "plans"."id"
WHERE
"plans"."name" IN ($27, $28, $29, $30)
AND (gitlab_subscriptions.namespace_id = namespaces.id))))) namespaces
WHERE
"namespaces"."type" = $31)
UNION (
SELECT
"namespaces".*
FROM
"namespaces",
"base_and_ancestors"
WHERE
"namespaces"."type" = $32
AND "namespaces"."id" = "base_and_ancestors"."parent_id")),
"base_and_descendants" AS ((
SELECT
"namespaces".*
FROM
"namespaces"
INNER JOIN "members" ON "namespaces"."id" = "members"."source_id"
WHERE
"members"."type" = $33
AND "members"."source_type" = $34
AND "namespaces"."type" = $35
AND "members"."user_id" = $36
AND "members"."requested_at" IS NULL
AND (access_level >= $37))
UNION (
SELECT
"namespaces".*
FROM
"namespaces",
"base_and_descendants"
WHERE
"namespaces"."type" = $38
AND "namespaces"."parent_id" = "base_and_descendants"."id"))
SELECT
"namespaces".*
FROM ((
SELECT
"namespaces".*
FROM
"base_and_ancestors" AS "namespaces"
WHERE
"namespaces"."type" = $39)
UNION (
SELECT
"namespaces".*
FROM
"base_and_descendants" AS "namespaces"
WHERE
"namespaces"."type" = $40)) namespaces
WHERE
"namespaces"."type" = $41)
UNION (
SELECT
"namespaces".*
FROM
"namespaces"
WHERE
"namespaces"."type" = $42
AND "namespaces"."visibility_level" IN ($43, $44))) namespaces
WHERE
"namespaces"."type" = $45
ORDER BY
"namespaces"."id" DESC))
AND "todos"."user_id" = $46
Looking in the Sidekiq logs, we could see that each of these queries was spending around 15s in statement time on the primary.
Over the past hour, we see how this worker's database time spiked in unison with the alert
https://log.gprd.gitlab.net/goto/3ef5b7f8b1d82d2f0526c980627ba02a
Over a longer period (2 days), this worker has a p95 of around 15s.
https://log.gprd.gitlab.net/goto/6a90d85f69a54a021ca73b39ceae50f4
Request
We should break this query up or consider other approaches to improving it, particularly in light of the tuple fetch cascading effect that we're seeing.