Optimize query for CI pipelines of merge request
What does this MR do?
When an MR with a large number of commits is opened, the number of pipelines is calculated for the latest 10,000
commits. In order to do that an SQL query is performed to check how many of ci_pipelines
belong to these 10,000
commits via source_sha
or sha
. Here's an example of such query for 20
commits:
SELECT * FROM (
(SELECT ci_pipelines.* FROM ci_pipelines WHERE ci_pipelines.project_id = 278964
AND (ci_pipelines.config_source IN (1, 2) OR ci_pipelines.config_source IS NULL)
AND ci_pipelines.source = 10 AND ci_pipelines.merge_request_id = 41385074
AND ci_pipelines.source_sha IN (
'73dacea9cf40c50534b676a2031a6e6c71229775', '254df0104abe3f8a8b210c41e9006c7c12d66c2c', 'c7d9fbb3c58f3ca5d1dd719723ae8a171dc93e38',
'6d3cabd8d1bf88bb9a9f3e2d45ab306ecbbc25a5', '4c96c6a6c70b2cd0936b14dc7dc10074fe520c91', '44dae7a8af974f0b23b992b2394955febbb584d9',
'c8f8098daaeccb6c11c32ddb080826a671e3b665', '2d42fd8a16ff01b6543d83169d17ba804fd946be', '55b99b83f5dc29d6c953be0e9e23c435d6b3d825',
'64487732b6ce00fea50177b5d339b9a6d565250a', 'a1da4104a103050eb1f181e6f50a0a13101d9cbf', '4d0c7c9fc20a560093ba15336350dc813ea37a8f',
'a10767acc07657219e3851a8d96a2d025d8dbec8', 'a50b837c680f99228d64ae4746837bc0b1a518a2', '4781c6dac2da74963e41112537113599ca2d540f',
'831d60aad41639687c299ffb644d5fe2650935d8', '990c6e83391ed9a3be2f569427d8f1182a764b85', '0418570ad38a97030e6b0100d30ced90eea3b46e',
'bb472ffd55734193d8495c461694db93e6aa8620', 'c58a96b8cd207efcfa95efb5c32d0332e46a98e3'))
UNION ALL (
SELECT ci_pipelines.* FROM ci_pipelines WHERE ci_pipelines.project_id = 278964
AND (ci_pipelines.config_source IN (1, 2) OR ci_pipelines.config_source IS NULL)
AND ci_pipelines.source = 10 AND ci_pipelines.merge_request_id = 41385074
AND ci_pipelines.sha IN (
'73dacea9cf40c50534b676a2031a6e6c71229775', '254df0104abe3f8a8b210c41e9006c7c12d66c2c', 'c7d9fbb3c58f3ca5d1dd719723ae8a171dc93e38',
'6d3cabd8d1bf88bb9a9f3e2d45ab306ecbbc25a5', '4c96c6a6c70b2cd0936b14dc7dc10074fe520c91', '44dae7a8af974f0b23b992b2394955febbb584d9',
'c8f8098daaeccb6c11c32ddb080826a671e3b665', '2d42fd8a16ff01b6543d83169d17ba804fd946be', '55b99b83f5dc29d6c953be0e9e23c435d6b3d825',
'64487732b6ce00fea50177b5d339b9a6d565250a', 'a1da4104a103050eb1f181e6f50a0a13101d9cbf', '4d0c7c9fc20a560093ba15336350dc813ea37a8f',
'a10767acc07657219e3851a8d96a2d025d8dbec8', 'a50b837c680f99228d64ae4746837bc0b1a518a2', '4781c6dac2da74963e41112537113599ca2d540f',
'831d60aad41639687c299ffb644d5fe2650935d8', '990c6e83391ed9a3be2f569427d8f1182a764b85', '0418570ad38a97030e6b0100d30ced90eea3b46e',
'bb472ffd55734193d8495c461694db93e6aa8620', 'c58a96b8cd207efcfa95efb5c32d0332e46a98e3'))
UNION ALL (SELECT ci_pipelines.* FROM ci_pipelines WHERE ci_pipelines.project_id = 278964
AND (ci_pipelines.config_source IN (1, 2) OR ci_pipelines.config_source IS NULL)
AND (ci_pipelines.source IN (1, 2, 3, 4, 5, 6, 7, 8, 11, 9) OR ci_pipelines.source IS NULL)
AND ci_pipelines.ref = ‘master’
AND ci_pipelines.tag = FALSE
AND ci_pipelines.sha IN(
'73dacea9cf40c50534b676a2031a6e6c71229775', '254df0104abe3f8a8b210c41e9006c7c12d66c2c', 'c7d9fbb3c58f3ca5d1dd719723ae8a171dc93e38',
'6d3cabd8d1bf88bb9a9f3e2d45ab306ecbbc25a5', '4c96c6a6c70b2cd0936b14dc7dc10074fe520c91', '44dae7a8af974f0b23b992b2394955febbb584d9',
'c8f8098daaeccb6c11c32ddb080826a671e3b665', '2d42fd8a16ff01b6543d83169d17ba804fd946be', '55b99b83f5dc29d6c953be0e9e23c435d6b3d825',
'64487732b6ce00fea50177b5d339b9a6d565250a', 'a1da4104a103050eb1f181e6f50a0a13101d9cbf', '4d0c7c9fc20a560093ba15336350dc813ea37a8f',
'a10767acc07657219e3851a8d96a2d025d8dbec8', 'a50b837c680f99228d64ae4746837bc0b1a518a2', '4781c6dac2da74963e41112537113599ca2d540f',
'831d60aad41639687c299ffb644d5fe2650935d8', '990c6e83391ed9a3be2f569427d8f1182a764b85', '0418570ad38a97030e6b0100d30ced90eea3b46e',
'bb472ffd55734193d8495c461694db93e6aa8620', 'c58a96b8cd207efcfa95efb5c32d0332e46a98e3')
)
) ci_pipelines
For a larger number of commits, it's obviously much bigger request.
Here's a query plan for 20 commits: https://explain.depesz.com/s/JOWW
This MR introduces the changes which provide a subquery instead of a list of commits:
SELECT * FROM (
(SELECT ci_pipelines.* FROM ci_pipelines WHERE ci_pipelines.project_id = 278964
AND (ci_pipelines.config_source IN (1, 2) OR ci_pipelines.config_source IS NULL)
AND ci_pipelines.source = 10 AND ci_pipelines.merge_request_id = 41385074
AND ci_pipelines.source_sha IN (
SELECT merge_request_diff_commits.sha FROM merge_request_diff_commits WHERE merge_request_diff_commits.merge_request_diff_id IN (
SELECT merge_request_diffs.id FROM merge_request_diffs WHERE merge_request_diffs.merge_request_id = 41385074 ORDER BY merge_request_diffs.id DESC LIMIT 100) LIMIT 10000))
UNION ALL (
SELECT ci_pipelines.* FROM ci_pipelines WHERE ci_pipelines.project_id = 278964
AND (ci_pipelines.config_source IN (1, 2) OR ci_pipelines.config_source IS NULL)
AND ci_pipelines.source = 10 AND ci_pipelines.merge_request_id = 41385074
AND ci_pipelines.sha IN (
SELECT encode(sha, ‘hex’) FROM merge_request_diff_commits WHERE merge_request_diff_commits.merge_request_diff_id IN (
SELECT merge_request_diffs.id FROM merge_request_diffs WHERE merge_request_diffs.merge_request_id = 41385074 ORDER BY merge_request_diffs.id DESC LIMIT 100) LIMIT 10000))
UNION ALL (SELECT ci_pipelines.* FROM ci_pipelines WHERE ci_pipelines.project_id = 278964
AND (ci_pipelines.config_source IN (1, 2) OR ci_pipelines.config_source IS NULL)
AND (ci_pipelines.source IN (1, 2, 3, 4, 5, 6, 7, 8, 11, 9) OR ci_pipelines.source IS NULL)
AND ci_pipelines.ref = ‘master’ AND ci_pipelines.tag = FALSE
AND ci_pipelines.sha IN (
SELECT encode(sha, ‘hex’) FROM merge_request_diff_commits WHERE merge_request_diff_commits.merge_request_diff_id IN (
SELECT merge_request_diffs.id FROM merge_request_diffs WHERE merge_request_diffs.merge_request_id = 41385074 ORDER BY merge_request_diffs.id DESC LIMIT 100) LIMIT 10000))) ci_pipelines
Here's a query plan for this request: https://explain.depesz.com/s/nKn1
Query which uses WITH
for optimization:
WITH merge_request_diff_commits AS
(SELECT merge_request_diff_commits.*
FROM merge_request_diff_commits
WHERE merge_request_diff_commits.merge_request_diff_id IN
(SELECT merge_request_diffs.id
FROM merge_request_diffs
WHERE merge_request_diffs.merge_request_id = 41385074
ORDER BY merge_request_diffs.id DESC
LIMIT 100)
LIMIT 10000)
SELECT ci_pipelines.*
FROM (
(SELECT ci_pipelines.*
FROM ci_pipelines
WHERE ci_pipelines.project_id = 278964
AND (ci_pipelines.config_source IN (1, 2)
OR ci_pipelines.config_source IS NULL)
AND ci_pipelines.source = 10
AND ci_pipelines.merge_request_id = 41385074
AND ci_pipelines.source_sha IN
(SELECT merge_request_diff_commits.sha
FROM merge_request_diff_commits))
UNION ALL
(SELECT ci_pipelines.*
FROM ci_pipelines
WHERE ci_pipelines.project_id = 278964
AND (ci_pipelines.config_source IN (1, 2)
OR ci_pipelines.config_source IS NULL)
AND ci_pipelines.source = 10
AND ci_pipelines.merge_request_id = 41385074
AND ci_pipelines.sha IN
(SELECT encode(sha, 'hex')
FROM merge_request_diff_commits))
UNION ALL
(SELECT ci_pipelines.*
FROM ci_pipelines
WHERE ci_pipelines.project_id = 278964
AND (ci_pipelines.config_source IN (1, 2)
OR ci_pipelines.config_source IS NULL)
AND (ci_pipelines.source IN (1, 2, 3, 4, 5, 6, 7, 8, 11, 9)
OR ci_pipelines.source IS NULL)
AND ci_pipelines.ref = '11-9-stable-ee’'
AND ci_pipelines.tag = FALSE
AND ci_pipelines.sha IN
(SELECT encode(sha, 'hex')
FROM merge_request_diff_commits))) ci_pipelines
ORDER BY CASE ci_pipelines.source
WHEN (10) THEN 0
ELSE 1
END,
ci_pipelines.id DESC
Query plan: https://explain.depesz.com/s/sgHr
Query which uses INNER JOIN
instead of a subquery:
WITH shas AS
(SELECT merge_request_diff_commits.sha
FROM merge_request_diff_commits
WHERE merge_request_diff_commits.merge_request_diff_id IN
(SELECT merge_request_diffs.id
FROM merge_request_diffs
WHERE merge_request_diffs.merge_request_id = 41385074
ORDER BY merge_request_diffs.id DESC
LIMIT 100)
LIMIT 10000)
SELECT ci_pipelines.*
FROM (
(SELECT ci_pipelines.*
FROM ci_pipelines
INNER JOIN shas ON shas.sha = ci_pipelines.source_sha
WHERE ci_pipelines.project_id = 278964
AND (ci_pipelines.config_source IN (1,
2)
OR ci_pipelines.config_source IS NULL)
AND ci_pipelines.source = 10
AND ci_pipelines.merge_request_id = 41385074)
UNION ALL
(SELECT ci_pipelines.*
FROM ci_pipelines
INNER JOIN shas ON encode(shas.sha, 'hex') = ci_pipelines.sha
WHERE ci_pipelines.project_id = 278964
AND (ci_pipelines.config_source IN (1,
2)
OR ci_pipelines.config_source IS NULL)
AND ci_pipelines.source = 10
AND ci_pipelines.merge_request_id = 41385074)
UNION ALL
(SELECT ci_pipelines.*
FROM ci_pipelines
INNER JOIN shas ON encode(shas.sha, 'hex') = ci_pipelines.sha
WHERE ci_pipelines.project_id = 278964
AND (ci_pipelines.config_source IN (1,
2)
OR ci_pipelines.config_source IS NULL)
AND (ci_pipelines.source IN (1, 2, 3, 4, 5, 6, 7, 8, 11, 9)
OR ci_pipelines.source IS NULL)
AND ci_pipelines.ref = '11-9-stable-ee'
AND ci_pipelines.tag = FALSE)) ci_pipelines
ORDER BY CASE ci_pipelines.source
WHEN (10) THEN 0
ELSE 1
END,
ci_pipelines.id DESC
Query plan: https://explain.depesz.com/s/KhJw
Related issue: #35612 (closed)