Fix plan checks for mirror queries
Plans are no longer through namespace.plan_id, but through the root namespace via gitlab_subscriptions. (See gitlab-org/gitlab#196862 (closed) for more details on this being confusing.)
This query matches the one in gitlab-org/gitlab!27796 (merged), which is used for the actual mirroring we do
I tested the mirror queries on a production DB:
require 'gitlab_exporter'
require 'gitlab_exporter/database/row_count'
collector = GitLab::Exporter::Database::RowCountCollector.new(connection_string: 'host=localhost')
GitLab::Exporter::Database::RowCountCollector::QUERIES.each do |key, query|
puts ''
puts '-' * 70
puts ''
puts key
puts collector.send(:construct_query, query)
end
And got these queries and results:
gitlabhq_production=> SELECT COUNT(*) FROM projects INNER JOIN project_mirror_data ON project_mirror_data.project_id = projects.id
gitlabhq_production-> INNER JOIN namespaces AS root_namespaces ON root_namespaces.id = (
gitlabhq_production(> WITH RECURSIVE "base_and_ancestors" AS (
gitlabhq_production(> (SELECT "namespaces".* FROM "namespaces" WHERE "namespaces"."id" = projects.namespace_id)
gitlabhq_production(> UNION
gitlabhq_production(> (SELECT "namespaces".* FROM "namespaces", "base_and_ancestors" WHERE "namespaces"."id" = "base_and_ancestors"."parent_id")
gitlabhq_production(> ) SELECT "namespaces".id FROM "base_and_ancestors" AS "namespaces" WHERE "namespaces"."parent_id" IS NULL
gitlabhq_production(> )
gitlabhq_production-> LEFT JOIN gitlab_subscriptions ON gitlab_subscriptions.namespace_id = root_namespaces.id
gitlabhq_production-> LEFT JOIN plans ON plans.id = gitlab_subscriptions.hosted_plan_id
gitlabhq_production-> WHERE projects.mirror = true
gitlabhq_production-> AND project_mirror_data.retry_count <= 14
gitlabhq_production-> AND (projects.visibility_level = 20 OR plans.name IN ('early_adopter', 'bronze', 'silver', 'gold'))
gitlabhq_production->
gitlabhq_production-> AND project_mirror_data.status NOT IN ('scheduled', 'started')
gitlabhq_production-> AND project_mirror_data.next_execution_timestamp <= NOW()
gitlabhq_production-> ;
count
-------
545
(1 row)
gitlabhq_production=> SELECT COUNT(*) FROM projects INNER JOIN project_mirror_data ON project_mirror_data.project_id = projects.id
gitlabhq_production-> INNER JOIN namespaces AS root_namespaces ON root_namespaces.id = (
gitlabhq_production(> WITH RECURSIVE "base_and_ancestors" AS (
gitlabhq_production(> (SELECT "namespaces".* FROM "namespaces" WHERE "namespaces"."id" = projects.namespace_id)
gitlabhq_production(> UNION
gitlabhq_production(> (SELECT "namespaces".* FROM "namespaces", "base_and_ancestors" WHERE "namespaces"."id" = "base_and_ancestors"."parent_id")
gitlabhq_production(> ) SELECT "namespaces".id FROM "base_and_ancestors" AS "namespaces" WHERE "namespaces"."parent_id" IS NULL
gitlabhq_production(> )
gitlabhq_production-> LEFT JOIN gitlab_subscriptions ON gitlab_subscriptions.namespace_id = root_namespaces.id
gitlabhq_production-> LEFT JOIN plans ON plans.id = gitlab_subscriptions.hosted_plan_id
gitlabhq_production-> WHERE projects.mirror = true
gitlabhq_production-> AND project_mirror_data.retry_count <= 14
gitlabhq_production-> AND (projects.visibility_level = 20 OR plans.name IN ('early_adopter', 'bronze', 'silver', 'gold'))
gitlabhq_production->
gitlabhq_production-> AND project_mirror_data.status NOT IN ('scheduled', 'started')
gitlabhq_production-> AND (project_mirror_data.next_execution_timestamp - project_mirror_data.last_update_at) <= '30 minutes'::interval
gitlabhq_production-> AND project_mirror_data.last_update_at < NOW() - '30 minutes'::interval
gitlabhq_production-> ;
count
-------
1
(1 row)
gitlabhq_production=> SELECT COUNT(*) FROM projects INNER JOIN project_mirror_data ON project_mirror_data.project_id = projects.id
gitlabhq_production-> INNER JOIN namespaces AS root_namespaces ON root_namespaces.id = (
gitlabhq_production(> WITH RECURSIVE "base_and_ancestors" AS (
gitlabhq_production(> (SELECT "namespaces".* FROM "namespaces" WHERE "namespaces"."id" = projects.namespace_id)
gitlabhq_production(> UNION
gitlabhq_production(> (SELECT "namespaces".* FROM "namespaces", "base_and_ancestors" WHERE "namespaces"."id" = "base_and_ancestors"."parent_id")
gitlabhq_production(> ) SELECT "namespaces".id FROM "base_and_ancestors" AS "namespaces" WHERE "namespaces"."parent_id" IS NULL
gitlabhq_production(> )
gitlabhq_production-> LEFT JOIN gitlab_subscriptions ON gitlab_subscriptions.namespace_id = root_namespaces.id
gitlabhq_production-> LEFT JOIN plans ON plans.id = gitlab_subscriptions.hosted_plan_id
gitlabhq_production-> WHERE projects.mirror = true
gitlabhq_production-> AND project_mirror_data.retry_count <= 14
gitlabhq_production-> AND (projects.visibility_level = 20 OR plans.name IN ('early_adopter', 'bronze', 'silver', 'gold'))
gitlabhq_production->
gitlabhq_production-> AND project_mirror_data.status NOT IN ('scheduled', 'started')
gitlabhq_production-> AND project_mirror_data.last_update_at >= NOW() - '30 seconds'::interval
gitlabhq_production-> ;
count
-------
888
(1 row)
gitlabhq_production=> SELECT COUNT(*) FROM projects INNER JOIN project_mirror_data ON project_mirror_data.project_id = projects.id
gitlabhq_production-> INNER JOIN namespaces AS root_namespaces ON root_namespaces.id = (
gitlabhq_production(> WITH RECURSIVE "base_and_ancestors" AS (
gitlabhq_production(> (SELECT "namespaces".* FROM "namespaces" WHERE "namespaces"."id" = projects.namespace_id)
gitlabhq_production(> UNION
gitlabhq_production(> (SELECT "namespaces".* FROM "namespaces", "base_and_ancestors" WHERE "namespaces"."id" = "base_and_ancestors"."parent_id")
gitlabhq_production(> ) SELECT "namespaces".id FROM "base_and_ancestors" AS "namespaces" WHERE "namespaces"."parent_id" IS NULL
gitlabhq_production(> )
gitlabhq_production-> LEFT JOIN gitlab_subscriptions ON gitlab_subscriptions.namespace_id = root_namespaces.id
gitlabhq_production-> LEFT JOIN plans ON plans.id = gitlab_subscriptions.hosted_plan_id
gitlabhq_production-> WHERE projects.mirror = true
gitlabhq_production-> AND project_mirror_data.retry_count <= 14
gitlabhq_production-> AND (projects.visibility_level = 20 OR plans.name IN ('early_adopter', 'bronze', 'silver', 'gold'))
gitlabhq_production->
gitlabhq_production-> AND project_mirror_data.status NOT IN ('scheduled', 'started')
gitlabhq_production-> AND project_mirror_data.next_execution_timestamp <= NOW() - '10 seconds'::interval
gitlabhq_production-> ;
count
-------
361
(1 row)
gitlabhq_production=> SELECT COUNT(*) FROM projects INNER JOIN project_mirror_data ON project_mirror_data.project_id = projects.id
gitlabhq_production-> INNER JOIN namespaces AS root_namespaces ON root_namespaces.id = (
gitlabhq_production(> WITH RECURSIVE "base_and_ancestors" AS (
gitlabhq_production(> (SELECT "namespaces".* FROM "namespaces" WHERE "namespaces"."id" = projects.namespace_id)
gitlabhq_production(> UNION
gitlabhq_production(> (SELECT "namespaces".* FROM "namespaces", "base_and_ancestors" WHERE "namespaces"."id" = "base_and_ancestors"."parent_id")
gitlabhq_production(> ) SELECT "namespaces".id FROM "base_and_ancestors" AS "namespaces" WHERE "namespaces"."parent_id" IS NULL
gitlabhq_production(> )
gitlabhq_production-> LEFT JOIN gitlab_subscriptions ON gitlab_subscriptions.namespace_id = root_namespaces.id
gitlabhq_production-> LEFT JOIN plans ON plans.id = gitlab_subscriptions.hosted_plan_id
gitlabhq_production-> WHERE projects.mirror = true
gitlabhq_production-> AND project_mirror_data.retry_count <= 14
gitlabhq_production-> AND (projects.visibility_level = 20 OR plans.name IN ('early_adopter', 'bronze', 'silver', 'gold'))
gitlabhq_production->
gitlabhq_production-> AND project_mirror_data.status IN ('scheduled', 'started')
gitlabhq_production-> ;
count
-------
25
(1 row)
gitlabhq_production=> SELECT COUNT(*) FROM projects INNER JOIN project_mirror_data ON project_mirror_data.project_id = projects.id
gitlabhq_production-> INNER JOIN namespaces AS root_namespaces ON root_namespaces.id = (
gitlabhq_production(> WITH RECURSIVE "base_and_ancestors" AS (
gitlabhq_production(> (SELECT "namespaces".* FROM "namespaces" WHERE "namespaces"."id" = projects.namespace_id)
gitlabhq_production(> UNION
gitlabhq_production(> (SELECT "namespaces".* FROM "namespaces", "base_and_ancestors" WHERE "namespaces"."id" = "base_and_ancestors"."parent_id")
gitlabhq_production(> ) SELECT "namespaces".id FROM "base_and_ancestors" AS "namespaces" WHERE "namespaces"."parent_id" IS NULL
gitlabhq_production(> )
gitlabhq_production-> LEFT JOIN gitlab_subscriptions ON gitlab_subscriptions.namespace_id = root_namespaces.id
gitlabhq_production-> LEFT JOIN plans ON plans.id = gitlab_subscriptions.hosted_plan_id
gitlabhq_production-> WHERE projects.mirror = true
gitlabhq_production-> AND project_mirror_data.retry_count <= 14
gitlabhq_production-> AND (projects.visibility_level = 20 OR plans.name IN ('early_adopter', 'bronze', 'silver', 'gold'))
gitlabhq_production->
gitlabhq_production-> AND project_mirror_data.status = 'scheduled'
gitlabhq_production-> ;
count
-------
0
(1 row)
This query still isn't perfect: we have some data integrity issues where a project is public but its root namespace isn't (so the project isn't actually public and doesn't get mirroring for free), but this broadly matches what the application is doing
Edited by Sean McGivern