Do not query `sbom_occurrences` for all namespaces
What does this MR do and why?
Current group-level SBOM queries are querying the sbom_occurrences
table for all the namespaces under the given namespace. This causes unnecessary index lookups for namespaces without any related sbom_occurrences
.
In this MR, we are first running a loose index scan to get all the namespaces associated with sbom_occurrences
under the given namespace and then querying the sbom_occurrences
for those namespaces only.
The new query still uses many buffers for large namespaces with many child namespaces, but it uses almost half of the old query.
Database review
Old query
WITH "namespaces" AS MATERIALIZED (
SELECT
"namespaces"."traversal_ids"
FROM
"namespaces"
WHERE
"namespaces"."type" = 'Group'
AND (
traversal_ids @> (
'{9970}'
)
))
SELECT
"outer_occurrences"."component_id",
"outer_occurrences"."component_version_id",
MIN(outer_occurrences.id)::bigint AS id,
MIN(outer_occurrences.package_manager) AS package_manager,
MIN(outer_occurrences.input_file_path) AS input_file_path,
MIN(outer_occurrences.licenses -> 0 ->> 'spdx_identifier') AS primary_license_spdx_identifier,
COALESCE(JSONB_AGG(outer_occurrences.licenses -> 0) FILTER (WHERE outer_occurrences.licenses -> 0 IS NOT NULL), '[]') AS licenses,
SUM(counts.occurrence_count)::integer AS occurrence_count,
SUM(counts.vulnerability_count)::integer AS vulnerability_count,
SUM(counts.project_count)::integer AS project_count
FROM ( SELECT DISTINCT ON ("inner_occurrences"."component_id", "inner_occurrences"."component_version_id")
"inner_occurrences"."id",
"inner_occurrences"."created_at",
"inner_occurrences"."updated_at",
"inner_occurrences"."component_version_id",
"inner_occurrences"."project_id",
"inner_occurrences"."pipeline_id",
"inner_occurrences"."source_id",
"inner_occurrences"."commit_sha",
"inner_occurrences"."component_id",
"inner_occurrences"."uuid",
"inner_occurrences"."package_manager",
"inner_occurrences"."component_name",
"inner_occurrences"."input_file_path",
"inner_occurrences"."licenses",
"inner_occurrences"."highest_severity",
"inner_occurrences"."vulnerability_count",
"inner_occurrences"."source_package_id",
"inner_occurrences"."archived",
"inner_occurrences"."traversal_ids",
"inner_occurrences"."ancestors"
FROM
namespaces,
LATERAL ( SELECT DISTINCT ON ("sbom_occurrences"."component_id", "sbom_occurrences"."component_version_id")
"sbom_occurrences"."id",
"sbom_occurrences"."created_at",
"sbom_occurrences"."updated_at",
"sbom_occurrences"."component_version_id",
"sbom_occurrences"."project_id",
"sbom_occurrences"."pipeline_id",
"sbom_occurrences"."source_id",
"sbom_occurrences"."commit_sha",
"sbom_occurrences"."component_id",
"sbom_occurrences"."uuid",
"sbom_occurrences"."package_manager",
"sbom_occurrences"."component_name",
"sbom_occurrences"."input_file_path",
"sbom_occurrences"."licenses",
"sbom_occurrences"."highest_severity",
"sbom_occurrences"."vulnerability_count",
"sbom_occurrences"."source_package_id",
"sbom_occurrences"."archived",
"sbom_occurrences"."traversal_ids",
"sbom_occurrences"."ancestors"
FROM
"sbom_occurrences"
WHERE (sbom_occurrences.traversal_ids = namespaces.traversal_ids::bigint[])
AND "sbom_occurrences"."archived" = FALSE
ORDER BY
"sbom_occurrences"."component_id" ASC,
"sbom_occurrences"."component_version_id" ASC
LIMIT 21) inner_occurrences
ORDER BY
"inner_occurrences"."component_id" ASC,
"inner_occurrences"."component_version_id" ASC
LIMIT 21) outer_occurrences,
LATERAL (
SELECT
COUNT(project_id) AS occurrence_count,
COUNT(DISTINCT project_id) project_count,
SUM(vulnerability_count) vulnerability_count
FROM
"sbom_occurrences"
WHERE (traversal_ids >= ('{9970}'))
AND (traversal_ids < ('{9971}'))
AND "sbom_occurrences"."archived" = FALSE
AND (sbom_occurrences.component_version_id = outer_occurrences.component_version_id)) counts
GROUP BY
"outer_occurrences"."component_id",
"outer_occurrences"."component_version_id"
ORDER BY
MIN("outer_occurrences"."component_id") ASC,
MIN("outer_occurrences"."component_version_id") ASC
https://console.postgres.ai/gitlab/gitlab-production-main/sessions/30414/commands/94345 https://console.postgres.ai/gitlab/gitlab-production-main/sessions/30414/commands/94352
New query
SELECT
"outer_occurrences"."component_id",
"outer_occurrences"."component_version_id",
MIN(outer_occurrences.id)::bigint AS id,
MIN(outer_occurrences.package_manager) AS package_manager,
MIN(outer_occurrences.input_file_path) AS input_file_path,
MIN(outer_occurrences.licenses -> 0 ->> 'spdx_identifier') AS primary_license_spdx_identifier,
COALESCE(JSONB_AGG(outer_occurrences.licenses -> 0) FILTER (WHERE outer_occurrences.licenses -> 0 IS NOT NULL), '[]') AS licenses,
SUM(counts.occurrence_count)::integer AS occurrence_count,
SUM(counts.vulnerability_count)::integer AS vulnerability_count,
SUM(counts.project_count)::integer AS project_count
FROM ( SELECT DISTINCT ON ("inner_occurrences"."component_id", "inner_occurrences"."component_version_id")
"inner_occurrences"."id",
"inner_occurrences"."created_at",
"inner_occurrences"."updated_at",
"inner_occurrences"."component_version_id",
"inner_occurrences"."project_id",
"inner_occurrences"."pipeline_id",
"inner_occurrences"."source_id",
"inner_occurrences"."commit_sha",
"inner_occurrences"."component_id",
"inner_occurrences"."uuid",
"inner_occurrences"."package_manager",
"inner_occurrences"."component_name",
"inner_occurrences"."input_file_path",
"inner_occurrences"."licenses",
"inner_occurrences"."highest_severity",
"inner_occurrences"."vulnerability_count",
"inner_occurrences"."source_package_id",
"inner_occurrences"."archived",
"inner_occurrences"."traversal_ids",
"inner_occurrences"."ancestors"
FROM ( WITH RECURSIVE "loose_index_scan_cte" AS (
(
SELECT
"sbom_occurrences"."traversal_ids"
FROM
"sbom_occurrences"
WHERE (traversal_ids >= ('{9970}'))
AND (traversal_ids < ('{9971}'))
AND "sbom_occurrences"."archived" = FALSE
ORDER BY
"sbom_occurrences"."traversal_ids" ASC
LIMIT 1)
UNION (
SELECT
(
SELECT
"sbom_occurrences"."traversal_ids"
FROM
"sbom_occurrences"
WHERE (traversal_ids >= ('{9970}'))
AND (traversal_ids < ('{9971}'))
AND "sbom_occurrences"."archived" = FALSE
AND "sbom_occurrences"."traversal_ids" > "loose_index_scan_cte"."traversal_ids"
ORDER BY
"sbom_occurrences"."traversal_ids" ASC
LIMIT 1) AS traversal_ids
FROM
"loose_index_scan_cte"))
SELECT
"traversal_ids"
FROM
"loose_index_scan_cte" AS "sbom_occurrences"
WHERE
"sbom_occurrences"."traversal_ids" IS NOT NULL) AS namespaces,
LATERAL ( SELECT DISTINCT ON ("sbom_occurrences"."component_id", "sbom_occurrences"."component_version_id")
"sbom_occurrences"."id",
"sbom_occurrences"."created_at",
"sbom_occurrences"."updated_at",
"sbom_occurrences"."component_version_id",
"sbom_occurrences"."project_id",
"sbom_occurrences"."pipeline_id",
"sbom_occurrences"."source_id",
"sbom_occurrences"."commit_sha",
"sbom_occurrences"."component_id",
"sbom_occurrences"."uuid",
"sbom_occurrences"."package_manager",
"sbom_occurrences"."component_name",
"sbom_occurrences"."input_file_path",
"sbom_occurrences"."licenses",
"sbom_occurrences"."highest_severity",
"sbom_occurrences"."vulnerability_count",
"sbom_occurrences"."source_package_id",
"sbom_occurrences"."archived",
"sbom_occurrences"."traversal_ids",
"sbom_occurrences"."ancestors"
FROM
"sbom_occurrences"
WHERE (sbom_occurrences.traversal_ids = namespaces.traversal_ids)
AND "sbom_occurrences"."archived" = FALSE
ORDER BY
"sbom_occurrences"."component_id" ASC,
"sbom_occurrences"."component_version_id" ASC
LIMIT 21) inner_occurrences
ORDER BY
"inner_occurrences"."component_id" ASC,
"inner_occurrences"."component_version_id" ASC
LIMIT 21) outer_occurrences,
LATERAL (
SELECT
COUNT(project_id) AS occurrence_count,
COUNT(DISTINCT project_id) project_count,
SUM(vulnerability_count) vulnerability_count
FROM
"sbom_occurrences"
WHERE (traversal_ids >= ('{9970}'))
AND (traversal_ids < ('{9971}'))
AND "sbom_occurrences"."archived" = FALSE
AND (sbom_occurrences.component_version_id = outer_occurrences.component_version_id)) counts
GROUP BY
"outer_occurrences"."component_id",
"outer_occurrences"."component_version_id"
ORDER BY
MIN("outer_occurrences"."component_id") ASC,
MIN("outer_occurrences"."component_version_id") ASC
https://console.postgres.ai/gitlab/gitlab-production-main/sessions/30414/commands/94347 https://console.postgres.ai/gitlab/gitlab-production-main/sessions/30414/commands/94350