Skip to content

Do not query `sbom_occurrences` for all namespaces

What does this MR do and why?

Current group-level SBOM queries are querying the sbom_occurrences table for all the namespaces under the given namespace. This causes unnecessary index lookups for namespaces without any related sbom_occurrences.

In this MR, we are first running a loose index scan to get all the namespaces associated with sbom_occurrences under the given namespace and then querying the sbom_occurrences for those namespaces only.

The new query still uses many buffers for large namespaces with many child namespaces, but it uses almost half of the old query.

Database review

Old query
WITH "namespaces" AS MATERIALIZED (
    SELECT
        "namespaces"."traversal_ids"
    FROM
        "namespaces"
    WHERE
        "namespaces"."type" = 'Group'
        AND (
            traversal_ids @> (
                '{9970}'
)
))
SELECT
    "outer_occurrences"."component_id",
    "outer_occurrences"."component_version_id",
    MIN(outer_occurrences.id)::bigint AS id,
    MIN(outer_occurrences.package_manager) AS package_manager,
    MIN(outer_occurrences.input_file_path) AS input_file_path,
    MIN(outer_occurrences.licenses -> 0 ->> 'spdx_identifier') AS primary_license_spdx_identifier,
    COALESCE(JSONB_AGG(outer_occurrences.licenses -> 0) FILTER (WHERE outer_occurrences.licenses -> 0 IS NOT NULL), '[]') AS licenses,
    SUM(counts.occurrence_count)::integer AS occurrence_count,
    SUM(counts.vulnerability_count)::integer AS vulnerability_count,
    SUM(counts.project_count)::integer AS project_count
FROM ( SELECT DISTINCT ON ("inner_occurrences"."component_id", "inner_occurrences"."component_version_id")
        "inner_occurrences"."id",
        "inner_occurrences"."created_at",
        "inner_occurrences"."updated_at",
        "inner_occurrences"."component_version_id",
        "inner_occurrences"."project_id",
        "inner_occurrences"."pipeline_id",
        "inner_occurrences"."source_id",
        "inner_occurrences"."commit_sha",
        "inner_occurrences"."component_id",
        "inner_occurrences"."uuid",
        "inner_occurrences"."package_manager",
        "inner_occurrences"."component_name",
        "inner_occurrences"."input_file_path",
        "inner_occurrences"."licenses",
        "inner_occurrences"."highest_severity",
        "inner_occurrences"."vulnerability_count",
        "inner_occurrences"."source_package_id",
        "inner_occurrences"."archived",
        "inner_occurrences"."traversal_ids",
        "inner_occurrences"."ancestors"
    FROM
        namespaces,
        LATERAL ( SELECT DISTINCT ON ("sbom_occurrences"."component_id", "sbom_occurrences"."component_version_id")
                "sbom_occurrences"."id",
                "sbom_occurrences"."created_at",
                "sbom_occurrences"."updated_at",
                "sbom_occurrences"."component_version_id",
                "sbom_occurrences"."project_id",
                "sbom_occurrences"."pipeline_id",
                "sbom_occurrences"."source_id",
                "sbom_occurrences"."commit_sha",
                "sbom_occurrences"."component_id",
                "sbom_occurrences"."uuid",
                "sbom_occurrences"."package_manager",
                "sbom_occurrences"."component_name",
                "sbom_occurrences"."input_file_path",
                "sbom_occurrences"."licenses",
                "sbom_occurrences"."highest_severity",
                "sbom_occurrences"."vulnerability_count",
                "sbom_occurrences"."source_package_id",
                "sbom_occurrences"."archived",
                "sbom_occurrences"."traversal_ids",
                "sbom_occurrences"."ancestors"
            FROM
                "sbom_occurrences"
            WHERE (sbom_occurrences.traversal_ids = namespaces.traversal_ids::bigint[])
            AND "sbom_occurrences"."archived" = FALSE
        ORDER BY
            "sbom_occurrences"."component_id" ASC,
            "sbom_occurrences"."component_version_id" ASC
        LIMIT 21) inner_occurrences
ORDER BY
    "inner_occurrences"."component_id" ASC,
    "inner_occurrences"."component_version_id" ASC
LIMIT 21) outer_occurrences,
    LATERAL (
        SELECT
            COUNT(project_id) AS occurrence_count,
            COUNT(DISTINCT project_id) project_count,
            SUM(vulnerability_count) vulnerability_count
        FROM
            "sbom_occurrences"
        WHERE (traversal_ids >= ('{9970}'))
        AND (traversal_ids < ('{9971}'))
        AND "sbom_occurrences"."archived" = FALSE
        AND (sbom_occurrences.component_version_id = outer_occurrences.component_version_id)) counts
GROUP BY
    "outer_occurrences"."component_id",
    "outer_occurrences"."component_version_id"
ORDER BY
    MIN("outer_occurrences"."component_id") ASC,
    MIN("outer_occurrences"."component_version_id") ASC

https://console.postgres.ai/gitlab/gitlab-production-main/sessions/30414/commands/94345 https://console.postgres.ai/gitlab/gitlab-production-main/sessions/30414/commands/94352

New query
SELECT
    "outer_occurrences"."component_id",
    "outer_occurrences"."component_version_id",
    MIN(outer_occurrences.id)::bigint AS id,
    MIN(outer_occurrences.package_manager) AS package_manager,
    MIN(outer_occurrences.input_file_path) AS input_file_path,
    MIN(outer_occurrences.licenses -> 0 ->> 'spdx_identifier') AS primary_license_spdx_identifier,
    COALESCE(JSONB_AGG(outer_occurrences.licenses -> 0) FILTER (WHERE outer_occurrences.licenses -> 0 IS NOT NULL), '[]') AS licenses,
    SUM(counts.occurrence_count)::integer AS occurrence_count,
    SUM(counts.vulnerability_count)::integer AS vulnerability_count,
    SUM(counts.project_count)::integer AS project_count
FROM ( SELECT DISTINCT ON ("inner_occurrences"."component_id", "inner_occurrences"."component_version_id")
        "inner_occurrences"."id",
        "inner_occurrences"."created_at",
        "inner_occurrences"."updated_at",
        "inner_occurrences"."component_version_id",
        "inner_occurrences"."project_id",
        "inner_occurrences"."pipeline_id",
        "inner_occurrences"."source_id",
        "inner_occurrences"."commit_sha",
        "inner_occurrences"."component_id",
        "inner_occurrences"."uuid",
        "inner_occurrences"."package_manager",
        "inner_occurrences"."component_name",
        "inner_occurrences"."input_file_path",
        "inner_occurrences"."licenses",
        "inner_occurrences"."highest_severity",
        "inner_occurrences"."vulnerability_count",
        "inner_occurrences"."source_package_id",
        "inner_occurrences"."archived",
        "inner_occurrences"."traversal_ids",
        "inner_occurrences"."ancestors"
    FROM ( WITH RECURSIVE "loose_index_scan_cte" AS (
(
                SELECT
                    "sbom_occurrences"."traversal_ids"
                FROM
                    "sbom_occurrences"
                WHERE (traversal_ids >= ('{9970}'))
                AND (traversal_ids < ('{9971}'))
                AND "sbom_occurrences"."archived" = FALSE
            ORDER BY
                "sbom_occurrences"."traversal_ids" ASC
            LIMIT 1)
    UNION (
        SELECT
            (
                SELECT
                    "sbom_occurrences"."traversal_ids"
                FROM
                    "sbom_occurrences"
                WHERE (traversal_ids >= ('{9970}'))
                AND (traversal_ids < ('{9971}'))
                AND "sbom_occurrences"."archived" = FALSE
                AND "sbom_occurrences"."traversal_ids" > "loose_index_scan_cte"."traversal_ids"
            ORDER BY
                "sbom_occurrences"."traversal_ids" ASC
            LIMIT 1) AS traversal_ids
FROM
    "loose_index_scan_cte"))
SELECT
    "traversal_ids"
FROM
    "loose_index_scan_cte" AS "sbom_occurrences"
WHERE
    "sbom_occurrences"."traversal_ids" IS NOT NULL) AS namespaces,
LATERAL ( SELECT DISTINCT ON ("sbom_occurrences"."component_id", "sbom_occurrences"."component_version_id")
        "sbom_occurrences"."id",
        "sbom_occurrences"."created_at",
        "sbom_occurrences"."updated_at",
        "sbom_occurrences"."component_version_id",
        "sbom_occurrences"."project_id",
        "sbom_occurrences"."pipeline_id",
        "sbom_occurrences"."source_id",
        "sbom_occurrences"."commit_sha",
        "sbom_occurrences"."component_id",
        "sbom_occurrences"."uuid",
        "sbom_occurrences"."package_manager",
        "sbom_occurrences"."component_name",
        "sbom_occurrences"."input_file_path",
        "sbom_occurrences"."licenses",
        "sbom_occurrences"."highest_severity",
        "sbom_occurrences"."vulnerability_count",
        "sbom_occurrences"."source_package_id",
        "sbom_occurrences"."archived",
        "sbom_occurrences"."traversal_ids",
        "sbom_occurrences"."ancestors"
    FROM
        "sbom_occurrences"
    WHERE (sbom_occurrences.traversal_ids = namespaces.traversal_ids)
    AND "sbom_occurrences"."archived" = FALSE
ORDER BY
    "sbom_occurrences"."component_id" ASC,
    "sbom_occurrences"."component_version_id" ASC
LIMIT 21) inner_occurrences
ORDER BY
    "inner_occurrences"."component_id" ASC,
    "inner_occurrences"."component_version_id" ASC
LIMIT 21) outer_occurrences,
    LATERAL (
        SELECT
            COUNT(project_id) AS occurrence_count,
            COUNT(DISTINCT project_id) project_count,
            SUM(vulnerability_count) vulnerability_count
        FROM
            "sbom_occurrences"
        WHERE (traversal_ids >= ('{9970}'))
        AND (traversal_ids < ('{9971}'))
        AND "sbom_occurrences"."archived" = FALSE
        AND (sbom_occurrences.component_version_id = outer_occurrences.component_version_id)) counts
GROUP BY
    "outer_occurrences"."component_id",
    "outer_occurrences"."component_version_id"
ORDER BY
    MIN("outer_occurrences"."component_id") ASC,
    MIN("outer_occurrences"."component_version_id") ASC

https://console.postgres.ai/gitlab/gitlab-production-main/sessions/30414/commands/94347 https://console.postgres.ai/gitlab/gitlab-production-main/sessions/30414/commands/94350

Edited by Mehmet Emin INAC

Merge request reports

Loading