Optimize vulnerability export at group level
What does this MR do and why?
This change attempts to optimize the vulnerability export at the group level by reducing the amount of memory required to produce the export and optimizing database queries. At the moment, the sidekiq worker that attempts to run this job is shutdown before the job finishes which means that we are never able to produce the full report. By optimizing this query we make it possible to produce a full report.
Before query
SELECT id
FROM (
WITH RECURSIVE "array_cte" AS MATERIALIZED (
SELECT "projects"."id"
FROM "projects"
WHERE "projects"."namespace_id" IN (
SELECT namespaces.traversal_ids[array_length(namespaces.traversal_ids, 1)] AS id
FROM "namespaces" WHERE "namespaces"."type" = 'Group'
AND (traversal_ids @> ('{9970}'))
) AND "projects"."archived" = FALSE
),
"recursive_keyset_cte" AS (
(
SELECT NULL::bigint AS id, array_cte_id_array, vulnerabilities_id_array, 0::bigint AS count
FROM (
SELECT ARRAY_AGG("array_cte"."id") AS array_cte_id_array, ARRAY_AGG("vulnerabilities"."id") AS vulnerabilities_id_array
FROM (SELECT "array_cte"."id" FROM array_cte) array_cte
LEFT JOIN LATERAL (
SELECT "vulnerabilities"."id" AS id
FROM "vulnerabilities"
WHERE "vulnerabilities"."project_id" = "array_cte"."id"
ORDER BY "vulnerabilities"."id" DESC
LIMIT 1
) vulnerabilities ON TRUE WHERE "vulnerabilities"."id" IS NOT NULL
) array_scope_lateral_query LIMIT 1
)
UNION ALL
(
SELECT recursive_keyset_cte.vulnerabilities_id_array[position], array_cte_id_array, recursive_keyset_cte.vulnerabilities_id_array[:position_query.position-1]||next_cursor_values.id||recursive_keyset_cte.vulnerabilities_id_array[position_query.position+1:], recursive_keyset_cte.count + 1
FROM recursive_keyset_cte,
LATERAL (
SELECT id, position
FROM UNNEST(vulnerabilities_id_array) WITH ORDINALITY AS u(id, position)
WHERE id IS NOT NULL
ORDER BY 1 DESC
LIMIT 1
) AS position_query,
LATERAL (
SELECT "record"."id"
FROM (VALUES (NULL)) AS nulls
LEFT JOIN (
SELECT "vulnerabilities"."id" AS id
FROM "vulnerabilities"
WHERE "vulnerabilities"."project_id" = recursive_keyset_cte.array_cte_id_array[position]
AND ("vulnerabilities"."id" < recursive_keyset_cte.vulnerabilities_id_array[position])
ORDER BY "vulnerabilities"."id" DESC
LIMIT 1
) record ON TRUE LIMIT 1
) AS next_cursor_values
)
)
SELECT id
FROM "recursive_keyset_cte" AS "vulnerabilities"
WHERE (count <> 0)
) vulnerabilities LIMIT 500;
https://console.postgres.ai/gitlab/gitlab-production-tunnel-pg12/sessions/27089/commands/84270
After query
SELECT
"vulnerability_reads".*
FROM
"vulnerability_reads"
WHERE
"vulnerability_reads"."traversal_ids" >= '{9970}'
AND "vulnerability_reads"."traversal_ids" < '{9971}'
AND "vulnerability_reads"."archived" = FALSE
ORDER BY
"vulnerability_reads"."traversal_ids" ASC,
"vulnerability_reads"."vulnerability_id" ASC
LIMIT 500
https://console.postgres.ai/gitlab/gitlab-production-tunnel-pg12/sessions/27166/commands/84456
The full list of queries can be found in this snippet.
MR acceptance checklist
Please evaluate this MR against the MR acceptance checklist. It helps you analyze changes to reduce risks in quality, performance, reliability, security, and maintainability.
Screenshots or screen recordings
Before | After |
---|---|
Before | After |
How to set up and validate locally
- In rails console enable the experiment fully
Feature.enable(:optimized_vulnerability_report_export)
- Visit any group vulnerability report
- Click the
Export
button. - In rails console export the report.
VulnerabilityExports::ExportService.export(Vulnerabilities::Export.last)