Remove occurrence and project counts
What does this MR do and why?
This MR attempts to optimize the group.sbom_occurrences
query by removing the project_count
and occurrence_count
aggregations. These aggregations make it difficult to move towards using efficient IN
operator queries. I would like to propose that we render each detected dependency as a separate row in the UI. I think that this is beneficial for a performance perspective but it also helps with the data export functionality because the export would include each unique detected dependency instead of a single row for each dependency and the # of projects that they were detected in.
To accomplish this I need to change the UI to be able to render each unique occurrence detected instead of aggregating them by component/version. See the screenshots below.
Before:
WITH "our_occurrences" AS MATERIALIZED (
SELECT "sbom_occurrences".*
FROM "sbom_occurrences"
WHERE "sbom_occurrences"."project_id" IN (
SELECT "projects"."id"
FROM "projects"
WHERE "projects"."namespace_id" IN (
SELECT namespaces.traversal_ids[array_length(namespaces.traversal_ids, 1)] AS id
FROM "namespaces"
WHERE "namespaces"."type" = 'Group'
AND (traversal_ids @> ('{9970}'))
)
AND "projects"."marked_for_deletion_at" IS NULL
AND "projects"."pending_delete" = FALSE
)
)
SELECT sbom_occurrences.*, agg_occurrences.occurrence_count, agg_occurrences.project_count
FROM "sbom_occurrences"
INNER JOIN (
SELECT component_id,
COUNT(DISTINCT id) AS occurrence_count,
COUNT(DISTINCT project_id) AS project_count
FROM our_occurrences
GROUP BY component_id
) agg_occurrences ON sbom_occurrences.component_id = agg_occurrences.component_id
WHERE "sbom_occurrences"."id" IN (
SELECT "our_occurrences"."id"
FROM "our_occurrences"
)
ORDER BY "sbom_occurrences"."id" ASC
LIMIT 20 OFFSET 0;
Time: 21.698 s
- planning: 18.821 ms
- execution: 21.679 s
- I/O read: 20.094 s
- I/O write: 0.000 ms
Shared buffers:
- hits: 407930 (~3.10 GiB) from the buffer pool
- reads: 44820 (~350.20 MiB) from the OS file cache, including disk I/O
- dirtied: 0
- writes: 0
https://console.postgres.ai/gitlab/gitlab-production-tunnel-pg12/sessions/22806/commands/73586
After:
SELECT "sbom_occurrences".*
FROM "sbom_occurrences"
WHERE "sbom_occurrences"."project_id" IN (
SELECT DISTINCT "projects"."id"
FROM "projects"
WHERE "projects"."namespace_id" IN (
SELECT namespaces.traversal_ids[array_length(namespaces.traversal_ids, 1)] AS id
FROM "namespaces"
WHERE "namespaces"."type" = 'Group'
AND (traversal_ids @> ('{9970}'))
)
AND "projects"."marked_for_deletion_at" IS NULL
AND "projects"."pending_delete" = FALSE
)
ORDER BY "sbom_occurrences"."id" ASC
LIMIT 20
OFFSET 0;
Time: 12.846 s
- planning: 8.855 ms
- execution: 12.837 s
- I/O read: 12.612 s
- I/O write: 0.000 ms
Shared buffers:
- hits: 1620 (~12.70 MiB) from the buffer pool
- reads: 4711 (~36.80 MiB) from the OS file cache, including disk I/O
- dirtied: 190 (~1.50 MiB)
- writes: 0
https://console.postgres.ai/gitlab/gitlab-production-tunnel-pg12/sessions/23222/commands/74708
sort by package manager
SELECT "sbom_occurrences".*
FROM "sbom_occurrences"
WHERE "sbom_occurrences"."project_id" IN (
SELECT "projects"."id"
FROM "projects"
WHERE "projects"."namespace_id" IN (
SELECT namespaces.traversal_ids[array_length(namespaces.traversal_ids, 1)] AS id
FROM "namespaces"
WHERE "namespaces"."type" = 'Group'
AND (traversal_ids @> ('{9970}'))
)
AND "projects"."marked_for_deletion_at" IS NULL
AND "projects"."pending_delete" = FALSE
)
ORDER BY "sbom_occurrences"."package_manager" ASC
LIMIT 20 OFFSET 0
https://console.postgres.ai/gitlab/gitlab-production-tunnel-pg12/sessions/22835/commands/73667
sort by component name
SELECT "sbom_occurrences".*
FROM "sbom_occurrences"
WHERE "sbom_occurrences"."project_id" IN (
SELECT "projects"."id"
FROM "projects"
WHERE "projects"."namespace_id" IN (
SELECT namespaces.traversal_ids[array_length(namespaces.traversal_ids, 1)] AS id
FROM "namespaces"
WHERE "namespaces"."type" = 'Group'
AND (traversal_ids @> ('{9970}'))
)
AND "projects"."marked_for_deletion_at" IS NULL
AND "projects"."pending_delete" = FALSE
)
ORDER BY "sbom_occurrences"."component_name" ASC
LIMIT 20 OFFSET 0
https://console.postgres.ai/gitlab/gitlab-production-tunnel-pg12/sessions/22835/commands/73668
sort by license
SELECT "sbom_occurrences".*
FROM "sbom_occurrences"
WHERE "sbom_occurrences"."project_id" IN (
SELECT "projects"."id"
FROM "projects"
WHERE "projects"."namespace_id" IN (
SELECT namespaces.traversal_ids[array_length(namespaces.traversal_ids, 1)] AS id
FROM "namespaces"
WHERE "namespaces"."type" = 'Group'
AND (traversal_ids @> ('{9970}'))
)
AND "projects"."marked_for_deletion_at" IS NULL
AND "projects"."pending_delete" = FALSE
)
ORDER BY
(licenses#>'{0,spdx_identifier}')::text ASC,
(licenses#>'{1,spdx_identifier}')::text ASC
LIMIT 20 OFFSET 0
https://console.postgres.ai/gitlab/gitlab-production-tunnel-pg12/sessions/22835/commands/73669
search by package manager
SELECT "sbom_occurrences".*
FROM "sbom_occurrences"
WHERE "sbom_occurrences"."project_id" IN (
SELECT "projects"."id"
FROM "projects"
WHERE "projects"."namespace_id" IN (
SELECT namespaces.traversal_ids[array_length(namespaces.traversal_ids, 1)] AS id
FROM "namespaces"
WHERE "namespaces"."type" = 'Group'
AND (traversal_ids @> ('{9970}'))
)
AND "projects"."marked_for_deletion_at" IS NULL
AND "projects"."pending_delete" = FALSE
)
AND "sbom_occurrences"."package_manager" = 'npm'
ORDER BY "sbom_occurrences"."id" ASC
LIMIT 20 OFFSET 0
https://console.postgres.ai/gitlab/gitlab-production-tunnel-pg12/sessions/22835/commands/73670
search by component name
SELECT "sbom_occurrences".*
FROM "sbom_occurrences"
WHERE "sbom_occurrences"."project_id" IN (
SELECT "projects"."id"
FROM "projects"
WHERE "projects"."namespace_id" IN (
SELECT namespaces.traversal_ids[array_length(namespaces.traversal_ids, 1)] AS id
FROM "namespaces"
WHERE "namespaces"."type" = 'Group'
AND (traversal_ids @> ('{9970}'))
)
AND "projects"."marked_for_deletion_at" IS NULL
AND "projects"."pending_delete" = FALSE
)
AND "sbom_occurrences"."component_name" = 'activerecord'
ORDER BY "sbom_occurrences"."id" ASC
LIMIT 20 OFFSET 0
https://console.postgres.ai/gitlab/gitlab-production-tunnel-pg12/sessions/22835/commands/73671
Screenshots or screen recordings
Before | After |
---|---|
MR acceptance checklist
This checklist encourages us to confirm any changes have been analyzed to reduce risks in quality, performance, reliability, security, and maintainability.
-
I have evaluated the MR acceptance checklist for this MR.