Support GKE Workload Identity authentication
Summary
GKE Workload Identity, like AWS EKS IRSA, is the recommended authentication method for Kubernetes-based workloads. In these setups, Kubernetes Service Accounts are attached directly to Cloud Provider Service Accounts that have the permissions required to access cloud services. This is a security improvement over Service Account keys or Node-based authentication (where pods attempt to use the Service Account attached to their underlying node instead). Both Node-based authentication and Workload Identity rely on what is known as Application Default Credentials (ADC).
After extensively testing each authentication method with GitLab, the following was found:
Access | Service Account key | Node Service Account (ADC) | Workload Identity (ADC) |
---|---|---|---|
Object Storage - Normal (uploads, etc.) | |||
Object Storage - Registry | |||
Object Storage - Backups (`gsutil`) | |||
While Service Account keys worked fully, Node-based Service Account authentication failed for backups. This was already known, as the `gsutil` CLI that the Toolbox container uses is currently hardcoded to expect only a key.
Workload Identity, however, which follows a very similar path to Node-based authentication except that the Service Accounts are linked directly to pods, did not work in any fashion. Even after the setup was checked in full, every attempt to access Object Storage returned the curious error `PERMISSION_DENIED: Request had insufficient authentication scopes`.
`PERMISSION_DENIED` error log
{
"component": "gitlab",
"subcomponent": "exceptions_json",
"level": "error",
"severity": "ERROR",
"time": "2023-07-14T10:05:11.203Z",
"correlation_id": "01H59XS1QT1DHW9QYZ11XRMZN7",
"meta.caller_id": "RepositoryImportWorker",
"meta.remote_ip": "<redacted>",
"meta.feature_category": "importers",
"meta.user": "root",
"meta.user_id": 1,
"meta.project": "root/test_sample1",
"meta.root_namespace": "root",
"meta.client_id": "user/1",
"meta.root_caller_id": "ProjectsController#create",
"exception.class": "Google::Apis::ClientError",
"exception.message": "PERMISSION_DENIED: Request had insufficient authentication scopes.",
"exception.backtrace": [
"config/initializers/carrierwave_s3_encryption_headers_patch.rb:53:in `authenticated_url`",
"config/initializers/carrierwave_performance_patch.rb:25:in `url`",
"lib/gitlab/import_export/command_line_util.rb:60:in `download_or_copy_upload`",
"lib/gitlab/import_export/file_importer.rb:80:in `remote_download_or_download_or_copy_upload`",
"lib/gitlab/import_export/file_importer.rb:67:in `copy_archive`",
"lib/gitlab/import_export/file_importer.rb:27:in `import`",
"lib/gitlab/import_export/file_importer.rb:13:in `import`",
"lib/gitlab/import_export/importer.rb:48:in `import_file`",
"lib/gitlab/import_export/importer.rb:21:in `execute`",
"app/services/projects/import_service.rb:143:in `import_data`",
"app/services/projects/import_service.rb:27:in `execute`",
"app/workers/repository_import_worker.rb:24:in `perform`",
"ee/app/workers/ee/repository_import_worker.rb:9:in `perform`",
"lib/gitlab/sidekiq_middleware/defer_jobs.rb:33:in `call`",
"lib/gitlab/database/load_balancing/sidekiq_server_middleware.rb:29:in `call`",
"lib/gitlab/sidekiq_middleware/duplicate_jobs/strategies/until_executing.rb:16:in `perform`",
"lib/gitlab/sidekiq_middleware/duplicate_jobs/duplicate_job.rb:44:in `perform`",
"lib/gitlab/sidekiq_middleware/duplicate_jobs/server.rb:8:in `call`",
"lib/gitlab/sidekiq_middleware/worker_context.rb:9:in `wrap_in_optional_context`",
"lib/gitlab/sidekiq_middleware/worker_context/server.rb:19:in `block in call`",
"lib/gitlab/application_context.rb:118:in `block in use`",
"lib/gitlab/application_context.rb:118:in `use`",
"lib/gitlab/application_context.rb:57:in `with_context`",
"lib/gitlab/sidekiq_middleware/worker_context/server.rb:17:in `call`",
"lib/gitlab/sidekiq_status/server_middleware.rb:7:in `call`",
"lib/gitlab/sidekiq_versioning/middleware.rb:9:in `call`",
"lib/gitlab/sidekiq_middleware/query_analyzer.rb:7:in `block in call`",
"lib/gitlab/database/query_analyzer.rb:37:in `within`",
"lib/gitlab/sidekiq_middleware/query_analyzer.rb:7:in `call`",
"lib/gitlab/sidekiq_middleware/admin_mode/server.rb:14:in `call`",
"lib/gitlab/sidekiq_middleware/instrumentation_logger.rb:9:in `call`",
"lib/gitlab/sidekiq_middleware/batch_loader.rb:7:in `call`",
"lib/gitlab/sidekiq_middleware/extra_done_log_metadata.rb:7:in `call`",
"lib/gitlab/sidekiq_middleware/request_store_middleware.rb:10:in `block in call`",
"lib/gitlab/with_request_store.rb:17:in `enabling_request_store`",
"lib/gitlab/with_request_store.rb:10:in `with_request_store`",
"lib/gitlab/sidekiq_middleware/request_store_middleware.rb:9:in `call`",
"lib/gitlab/sidekiq_middleware/server_metrics.rb:83:in `block in call`",
"lib/gitlab/sidekiq_middleware/server_metrics.rb:110:in `block in instrument`",
"lib/gitlab/metrics/background_transaction.rb:33:in `run`",
"lib/gitlab/sidekiq_middleware/server_metrics.rb:110:in `instrument`",
"lib/gitlab/sidekiq_middleware/server_metrics.rb:82:in `call`",
"lib/gitlab/sidekiq_middleware/monitor.rb:10:in `block in call`",
"lib/gitlab/sidekiq_daemon/monitor.rb:46:in `within_job`",
"lib/gitlab/sidekiq_middleware/monitor.rb:9:in `call`",
"lib/gitlab/sidekiq_middleware/size_limiter/server.rb:13:in `call`",
"lib/gitlab/sidekiq_logging/structured_logger.rb:21:in `call`"
],
"user.username": "root",
"tags.program": "sidekiq",
"tags.locale": "en",
"tags.feature_category": "importers",
"tags.correlation_id": "01H59XS1QT1DHW9QYZ11XRMZN7",
"extra.sidekiq": {
"retry": false,
"queue": "default",
"version": 0,
"backtrace": 5,
"dead": false,
"status_expiration": 86400,
"args": [
"1"
],
"class": "RepositoryImportWorker",
"jid": "ab1dac5e97335dd5c3db089c",
"created_at": 1689329110.5359814,
"correlation_id": "01H59XS1QT1DHW9QYZ11XRMZN7",
"meta.caller_id": "ProjectsController#create",
"meta.remote_ip": "<redacted>",
"meta.feature_category": "importers",
"meta.user": "root",
"meta.user_id": 1,
"meta.project": "root/test_sample1",
"meta.root_namespace": "root",
"meta.client_id": "user/1",
"meta.root_caller_id": "ProjectsController#create",
"worker_data_consistency": "always",
"idempotency_key": "resque:gitlab:duplicate:default:1af4019a75553f37cc5480504f43eecd097065d10c91b25179cbcab8ae32f890",
"size_limiter": "validated",
"enqueued_at": 1689329110.5413666
},
"extra.importer": "Import/Export",
"extra.project_id": 1,
"extra.project_name": "test_sample1",
"extra.project_path": "root/test_sample1",
"extra.import_jid": "ab1dac5e97335dd5c3db089c"
}
This error typically refers to legacy Access Scopes. Although legacy, Access Scopes are still part of each authentication call made on GCP and need to be correct for the calls to go through. What makes this curious for a Workload Identity setup, however, is that there is no option to set Access Scopes; they can only be set when attaching a Service Account to a node. Even so, GCP defaults Access Scopes to `cloud-platform`, which is sufficient for Object Storage access, making this error all the more confusing.
Overall, something is definitely amiss, and the problem may be client-side, in how we make the calls.
Analysis
@pguinoiseau has investigated and opened https://github.com/fog/fog-google/issues/599 upstream in fog-google. There is a workaround, but its implications include a very significant increase in API calls to Google, which is likely to run into rate limits in deployments above a certain volume of traffic.