fix: Unbias percent rollout strategy
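`x % 100` returns an integer in the `0..99` range, so comparing it with `<=`
against a running total gives 1 extra percent to the first variant (it matches
buckets 0 through its percentage inclusive, one bucket too many) and takes that
percent away from the last variant. The following script reproduces the skew: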
# Builds the identifier string for an experiment name plus its context.
#
# The context's keys followed by its values are stringified (anything that
# responds to +to_global_id+ contributes its global id instead), prefixed with
# the configured secret and the experiment name, joined with "|", and hashed
# with SHA2 at the configured bit length.
#
# @param name [#to_s] experiment name
# @param context [Hash] keyword arguments describing the context
# @return [String] "<name>:<hex digest>"
experiment_id = lambda do |name, **context|
  config = Gitlab::Experiment::Configuration

  parts = (context.keys + context.values).map do |item|
    item = item.to_global_id if item.respond_to?(:to_global_id)
    item.to_s
  end

  # Secret first, then the name, then the stringified context parts.
  payload = [config.context_key_secret, name, *parts].join('|')
  digest = Digest::SHA2.new(config.context_key_bit_length).hexdigest(payload)

  "#{name}:#{digest}"
end
# Picks a variant for the given experiment context by bucketing the CRC32 of
# its experiment id into 0..99 and walking the distribution's running total.
#
# NOTE: the `<=` comparison against the running total is the biased check this
# script reproduces: because buckets are 0..99, the first variant matches one
# bucket too many and the last variant one too few. It is left unchanged so the
# script keeps demonstrating the skew.
#
# @param distribution [Hash] variant => percent, walked in insertion order
# @param name [#to_s] experiment name
# @param context [Hash] keyword arguments forwarded to +experiment_id+
# @return the key of the first variant whose running total reaches the bucket
# @raise [NoMethodError] when no variant matches (+find+ returns nil)
rollout = lambda do |distribution, name, **context|
  bucket = Zlib.crc32(experiment_id.call(name, **context)) % 100
  running_total = 0

  match = distribution.find do |_variant, share|
    running_total += share
    bucket <= running_total
  end

  match.first
end
# Assigns every id to a variant of the :foobar experiment and logs how the
# assignments are spread across variants.
#
# @param ids [Array] actor ids; each one is passed to +rollout+ as +actor:+
# @param distribution [Hash] variant => percent, forwarded to +rollout+
# @return whatever +Rails.logger.info+ returns
test_rollout = lambda do |ids, distribution|
  counts = ids.map { |id| rollout[distribution, :foobar, actor: id] }.tally

  # Per variant: the raw count and its rounded share of all ids.
  tally = counts.transform_values do |count|
    share = (100.0 * count / ids.size).round
    { value: count, percent: format("%d%%", share) }
  end

  Rails.logger.info("Distribution of #{ids.size} actors", { distribution: distribution, tally: tally })
end
# Pool of one million integer ids (0..999_999). The trailing `:ok` keeps a
# console from echoing the whole array.
ids = (0...1_000_000).to_a; :ok

# Log the observed split for random samples of 100k, 200k, ... 700k ids.
1.upto(7) do |step|
  test_rollout[ids.sample(step * 100_000), { a: 5, b: 15, c: 30, d: 50 }]
end
Running this script consistently assigns 6% of users to variant `a` and 49% to
variant `d`. With the proposed fix, the results match the configured distribution.