From cc7b009f5f0ab75336255ba466faf5bfb3f050d1 Mon Sep 17 00:00:00 2001 From: ChrisBAshton Date: Thu, 23 Jul 2020 17:28:59 +0100 Subject: [PATCH] Use ruby stdlib 'Random' for localised deterministic randomness Prior to this commit, we relied on the global random seed set via 'srand': if somebody had set 'srand', we'd return the same result every time. However, this could be surprising behaviour, as devs calling RandomExample would expect a random example every time it is called. We decided in an earlier [discussion] that it would be better to pass an explicit 'seed' parameter if the dev wants consistent results. Whilst we could have used this 'seed' parameter to call 'srand(seed)', the behaviour becomes non-deterministic because it changes the nature of the global environment. I.e. if the dev is relying on setting 'srand' for some other part of their test suite, we would now have skewed it by calling 'srand' again. Hence the need to move to a sandbox/isolated way of setting seeded random behaviour. We used 'Random.new(seed)' and called all randomness-inducing methods on that returned object, i.e. 'rand => @random.rand', and 'arr.sample => arr.sample(random: @random)'. [discussion]: https://github.com/alphagov/govuk_schemas/pull/56/files#r458182188 --- lib/govuk_schemas/random_content_generator.rb | 202 +++++++++--------- lib/govuk_schemas/random_schema_generator.rb | 27 +-- spec/lib/random_content_generator_spec.rb | 2 +- 3 files changed, 117 insertions(+), 114 deletions(-) diff --git a/lib/govuk_schemas/random_content_generator.rb b/lib/govuk_schemas/random_content_generator.rb index 7996fbe..edc730b 100644 --- a/lib/govuk_schemas/random_content_generator.rb +++ b/lib/govuk_schemas/random_content_generator.rb @@ -1,121 +1,123 @@ module GovukSchemas # @private - module RandomContentGenerator - class << self - WORDS = %w[Lorem ipsum dolor sit amet consectetur adipiscing elit. Ut suscipit at mauris non bibendum. Ut ac massa est. 
Aenean tempor imperdiet leo vel interdum. Nam sagittis cursus sem ultricies scelerisque. Quisque porttitor risus vel risus finibus eu sollicitudin nisl aliquet. Sed sed lectus ac dolor molestie interdum. Nam molestie pellentesque purus ac vestibulum. Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas. Suspendisse non tempor eros. Mauris eu orci hendrerit volutpat lorem in tristique libero. Duis a nibh nibh.].freeze - - def string_for_type(type) - if type == "date-time" - time - elsif type == "uri" - uri - else - raise "Unknown attribute type `#{type}`" - end - end + class RandomContentGenerator + WORDS = %w[Lorem ipsum dolor sit amet consectetur adipiscing elit. Ut suscipit at mauris non bibendum. Ut ac massa est. Aenean tempor imperdiet leo vel interdum. Nam sagittis cursus sem ultricies scelerisque. Quisque porttitor risus vel risus finibus eu sollicitudin nisl aliquet. Sed sed lectus ac dolor molestie interdum. Nam molestie pellentesque purus ac vestibulum. Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas. Suspendisse non tempor eros. Mauris eu orci hendrerit volutpat lorem in tristique libero. Duis a nibh nibh.].freeze - def time - seconds_ago = rand(-5_000_000..4_999_999) - (Time.now + seconds_ago).iso8601 - end + def initialize(random: Random.new) + @random = random + end - # TODO: make this more random with query string, optional anchor. 
- def uri - "http://example.com#{base_path}#{anchor}" + def string_for_type(type) + if type == "date-time" + time + elsif type == "uri" + uri + else + raise "Unknown attribute type `#{type}`" end + end - def base_path - "/" + rand(1..5).times.map { uuid }.join("/") - end + def time + seconds_ago = @random.rand(-5_000_000..4_999_999) + (Time.now + seconds_ago).iso8601 + end - def govuk_subdomain_url - subdomain = rand(2..4).times.map { - ("a".."z").to_a.sample(rand(3..8)).join - }.join(".") - "https://#{subdomain}.gov.uk#{base_path}" - end + # TODO: make this more random with query string, optional anchor. + def uri + "http://example.com#{base_path}#{anchor}" + end - def string(minimum_chars = nil, maximum_chars = nil) - minimum_chars ||= 0 - maximum_chars ||= 100 - WORDS.sample(rand(minimum_chars..maximum_chars)).join(" ") - end + def base_path + "/" + @random.rand(1..5).times.map { uuid }.join("/") + end - def bool - rand(2) == 1 - end + def govuk_subdomain_url + subdomain = @random.rand(2..4).times.map { + ("a".."z").to_a.sample(@random.rand(3..8), random: @random).join + }.join(".") + "https://#{subdomain}.gov.uk#{base_path}" + end - def anchor - "##{hex}" - end + def string(minimum_chars = nil, maximum_chars = nil) + minimum_chars ||= 0 + maximum_chars ||= 100 + WORDS.sample(@random.rand(minimum_chars..maximum_chars), random: @random).join(" ") + end - def random_identifier(separator:) - Utils.parameterize(WORDS.sample(rand(1..10)).join("-")).gsub("-", separator) - end + def bool + @random.rand(2) == 1 + end - def uuid - # matches uuid regex e.g. e058aad7-ce86-5181-8801-4ddcb3c8f27c - # /^[a-f0-9]{8}-[a-f0-9]{4}-[1-5][a-f0-9]{3}-[89ab][a-f0-9]{3}-[a-f0-9]{12}$/ - "#{hex(8)}-#{hex(4)}-1#{hex(3)}-a#{hex(3)}-#{hex(12)}" - end + def anchor + "##{hex}" + end - def hex(length = 10) - length.times.map { bool ? 
random_letter : random_number }.join("") - end + def random_identifier(separator:) + Utils.parameterize(WORDS.sample(@random.rand(1..10), random: @random).join("-")).gsub("-", separator) + end - def string_for_regex(pattern) - case pattern.to_s - when "^(placeholder|placeholder_.+)$" - ["placeholder", "placeholder_#{WORDS.sample}"].sample - when "^[a-f0-9]{8}-[a-f0-9]{4}-[1-5][a-f0-9]{3}-[89ab][a-f0-9]{3}-[a-f0-9]{12}$" - uuid - when "^/(([a-zA-Z0-9._~!$&'()*+,;=:@-]|%[0-9a-fA-F]{2})+(/([a-zA-Z0-9._~!$&'()*+,;=:@-]|%[0-9a-fA-F]{2})*)*)?$" - base_path - when "^[1-9][0-9]{3}[-/](0[1-9]|1[0-2])[-/](0[1-9]|[12][0-9]|3[0-1])$" - Date.today.iso8601 - when "^[1-9][0-9]{3}-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[0-1])$" - Date.today.iso8601 - when "^#.+$" - anchor - when "[a-z-]" - random_identifier(separator: "-") - when "^[a-z_]+$" - random_identifier(separator: "_") - when "^/(([a-zA-Z0-9._~!$&'()*+,;=:@-]|%[0-9a-fA-F]{2})+(/([a-zA-Z0-9._~!$&'()*+,;=:@-]|%[0-9a-fA-F]{2})*)*)?(\\?([a-zA-Z0-9._~!$&'()*+,;=:@-]|%[0-9a-fA-F]{2})*)?(#([a-zA-Z0-9._~!$&'()*+,;=:@-]|%[0-9a-fA-F]{2})*)?$" - base_path - when "^https://([a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[A-Za-z0-9])?\\.)+campaign\\.gov\\.uk(/(([a-zA-Z0-9._~!$&'()*+,;=:@-]|%[0-9a-fA-F]{2})+(/([a-zA-Z0-9._~!$&'()*+,;=:@-]|%[0-9a-fA-F]{2})*)*)?(\\?([a-zA-Z0-9._~!$&'()*+,;=:@-]|%[0-9a-fA-F]{2})*)?(#([a-zA-Z0-9._~!$&'()*+,;=:@-]|%[0-9a-fA-F]{2})*)?)?$" - govuk_subdomain_url - when "^https://([a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[A-Za-z0-9])?\\.)*gov\\.uk(/(([a-zA-Z0-9._~!$&'()*+,;=:@-]|%[0-9a-fA-F]{2})+(/([a-zA-Z0-9._~!$&'()*+,;=:@-]|%[0-9a-fA-F]{2})*)*)?(\\?([a-zA-Z0-9._~!$&'()*+,;=:@-]|%[0-9a-fA-F]{2})*)?(#([a-zA-Z0-9._~!$&'()*+,;=:@-]|%[0-9a-fA-F]{2})*)?)?$" - govuk_subdomain_url - when '[a-z0-9\-_]' - "#{hex}-#{hex}" - else - raise <<-DOC - Don't know how to generate random string for pattern #{pattern.inspect} - - This propably means you've introduced a new regex in govuk-content-schemas. 
- Because it's very hard to generate a valid string from a regex alone, - we have to specify a method to generate random data for each regex in - the schemas. - - To fix this: - - - Add your regex to `lib/govuk_schemas/random.rb` - DOC - end - end + def uuid + # matches uuid regex e.g. e058aad7-ce86-5181-8801-4ddcb3c8f27c + # /^[a-f0-9]{8}-[a-f0-9]{4}-[1-5][a-f0-9]{3}-[89ab][a-f0-9]{3}-[a-f0-9]{12}$/ + "#{hex(8)}-#{hex(4)}-1#{hex(3)}-a#{hex(3)}-#{hex(12)}" + end - private + def hex(length = 10) + length.times.map { bool ? random_letter : random_number }.join("") + end - def random_letter - letters = ("a".."f").to_a - letters[rand(0..letters.count - 1)] + def string_for_regex(pattern) + case pattern.to_s + when "^(placeholder|placeholder_.+)$" + ["placeholder", "placeholder_#{WORDS.sample(random: @random)}"].sample(random: @random) + when "^[a-f0-9]{8}-[a-f0-9]{4}-[1-5][a-f0-9]{3}-[89ab][a-f0-9]{3}-[a-f0-9]{12}$" + uuid + when "^/(([a-zA-Z0-9._~!$&'()*+,;=:@-]|%[0-9a-fA-F]{2})+(/([a-zA-Z0-9._~!$&'()*+,;=:@-]|%[0-9a-fA-F]{2})*)*)?$" + base_path + when "^[1-9][0-9]{3}[-/](0[1-9]|1[0-2])[-/](0[1-9]|[12][0-9]|3[0-1])$" + Date.today.iso8601 + when "^[1-9][0-9]{3}-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[0-1])$" + Date.today.iso8601 + when "^#.+$" + anchor + when "[a-z-]" + random_identifier(separator: "-") + when "^[a-z_]+$" + random_identifier(separator: "_") + when "^/(([a-zA-Z0-9._~!$&'()*+,;=:@-]|%[0-9a-fA-F]{2})+(/([a-zA-Z0-9._~!$&'()*+,;=:@-]|%[0-9a-fA-F]{2})*)*)?(\\?([a-zA-Z0-9._~!$&'()*+,;=:@-]|%[0-9a-fA-F]{2})*)?(#([a-zA-Z0-9._~!$&'()*+,;=:@-]|%[0-9a-fA-F]{2})*)?$" + base_path + when "^https://([a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[A-Za-z0-9])?\\.)+campaign\\.gov\\.uk(/(([a-zA-Z0-9._~!$&'()*+,;=:@-]|%[0-9a-fA-F]{2})+(/([a-zA-Z0-9._~!$&'()*+,;=:@-]|%[0-9a-fA-F]{2})*)*)?(\\?([a-zA-Z0-9._~!$&'()*+,;=:@-]|%[0-9a-fA-F]{2})*)?(#([a-zA-Z0-9._~!$&'()*+,;=:@-]|%[0-9a-fA-F]{2})*)?)?$" + govuk_subdomain_url + when 
"^https://([a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[A-Za-z0-9])?\\.)*gov\\.uk(/(([a-zA-Z0-9._~!$&'()*+,;=:@-]|%[0-9a-fA-F]{2})+(/([a-zA-Z0-9._~!$&'()*+,;=:@-]|%[0-9a-fA-F]{2})*)*)?(\\?([a-zA-Z0-9._~!$&'()*+,;=:@-]|%[0-9a-fA-F]{2})*)?(#([a-zA-Z0-9._~!$&'()*+,;=:@-]|%[0-9a-fA-F]{2})*)?)?$" + govuk_subdomain_url + when '[a-z0-9\-_]' + "#{hex}-#{hex}" + else + raise <<-DOC + Don't know how to generate random string for pattern #{pattern.inspect} + + This propably means you've introduced a new regex in govuk-content-schemas. + Because it's very hard to generate a valid string from a regex alone, + we have to specify a method to generate random data for each regex in + the schemas. + + To fix this: + + - Add your regex to `lib/govuk_schemas/random.rb` + DOC end + end - def random_number - numbers = ("0".."9").to_a - numbers[rand(0..numbers.count - 1)] - end + private + + def random_letter + letters = ("a".."f").to_a + letters[@random.rand(0..letters.count - 1)] + end + + def random_number + numbers = ("0".."9").to_a + numbers[@random.rand(0..numbers.count - 1)] end end end diff --git a/lib/govuk_schemas/random_schema_generator.rb b/lib/govuk_schemas/random_schema_generator.rb index c7be70b..223022f 100644 --- a/lib/govuk_schemas/random_schema_generator.rb +++ b/lib/govuk_schemas/random_schema_generator.rb @@ -12,7 +12,8 @@ module GovukSchemas class RandomSchemaGenerator def initialize(schema:, seed: nil) @schema = schema - srand(seed) unless seed.nil? + @random = Random.new(seed || rand) + @generator = RandomContentGenerator.new(random: @random) end def payload @@ -46,24 +47,24 @@ def generate_value(props) end # Make sure that we choose a type when there are more than one specified. - type = Array(type).sample + type = Array(type).sample(random: @random) if props["anyOf"] - generate_value(props["anyOf"].sample) + generate_value(props["anyOf"].sample(random: @random)) elsif props["oneOf"] && type != "object" # FIXME: Generating valid data for a `oneOf` schema is quite interesting. 
# According to the JSON Schema spec a `oneOf` schema is only valid if # the data is valid against *only one* of the clauses. To do this # properly, we'd have to verify that the data generated below doesn't # validate against the other schemas in `props['oneOf']`. - generate_value(props["oneOf"].sample) + generate_value(props["oneOf"].sample(random: @random)) elsif props["allOf"] props["allOf"].each_with_object({}) do |subschema, hash| val = generate_value(subschema) hash.merge(val) end elsif props["enum"] - props["enum"].sample + props["enum"].sample(random: @random) elsif type == "null" nil elsif type == "object" @@ -71,11 +72,11 @@ def generate_value(props) elsif type == "array" generate_random_array(props) elsif type == "boolean" - RandomContentGenerator.bool + @generator.bool elsif type == "integer" min = props["minimum"] || 0 max = props["maximum"] || 10 - rand(min..max) + @random.rand(min..max) elsif type == "string" generate_random_string(props) else @@ -86,14 +87,14 @@ def generate_value(props) def generate_random_object(subschema) document = {} - one_of_sample = subschema.fetch("oneOf", []).sample || {} + one_of_sample = subschema.fetch("oneOf", []).sample(random: @random) || {} (subschema["properties"] || {}).each do |attribute_name, attribute_properties| # TODO: When the schema contains `subschema['minProperties']` we always # populate all of the keys in the hash. This isn't quite random, but I # haven't found a nice way yet to ensure there's at least n elements in # the hash. 
- should_generate_value = RandomContentGenerator.bool \ + should_generate_value = @generator.bool \ || subschema["required"].to_a.include?(attribute_name) \ || (one_of_sample["required"] || {}).to_a.include?(attribute_name) \ || (one_of_sample["properties"] || {}).keys.include?(attribute_name) \ @@ -115,7 +116,7 @@ def generate_random_object(subschema) def generate_random_array(props) min = props["minItems"] || 0 max = props["maxItems"] || 10 - num_items = rand(min..max) + num_items = @random.rand(min..max) num_items.times.map do # sometimes arrays don't have `items` specified, not sure if this is a bug @@ -125,11 +126,11 @@ def generate_random_array(props) def generate_random_string(props) if props["format"] - RandomContentGenerator.string_for_type(props["format"]) + @generator.string_for_type(props["format"]) elsif props["pattern"] - RandomContentGenerator.string_for_regex(props["pattern"]) + @generator.string_for_regex(props["pattern"]) else - RandomContentGenerator.string(props["minLength"], props["maxLength"]) + @generator.string(props["minLength"], props["maxLength"]) end end diff --git a/spec/lib/random_content_generator_spec.rb b/spec/lib/random_content_generator_spec.rb index 7f7b480..0419fcf 100644 --- a/spec/lib/random_content_generator_spec.rb +++ b/spec/lib/random_content_generator_spec.rb @@ -3,7 +3,7 @@ RSpec.describe GovukSchemas::RandomContentGenerator do describe ".random_identifier" do it "generates a string" do - string = GovukSchemas::RandomContentGenerator.random_identifier(separator: "_") + string = GovukSchemas::RandomContentGenerator.new.random_identifier(separator: "_") expect(string).to be_a(String) end