Skip to content

Commit

Permalink
Merge pull request #1058 from joshuagl/joshuagl/protected2public
Browse files Browse the repository at this point in the history
Make some helper functions for determining hashed bins part of repository_lib
  • Loading branch information
lukpueh authored Jun 30, 2020
2 parents 49031b4 + ed45fb2 commit de8649e
Show file tree
Hide file tree
Showing 2 changed files with 124 additions and 131 deletions.
130 changes: 116 additions & 14 deletions tuf/repository_lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,10 @@
# Supported key types.
SUPPORTED_KEY_TYPES = ['rsa', 'ed25519', 'ecdsa-sha2-nistp256']

# The algorithm used by the repository to generate the path hash prefixes
# of hashed bin delegations. Please see delegate_hashed_bins()
HASH_FUNCTION = tuf.settings.DEFAULT_HASH_ALGORITHM


def _generate_and_write_metadata(rolename, metadata_filename,
targets_directory, metadata_directory, storage_backend,
Expand Down Expand Up @@ -1023,14 +1027,120 @@ def get_metadata_versioninfo(rolename, repository_name):



# TODO: Is this function needed? It does not seem used, also the same
# function exists as private method in updater.Updater._get_target_hash.
def create_bin_name(low, high, prefix_len):
"""
<Purpose>
Create a string name of a delegated hash bin, where name will be a range of
zero-padded (up to prefix_len) strings i.e. for low=00, high=07,
prefix_len=3 the returned name would be '000-007'.
<Arguments>
low:
The low end of the prefix range to be binned
high:
The high end of the prefix range to be binned
prefix_len:
The length of the prefix range components
<Returns>
A string bin name, with each end of the range zero-padded up to prefix_len
"""
if low == high:
return "{low:0{len}x}".format(low=low, len=prefix_len)

return "{low:0{len}x}-{high:0{len}x}".format(low=low, high=high,
len=prefix_len)





def get_bin_numbers(number_of_bins):
"""
<Purpose>
Given the desired number of bins (number_of_bins) calculate the prefix
length (prefix_length), total number of prefixes (prefix_count) and the
number of prefixes to be stored in each bin (bin_size).
Example: number_of_bins = 32
prefix_length = 2
prefix_count = 256
bin_size = 8
That is, each of the 32 hashed bins are responsible for 8 hash prefixes,
i.e. 00-07, 08-0f, ..., f8-ff.
<Arguments>
number_of_bins:
The number of hashed bins in use
<Returns>
A tuple of three values:
1. prefix_length: the length of each prefix
2. prefix_count: the total number of prefixes in use
3. bin_size: the number of hash prefixes to be stored in each bin
"""
# Convert 'number_of_bins' to hexadecimal and determine the number of
# hexadecimal digits needed by each hash prefix
prefix_length = len("{:x}".format(number_of_bins - 1))
# Calculate the total number of hash prefixes (e.g., 000 - FFF total values)
prefix_count = 16 ** prefix_length
# Determine how many prefixes to assign to each bin
bin_size = prefix_count // number_of_bins

# For simplicity, ensure that 'prefix_count' (16 ^ n) can be evenly
# distributed over 'number_of_bins' (must be 2 ^ n). Each bin will contain
# (prefix_count / number_of_bins) hash prefixes.
if prefix_count % number_of_bins != 0:
# Note: x % y != 0 does not guarantee that y is not a power of 2 for
# arbitrary x and y values. However, due to the relationship between
# number_of_bins and prefix_count, it is true for them.
raise securesystemslib.exceptions.Error('The "number_of_bins" argument'
' must be a power of 2.')

return prefix_length, prefix_count, bin_size





def find_bin_for_target_hash(target_hash, number_of_bins):
"""
<Purpose>
For a given hashed filename, target_hash, calculate the name of a hashed bin
into which this file would be delegated given number_of_bins bins are in
use.
<Arguments>
target_hash:
The hash of the target file's path
number_of_bins:
The number of hashed_bins in use
<Returns>
The name of the hashed bin target_hash would be binned into
"""

prefix_length, _, bin_size = get_bin_numbers(number_of_bins)

prefix = int(target_hash[:prefix_length], 16)

low = prefix - (prefix % bin_size)
high = (low + bin_size - 1)

return create_bin_name(low, high, prefix_length)





def get_target_hash(target_filepath):
"""
<Purpose>
Compute the hash of 'target_filepath'. This is useful in conjunction with
the "path_hash_prefixes" attribute in a delegated targets role, which
tells us which paths it is implicitly responsible for.
tells us which paths a role is implicitly responsible for.
The repository may optionally organize targets into hashed bins to ease
target delegations and role metadata management. The use of consistent
Expand All @@ -1053,17 +1163,9 @@ def get_target_hash(target_filepath):
"""
tuf.formats.RELPATH_SCHEMA.check_match(target_filepath)

# Calculate the hash of the filepath to determine which bin to find the
# target. The client currently assumes the repository uses
# 'tuf.settings.DEFAULT_HASH_ALGORITHM' to generate hashes and 'utf-8'.
digest_object = securesystemslib.hash.digest(
tuf.settings.DEFAULT_HASH_ALGORITHM)
encoded_target_filepath = target_filepath.encode('utf-8')
digest_object.update(encoded_target_filepath)
target_filepath_hash = digest_object.hexdigest()

return target_filepath_hash

digest_object = securesystemslib.hash.digest(algorithm=HASH_FUNCTION)
digest_object.update(target_filepath.encode('utf-8'))
return digest_object.hexdigest()



Expand Down
125 changes: 8 additions & 117 deletions tuf/repository_tool.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@

#!/usr/bin/env python

# Copyright 2013 - 2017, New York University and the TUF contributors
Expand Down Expand Up @@ -103,10 +104,6 @@
# through 2031 and beyond.
DEFAULT_RSA_KEY_BITS=3072

# The algorithm used by the repository to generate the path hash prefixes
# of hashed bin delegations. Please see delegate_hashed_bins()
HASH_FUNCTION = tuf.settings.DEFAULT_HASH_ALGORITHM

# The default number of hashed bin delegations
DEFAULT_NUM_BINS=1024

Expand Down Expand Up @@ -2535,7 +2532,7 @@ def delegate_hashed_bins(self, list_of_targets, keys_of_hashed_bins,
securesystemslib.formats.ANYKEYLIST_SCHEMA.check_match(keys_of_hashed_bins)
tuf.formats.NUMBINS_SCHEMA.check_match(number_of_bins)

prefix_length, prefix_count, bin_size = _get_bin_numbers(number_of_bins)
prefix_length, prefix_count, bin_size = repo_lib.get_bin_numbers(number_of_bins)

logger.info('Creating hashed bin delegations.\n' +
repr(len(list_of_targets)) + ' total targets.\n' +
Expand All @@ -2549,7 +2546,7 @@ def delegate_hashed_bins(self, list_of_targets, keys_of_hashed_bins,
ordered_roles = []
for idx in range(0, prefix_count, bin_size):
high = idx + bin_size - 1
name = _create_bin_name(idx, high, prefix_length)
name = repo_lib.create_bin_name(idx, high, prefix_length)
if bin_size == 1:
target_hash_prefixes = [name]
else:
Expand All @@ -2573,7 +2570,7 @@ def delegate_hashed_bins(self, list_of_targets, keys_of_hashed_bins,
# Determine the hash prefix of 'target_path' by computing the digest of
# its path relative to the targets directory.
# We must hash a target path as it appears in the metadata
hash_prefix = _get_hash(target_path)[:prefix_length]
hash_prefix = repo_lib.get_target_hash(target_path)[:prefix_length]
ordered_roles[int(hash_prefix, 16) // bin_size]["target_paths"].append(target_path)

keyids, keydict = _keys_to_keydict(keys_of_hashed_bins)
Expand Down Expand Up @@ -2680,8 +2677,8 @@ def add_target_to_bin(self, target_filepath, number_of_bins=DEFAULT_NUM_BINS,

# TODO: check target_filepath is sane

path_hash = _get_hash(target_filepath)
bin_name = _find_bin_for_hash(path_hash, number_of_bins)
path_hash = repo_lib.get_target_hash(target_filepath)
bin_name = repo_lib.find_bin_for_target_hash(path_hash, number_of_bins)

# Ensure the Targets object has delegated to hashed bins
if not self._delegated_roles.get(bin_name, None):
Expand Down Expand Up @@ -2742,8 +2739,8 @@ def remove_target_from_bin(self, target_filepath,

# TODO: check target_filepath is sane?

path_hash = _get_hash(target_filepath)
bin_name = _find_bin_for_hash(path_hash, number_of_bins)
path_hash = repo_lib.get_target_hash(target_filepath)
bin_name = repo_lib.find_bin_for_target_hash(path_hash, number_of_bins)

# Ensure the Targets object has delegated to hashed bins
if not self._delegated_roles.get(bin_name, None):
Expand Down Expand Up @@ -2845,112 +2842,6 @@ def _keys_to_keydict(keys):




def _get_hash(target_filepath):
"""
<Purpose>
Generate a hash of target_filepath, a path to a file (not the file
itself), using HASH_FUNCTION
<Arguments>
target_filepath:
A path to a targetfile, relative to the targets directory
<Returns>
The hexdigest hash of the filepath.
"""

# TODO: ensure target_filepath is relative to targets_directory?
digest_object = securesystemslib.hash.digest(algorithm=HASH_FUNCTION)
digest_object.update(target_filepath.encode('utf-8'))
return digest_object.hexdigest()




def _create_bin_name(low, high, prefix_len):
"""
<Purpose>
Create a string name of a delegated hash bin, where name will be a range of
zero-padded (up to prefix_len) strings i.e. for low=00, high=07,
prefix_len=3 the returned name would be '000-007'.
"""
if low == high:
return "{low:0{len}x}".format(low=low, len=prefix_len)

return "{low:0{len}x}-{high:0{len}x}".format(low=low, high=high,
len=prefix_len)





def _get_bin_numbers(number_of_bins):
"""
Given the desired number of bins (number_of_bins) calculate the prefix length
(prefix_length), total number of prefixes (prefix_count) and the number of
prefixes to be stored in each bin (bin_size).
Example: number_of_bins = 32
prefix_length = 2
prefix_count = 256
bin_size = 8
That is, each of the 32 hashed bins are responsible for 8 hash prefixes, i.e.
00-07, 08-0f, ..., f8-ff.
"""
# Convert 'number_of_bins' to hexadecimal and determine the number of
# hexadecimal digits needed by each hash prefix
prefix_length = len("{:x}".format(number_of_bins - 1))
# Calculate the total number of hash prefixes (e.g., 000 - FFF total values)
prefix_count = 16 ** prefix_length
# Determine how many prefixes to assign to each bin
bin_size = prefix_count // number_of_bins

# For simplicity, ensure that 'prefix_count' (16 ^ n) can be evenly
# distributed over 'number_of_bins' (must be 2 ^ n). Each bin will contain
# (prefix_count / number_of_bins) hash prefixes.
if prefix_count % number_of_bins != 0:
# Note: x % y != 0 does not guarantee that y is not a power of 2 for
# arbitrary x and y values. However, due to the relationship between
# number_of_bins and prefix_count, it is true for them.
raise securesystemslib.exceptions.Error('The "number_of_bins" argument'
' must be a power of 2.')

return prefix_length, prefix_count, bin_size




def _find_bin_for_hash(path_hash, number_of_bins):
"""
<Purpose>
For a given hashed filename, path_hash, calculate the name of a hashed bin
into which this file would be delegated given number_of_bins bins are in
use.
<Arguments>
path_hash:
The hash of the target file's path
number_of_bins:
The number of hashed_bins in use
<Returns>
The name of the hashed bin path_hash would be binned into.
"""

prefix_length, _, bin_size = _get_bin_numbers(number_of_bins)

prefix = int(path_hash[:prefix_length], 16)

low = prefix - (prefix % bin_size)
high = (low + bin_size - 1)

return _create_bin_name(low, high, prefix_length)





def create_new_repository(repository_directory, repository_name='default',
storage_backend=None):
"""
Expand Down

0 comments on commit de8649e

Please sign in to comment.