Skip to content

Commit

Permalink
Add is_normalized and starts_with to paths module (#514)
Browse files Browse the repository at this point in the history
* Add is_normalized and starts_with to paths module.

* Update docs
  • Loading branch information
comius authored May 29, 2024
1 parent f351bed commit 0e485c8
Show file tree
Hide file tree
Showing 3 changed files with 202 additions and 0 deletions.
48 changes: 48 additions & 0 deletions docs/paths_doc.md
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,33 @@ Returns `True` if `path` is an absolute path.
`True` if `path` is an absolute path.


<a id="paths.is_normalized"></a>

## paths.is_normalized

<pre>
paths.is_normalized(<a href="#paths.is_normalized-str">str</a>, <a href="#paths.is_normalized-look_for_same_level_references">look_for_same_level_references</a>)
</pre>

Returns true if the passed path doesn't contain uplevel references "..".

Also checks for single-dot references "." if look_for_same_level_references
is `True`.


**PARAMETERS**


| Name | Description | Default Value |
| :------------- | :------------- | :------------- |
| <a id="paths.is_normalized-str"></a>str | The path string to check. | none |
| <a id="paths.is_normalized-look_for_same_level_references"></a>look_for_same_level_references | If True, the path must contain neither uplevel references ".." nor single-dot references "."; if False, only uplevel references ".." are checked. | `True` |

**RETURNS**

True if the path is normalized, False otherwise.


<a id="paths.join"></a>

## paths.join
Expand Down Expand Up @@ -239,3 +266,24 @@ the leading dot). The returned tuple always satisfies the relationship
`root + ext == p`.


<a id="paths.starts_with"></a>

## paths.starts_with

<pre>
paths.starts_with(<a href="#paths.starts_with-path_a">path_a</a>, <a href="#paths.starts_with-path_b">path_b</a>)
</pre>

Returns True if and only if path_b is an ancestor of path_a.

Does not handle OS dependent case-insensitivity.

**PARAMETERS**


| Name | Description | Default Value |
| :------------- | :------------- | :------------- |
| <a id="paths.starts_with-path_a"></a>path_a | The path to test. | none |
| <a id="paths.starts_with-path_b"></a>path_b | The candidate ancestor path. | none |


78 changes: 78 additions & 0 deletions lib/paths.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,67 @@ def _normalize(path):

return path or "."

# Scanner states used by _is_normalized. Each state describes the path segment
# read so far, i.e. the characters since the last "/" (or the string start).
_BASE = 0  # The segment contains something other than just "." or "..".
_SEPARATOR = 1  # The segment is still empty.
_DOT = 2  # The segment read so far is exactly ".".
_DOTDOT = 3  # The segment read so far is exactly "..".

def _is_normalized(str, look_for_same_level_references = True):
    """Returns true if the passed path doesn't contain uplevel references "..".

    Also checks for single-dot references "." if look_for_same_level_references
    is `True`.

    Only whole "/"-separated segments are inspected, so names such as "..." or
    ".bashrc" are accepted. Repeated or trailing slashes do not make a path
    non-normalized for the purposes of this check.

    Args:
      str: The path string to check.
      look_for_same_level_references: If True checks if path doesn't contain
        uplevel references ".." or single-dot references ".".

    Returns:
      True if the path is normalized, False otherwise.
    """
    state = _SEPARATOR
    for c in str.elems():
        if c == "/":
            # A separator terminates the current segment; reject it if it
            # turned out to be a ".." or (optionally) a "." reference.
            if state == _DOTDOT:
                return False
            if state == _DOT and look_for_same_level_references:
                return False
            state = _SEPARATOR
        elif c == ".":
            if state == _SEPARATOR:
                state = _DOT
            elif state == _DOT:
                state = _DOTDOT
            else:
                # A third dot, or a dot inside a regular name.
                state = _BASE
        else:
            state = _BASE

    # The end of the string terminates the last segment as well.
    if state == _DOT and look_for_same_level_references:
        return False
    return state != _DOTDOT

def _relativize(path, start):
"""Returns the portion of `path` that is relative to `start`.
Expand Down Expand Up @@ -230,13 +291,30 @@ def _split_extension(p):
dot_distance_from_end = len(b) - last_dot_in_basename
return (p[:-dot_distance_from_end], p[-dot_distance_from_end:])

def _starts_with(path_a, path_b):
    """Returns True if and only if path_b is an ancestor of path_a.

    Both paths are passed through `_normalize` before being compared, and the
    comparison is made on whole path segments: "foo/bar" starts with "foo" but
    not with "fo". A path is considered to start with itself.

    Does not handle OS dependent case-insensitivity.

    Args:
      path_a: The path to test.
      path_b: The candidate ancestor path. The empty string is treated as an
        ancestor of every path.

    Returns:
      True if `path_a` is equal to `path_b` or located below it, False
      otherwise.
    """
    if not path_b:
        # all paths start with the empty string
        return True
    norm_a = _normalize(path_a)
    norm_b = _normalize(path_b)

    # startswith() is also False whenever norm_b is longer than norm_a.
    if not norm_a.startswith(norm_b):
        return False
    if len(norm_a) == len(norm_b):
        return True

    # norm_b is a proper string prefix of norm_a; it is an ancestor only if
    # the prefix stops on a segment boundary. When norm_b itself ends in "/"
    # (e.g. the root "/"), the boundary is inside the prefix, so the character
    # that follows it in norm_a is the start of a name rather than a "/".
    return norm_b.endswith("/") or norm_a[len(norm_b)] == "/"

# Public API of this module; each field exposes the private function of the
# same name. Fields are listed alphabetically.
paths = struct(
    basename = _basename,
    dirname = _dirname,
    is_absolute = _is_absolute,
    is_normalized = _is_normalized,
    join = _join,
    normalize = _normalize,
    relativize = _relativize,
    replace_extension = _replace_extension,
    split_extension = _split_extension,
    starts_with = _starts_with,
)
76 changes: 76 additions & 0 deletions tests/paths_tests.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,55 @@ def _normalize_test(ctx):

normalize_test = unittest.make(_normalize_test)

def _is_normalized_test(ctx):
    """Unit tests for paths.is_normalized."""
    env = unittest.begin(ctx)

    # Paths without any "." or ".." segment are accepted.
    accepted = [
        # The most basic cases.
        "",
        "/",
        "/tmp",
        "tmp",
        "c:/",
        # Adjacent slashes are not treated as a normalization problem.
        "foo//bar",
        "foo////bar",
        # Neither is any number of leading slashes.
        "/foo",
        "//foo",
        "///foo",
        # Nor are trailing slashes.
        "/",
        "foo/",
        "foo/bar/",
    ]
    for path in accepted:
        asserts.true(env, paths.is_normalized(path))

    # Paths with a "." or ".." segment in any position are rejected.
    rejected = [
        # The most basic cases.
        ".",
        "../a",
        "a/..",
        # Single-dot segments.
        "foo/./bar",
        "./foo/bar",
        "foo/bar/.",
        "/.",
        # Uplevel segments.
        "foo/../bar",
        "foo/bar/..",
        "foo/..",
        "foo/bar/../..",
        "foo/../..",
        "/foo/../..",
        "a/b/../../../../c/d/..",
    ]
    for path in rejected:
        asserts.false(env, paths.is_normalized(path))

    return unittest.end(env)

is_normalized_test = unittest.make(_is_normalized_test)

def _relativize_test(ctx):
"""Unit tests for paths.relativize."""
env = unittest.begin(ctx)
Expand Down Expand Up @@ -276,6 +325,31 @@ def _split_extension_test(ctx):

split_extension_test = unittest.make(_split_extension_test)

def _starts_with_test(ctx):
    """Unit tests for paths.starts_with."""
    env = unittest.begin(ctx)

    # (path_a, path_b) pairs for which path_b is an ancestor of path_a.
    matching = [
        # The empty string is an ancestor of everything.
        ("foo", ""),
        # Regular cases.
        ("foo/bar", "foo"),
        ("foo/bar/baz", "foo/bar"),
        ("foo/bar/baz", "foo"),
        # A parent directory reference is normalized away first.
        ("foo/bar/../baz", "foo"),
        # Relative paths work, as long as they share a common start.
        ("../foo/bar/baz/file", "../foo/bar/baz"),
        ("../foo/bar/baz/file", "../foo/bar"),
    ]
    for path_a, path_b in matching:
        asserts.true(env, paths.starts_with(path_a, path_b))

    # (path_a, path_b) pairs for which path_b is not an ancestor of path_a.
    non_matching = [
        # Unlike the empty string, "." is not an ancestor of "foo".
        ("foo", "."),
        # A partial segment name is not an ancestor.
        ("foo/bar", "fo"),
    ]
    for path_a, path_b in non_matching:
        asserts.false(env, paths.starts_with(path_a, path_b))

    return unittest.end(env)

starts_with_test = unittest.make(_starts_with_test)

def paths_test_suite():
"""Creates the test targets and test suite for paths.bzl tests."""
unittest.suite(
Expand All @@ -285,7 +359,9 @@ def paths_test_suite():
is_absolute_test,
join_test,
normalize_test,
is_normalized_test,
relativize_test,
replace_extension_test,
split_extension_test,
starts_with_test,
)

0 comments on commit 0e485c8

Please sign in to comment.