Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[PREVIEW] teuthology: add support for graph walks of QA suites #2012

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ summary = Ceph test framework
python_requires = >=3.10
packages = find:
install_requires =
GitPython
PyYAML
ansible-core==2.16.6
apache-libcloud
Expand Down
97 changes: 87 additions & 10 deletions teuthology/repo_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ def current_branch(path: str) -> str:
return result


def enforce_repo_state(repo_url, dest_path, branch, commit=None, remove_on_error=True):
def enforce_repo_state(dest_clone, dest_path, repo_url, branch, commit=None, remove_on_error=True):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd prefer not to change the initial signature: leave repo_url in the first position, and add dest_clone as an optional argument with a default value.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure, there will probably be lots of reworking to this code but it was a dirty hack to start because the clones after every QA suite tweak was driving me nuts. That's why it's in this PR.

"""
Use git to either clone or update a given repo, forcing it to switch to the
specified branch.
Expand All @@ -114,25 +114,100 @@ def enforce_repo_state(repo_url, dest_path, branch, commit=None, remove_on_error
# sentinel to track whether the repo has checked out the intended
# version, in addition to being cloned
repo_reset = os.path.join(dest_path, '.fetched_and_reset')
log.info("enforce_repo_state %s %s %s %s %s", dest_clone, dest_path, repo_url, branch, commit)
try:
if not os.path.isdir(dest_path):
clone_repo(repo_url, dest_path, branch, shallow=commit is None)
if not os.path.isdir(dest_clone):
bare_repo(dest_clone)
elif not commit and not is_fresh(sentinel):
set_remote(dest_path, repo_url)
fetch_branch(dest_path, branch)
#set_remote(dest_path, repo_url)
#fetch_branch(dest_path, branch)
touch_file(sentinel)

if commit and os.path.exists(repo_reset):
return
#if commit and os.path.exists(repo_reset):
#return

reset_repo(repo_url, dest_path, branch, commit)
touch_file(repo_reset)
myfetch(dest_clone, repo_url, branch, commit)
myworkspace(dest_clone, dest_path)
#reset_repo(repo_url, dest_path, branch, commit)
#touch_file(repo_reset)
# remove_pyc_files(dest_path)
except (BranchNotFoundError, CommitNotFoundError):
if remove_on_error:
shutil.rmtree(dest_path, ignore_errors=True)
raise

def bare_repo(git_dir):
log.info("bare_repo %s", git_dir)
args = ['git', 'init', '--bare', git_dir]
proc = subprocess.Popen(args)
Copy link
Contributor

@phlogistonjohn phlogistonjohn Dec 17, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you don't need a lot of the complexity that Popen provides, I heartily recommend using the simpler subprocess.run function (available since 3.5). You can use the check=True parameter to have it raise exceptions on non-zero exit automatically too.

#args,
#stdout=subprocess.PIPE,
#stderr=subprocess.STDOUT)
if proc.wait() != 0:
raise RuntimeError("oops")

def myworkspace(git_dir, workspace_dir):
    """
    Make sure a git worktree exists at ``workspace_dir``.

    If ``workspace_dir`` already exists, ``git log -1`` is run inside it as a
    sanity check and the directory is otherwise left untouched. If it does
    not exist, ``git worktree add`` is run from ``git_dir`` to create it from
    ``FETCH_HEAD``, on a branch named after the last component of
    ``workspace_dir`` (``-B`` creates or resets that branch).

    :param git_dir:       Directory the ``git worktree add`` command runs in
    :param workspace_dir: Path of the worktree to verify or create
    :raises RuntimeError: If a git command exits with a non-zero status
    """
    log.info("myworkspace %s %s", git_dir, workspace_dir)

    if os.path.exists(workspace_dir):
        args = ['git', 'log', '-1']
        result = subprocess.run(args, cwd=workspace_dir)
        if result.returncode != 0:
            raise RuntimeError(
                "%s failed in %s (exit status %d)"
                % (' '.join(args), workspace_dir, result.returncode)
            )
        return

    # normpath() drops a trailing separator, which would otherwise make
    # basename() return '' and hand git an empty branch name.
    branch_name = os.path.basename(os.path.normpath(workspace_dir))
    args = [
        'git',
        'worktree',
        'add',
        '-B', branch_name,
        '--no-track',
        '--force',
        workspace_dir,
        'FETCH_HEAD',
    ]
    result = subprocess.run(args, cwd=git_dir)
    if result.returncode != 0:
        raise RuntimeError(
            "%s failed in %s (exit status %d)"
            % (' '.join(args), git_dir, result.returncode)
        )


def myfetch(git_dir, url, branch, commit=None):
    """
    Fetch ``commit`` (or, when no commit is given, ``branch``) from ``url``
    by running ``git fetch`` inside ``git_dir``.

    When a commit is requested and ``git log -1 <commit>`` already succeeds
    in ``git_dir``, nothing is fetched.

    :param git_dir: Directory the git commands run in
    :param url:     Remote URL passed to ``git fetch``
    :param branch:  Branch name; validated, and fetched when ``commit`` is None
    :param commit:  Optional commit to fetch instead of the branch
    :raises BranchNotFoundError: If git reports that the remote ref for
                                 ``branch`` could not be found
    :raises GitError:            If ``git fetch`` fails for any other reason
    """
    log.info("myfetch %s %s %s %s", git_dir, url, branch, commit)
    validate_branch(branch)
    if commit is not None:
        # Skip the fetch entirely if the commit is already available locally.
        # NOTE(review): in that case FETCH_HEAD is not refreshed by this
        # call -- confirm that callers relying on FETCH_HEAD are fine with it.
        probe = subprocess.run(
            ['git', 'log', '-1', commit],
            cwd=git_dir,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
        if probe.returncode == 0:
            return
    ref = commit if commit is not None else branch
    # Capture the combined output: the failure path below has to inspect it.
    # (Previously the output was never piped, so reading it crashed.)
    result = subprocess.run(
        ['git', 'fetch', url, ref],
        cwd=git_dir,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
    )
    if result.returncode != 0:
        not_found_str = "fatal: couldn't find remote ref %s" % branch
        out = result.stdout.decode(errors='replace')
        log.error(out)
        # Lower-case both sides so mixed-case branch names still match.
        if not_found_str.lower() in out.lower():
            raise BranchNotFoundError(branch)
        else:
            raise GitError("git fetch failed!")


def clone_repo(repo_url, dest_path, branch, shallow=True):
"""
Expand Down Expand Up @@ -354,6 +429,7 @@ def fetch_repo(url, branch, commit=None, bootstrap=None, lock=True):
os.mkdir(src_base_path)
ref_dir = ref_to_dirname(commit or branch)
dirname = '%s_%s' % (url_to_dirname(url), ref_dir)
dest_clone = os.path.join(src_base_path, url_to_dirname(url))
dest_path = os.path.join(src_base_path, dirname)
# only let one worker create/update the checkout at a time
lock_path = dest_path.rstrip('/') + '.lock'
Expand All @@ -362,7 +438,8 @@ def fetch_repo(url, branch, commit=None, bootstrap=None, lock=True):
try:
while proceed():
try:
enforce_repo_state(url, dest_path, branch, commit)
#enforce_repo_state(url, dest_path, branch, commit)
enforce_repo_state(dest_clone, dest_path, url, branch, commit)
if bootstrap:
sentinel = os.path.join(dest_path, '.bootstrapped')
if commit and os.path.exists(sentinel) or is_fresh(sentinel):
Expand Down
200 changes: 200 additions & 0 deletions teuthology/suite/build_graph.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,200 @@
import logging
import os
import random
import yaml

from teuthology.suite import graph

log = logging.getLogger(__name__)


def build_graph(path, subset=None, no_nested_subset=False, seed=None, suite_repo_path=None, config=None):
    """
    Return a list of items described by path such that if the list of
    items is chunked into mincyclicity pieces, each piece is still a
    good subset of the suite.

    A good subset of a product ensures that each facet member appears
    at least once.  A good subset of a sum ensures that the subset of
    each sub collection reflected in the subset is a good subset.

    A mincyclicity of 0 does not attempt to enforce the good subset
    property.

    The input is just a path.  The output is an array of (description,
    [file list]) tuples.

    For a normal file we generate a new item for the result list.

    For a directory, we (recursively) generate a new item for each
    file/dir.

    For a directory with a magic '+' file, we generate a single item
    that concatenates all files/subdirs (A Sum).

    For a directory with a magic '%' file, we generate a result set
    for each item in the directory, and then do a product to generate
    a result list with all combinations (A Product). If the file
    contains an integer, it is used as the divisor for a random
    subset.

    For a directory with a magic '$' file, or for a directory whose name
    ends in '$', we generate a list of all items that we will randomly
    choose from.

    The final description (after recursion) for each item will look
    like a relative path.  If there was a % product, that path
    component will appear as a file with braces listing the selection
    of chosen subitems.

    :param path:             The path to search for yaml fragments
    :param subset:           (index, outof)
    :param no_nested_subset: disable nested subsets
    :param seed:             The seed for repeatable random test
    :param suite_repo_path:  Forwarded to _build_graph, which opens it as a
                             git repository for '.lua' fragments
    :param config:           Forwarded to _build_graph unchanged
    :returns:                A list of (description, paths) tuples, one per
                             item produced by walking the graph
    """
    if subset:
        # Let logging do the interpolation; the rendered message is unchanged.
        log.info('Subset=%s/%s', subset[0], subset[1])
    if no_nested_subset:
        log.info("no_nested_subset")
    random.seed(seed)
    (which, divisions) = (0, 1) if subset is None else subset
    G = graph.Graph()
    log.info("building graph")
    _build_graph(G, path, suite_repo_path=suite_repo_path, config=config)
    # Dumping the graph here is deliberately disabled: passing G.print() as a
    # log argument renders the whole graph even when debug logging is off.
    #log.debug("graph:\n%s", G.print())
    configs = []
    log.info("walking graph")
    for desc, paths in G.walk(which, divisions, no_nested_subset):
        log.debug("generated %s", desc)
        configs.append((desc, paths))
    log.info("generated %d configs", len(configs))
    return configs

# To start: let's plug git into Lua so we can inspect versions of Ceph!
# - Use Lua to control how large the subset should be.. based on a target number of jobs..
# - Use Lua to tag parts of a suite that should be included in a broader smoke run.
# - Use Lua to create the graph.

#Graph
#Lua rewrite
#Change edge randomization based on visitation. Prune before adding to nodes list during walk.
#Set tags at root of graph. Then Lua code in dir prunes at graph creation time.
#Set subset based on target # of jobs
# TODO: maybe reimplement graph.lua so that we can have the graph expand / prune with lua code provided by qa/ suite
# reef.lua:
# git = lupa.git
# function generate()
# ...
# end
# function prune()
# end
def _build_graph(G, path, **kwargs):
flatten = kwargs.pop('flatten', False)
suite_repo_path = kwargs.get('suite_repo_path', None)
config = kwargs.get('config', None)

if os.path.basename(path)[0] == '.':
return None
if not os.path.exists(path):
raise IOError('%s does not exist (abs %s)' % (path, os.path.abspath(path)))
if os.path.isfile(path):
if path.endswith('.yaml'):
node = graph.Node(path, G)
with open(path) as f:
txt = f.read()
node.set_content(yaml.safe_load(txt))
return node
if path.endswith('.lua'):
if suite_repo_path is not None:
import git
Gsuite = git.Repo(suite_repo_path)
else:
Gsuite = None
log.info("%s", Gsuite)
node = graph.LuaGraph(path, G, Gsuite)
node.load()
return node
return None
if os.path.isdir(path):
if path.endswith('.disable'):
return None
files = sorted(os.listdir(path))
if len(files) == 0:
return None
subg = graph.SubGraph(path, G)
specials = ('+', '$', '%')
if '+' in files:
# concatenate items
for s in specials:
if s in files:
files.remove(s)

current = subg.source
for fn in sorted(files):
node = _build_graph(G, os.path.join(path, fn), flatten=True, **kwargs)
if node:
current.add_edge(node)
current = node
subg.link_node_to_sink(current)
elif path.endswith('$') or '$' in files:
# pick a random item -- make sure we don't pick any magic files
for s in specials:
if s in files:
files.remove(s)

for fn in sorted(files):
node = _build_graph(G, os.path.join(path, fn), flatten=False, **kwargs)
if node:
subg.source.add_edge(node) # to source
subg.link_node_to_sink(node) # to sink
subg.set_subset(len(files), force=True)
elif '%' in files:
# convolve items
for s in specials:
if s in files:
files.remove(s)

with open(os.path.join(path, '%')) as f:
divisions = f.read()
if len(divisions) == 0:
divisions = 1
else:
divisions = int(divisions)
assert divisions > 0
subg.set_subset(divisions)

current = subg.source
for fn in sorted(files):
node = _build_graph(G, os.path.join(path, fn), flatten=False, **kwargs)
if node:
current.add_edge(node)
current = node
subg.link_node_to_sink(current)
subg.set_subset(divisions)
else:
# list items
for s in specials:
if s in files:
files.remove(s)

current = subg.source
for fn in sorted(files):
node = _build_graph(G, os.path.join(path, fn), flatten=flatten, **kwargs)
if node:
current.add_edge(node) # to source
if flatten:
current = node
else:
subg.link_node_to_sink(node) # to sink
if flatten:
subg.link_node_to_sink(current) # to sink

return subg

raise RuntimeError(f"Invalid path {path} seen in _build_graph")
53 changes: 53 additions & 0 deletions teuthology/suite/fragment-generate.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
-- Whitelist of names visible to sandboxed scripts. Only these Lua builtins
-- and Python bridge helpers (the "lunatic" builtins) are reachable; anything
-- not listed here resolves to nil inside a script.
local SCRIPT_ENV = {
  -- error handling
  assert = assert,
  error = error,

  -- iteration
  ipairs = ipairs,
  next = next,
  pairs = pairs,

  -- conversion
  tonumber = tonumber,
  tostring = tostring,

  -- standard library tables
  math = math,

  -- helpers taken from the host-provided `python` global
  py_attrgetter = python.as_attrgetter,
  py_itemgetter = python.as_itemgetter,
  py_enumerate = python.enumerate,
  py_iterex = python.iterex,
  py_dict = python.builtins.dict,
  py_len = python.builtins.len,
  py_list = python.builtins.list,
  py_tuple = python.builtins.tuple,
}

-- Metatable that makes a script environment fall back to the whitelist for
-- any name it does not define itself.
local SCRIPT_MT = { __index = SCRIPT_ENV }

--- Compile a script inside a restricted sandbox.
-- The script is loaded as a text-only chunk named 'teuthology' whose global
-- environment is a fresh table exposing `log`; every other name lookup falls
-- through the SCRIPT_MT metatable.
-- @param script      Lua source text of the script
-- @param log         logger object made available to the script as `log`
-- @param deep_merge  accepted for interface compatibility; not exposed to the script
-- @param yaml_load   accepted for interface compatibility; not exposed to the script
-- @return env  the sandbox environment table the chunk runs in
-- @return f    the compiled chunk (call it to execute the script)
-- Raises an error if the script fails to compile.
function new_script(script, log, deep_merge, yaml_load)
  -- create a restricted sandbox for the script:
  local env = setmetatable({
    --deep_merge = deep_merge,
    log = log,
    --yaml_load = yaml_load,
  }, SCRIPT_MT)

  -- load() takes the source string directly; mode 't' refuses precompiled
  -- (binary) chunks, and `env` becomes the chunk's global environment.
  local f, err = load(script, 'teuthology', 't', env)
  if f == nil then
    error("failure to load script: " .. tostring(err))
  end
  return env, f
end
Loading
Loading