-
Notifications
You must be signed in to change notification settings - Fork 543
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[instructlab] Add new plugin for Instructlab
This change adds a new plugin that captures information from an installation of Instructlab. The capture works in two ways: - It goes through all containers whose names contain Instructlab, and - If specified, it will gather information from a user home directory. In the first case, we gather different outputs from the command 'ilab', as well as directories containing training data, configuration files, chatlogs, taxonomies, and other data. In the second case, we capture only certain directories. Users can also specify to gather .cache directory, where models and OCI directories can be found. This option is disabled by default because, while the information can be very useful, the directories are very big. Related: RH: PLMCORE-10599, RHEL-54137, RHEL-58173 Closes: #3804 Signed-off-by: Jose Castillo <[email protected]> Signed-off-by: Pavel Moravec <[email protected]>
- Loading branch information
Showing
1 changed file
with
158 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,158 @@ | ||
# Copyright (C) 2024 Red Hat, Inc., Jose Castillo <[email protected]> | ||
|
||
# This file is part of the sos project: https://github.com/sosreport/sos | ||
# | ||
# This copyrighted material is made available to anyone wishing to use, | ||
# modify, copy, or redistribute it subject to the terms and conditions of | ||
# version 2 of the GNU General Public License. | ||
# | ||
# See the LICENSE file in the source distribution for further information. | ||
|
||
import pwd | ||
from sos.report.plugins import Plugin, IndependentPlugin, PluginOpt | ||
|
||
|
||
class Instructlab(Plugin, IndependentPlugin): | ||
""" | ||
This plugin is used to capture information about | ||
Instructlab installations. | ||
InstructLab is an open source project for enhancing | ||
large language models (LLMs) used in generative | ||
artificial intelligence (gen AI) applications. | ||
Instructlab can run either as a container, or directly | ||
outside a container. | ||
""" | ||
|
||
short_desc = 'Instructlab' | ||
plugin_name = 'instructlab' | ||
profiles = ('ai',) | ||
containers = ('instructlab', 'ilab',) | ||
commands = ('ilab',) | ||
|
||
option_list = [ | ||
PluginOpt('ilab-user', default='cloud-user', val_type=str, | ||
desc='user that runs instructlab'), | ||
PluginOpt('ilab-conf-dir', default='', val_type=str, | ||
desc='instructlab data directory'), | ||
PluginOpt('get-cache', default=False, | ||
desc='Capture models and osci cached data') | ||
] | ||
|
||
def setup(self): | ||
cont_share_conf_path = "/usr/share/instructlab/config" | ||
cont_opt_path = "/opt/app-root/src" | ||
# .cache dir contains the models and oci directories | ||
# which can be quite big. We'll gather this only if | ||
# specifying it via command line option | ||
cache_dir = ".cache/instructlab" | ||
# .config is where the configuration yaml files can | ||
# be found. We gather this always. | ||
config_dir = ".config/instructlab" | ||
# In the .local directory we can find datasets, | ||
# chat logs, taxonomies, and other very useful data | ||
# We gather this always. | ||
local_share_dir = ".local/share/instructlab" | ||
|
||
# container paths | ||
cont_cache_path = self.path_join(cont_opt_path, cache_dir) | ||
cont_config_path = self.path_join(cont_opt_path, config_dir) | ||
cont_local_path = self.path_join(cont_opt_path, local_share_dir) | ||
|
||
self.add_forbidden_path([ | ||
self.path_join(cont_local_path, | ||
"taxonomy/.git"), | ||
self.path_join(cont_local_path, "taxonomy/.github"), | ||
self.path_join(cont_opt_path, | ||
"src/.local/share/instructlab/taxonomy/.git"), | ||
self.path_join(cont_opt_path, | ||
"src/.local/share/instructlab/taxonomy/.github"), | ||
]) | ||
|
||
subcmds = [ | ||
'taxonomy diff', | ||
'taxonomy diff --taxonomy-base=empty', | ||
'system info', | ||
'model list', | ||
'config show', | ||
] | ||
|
||
data_dirs = [ | ||
'data', | ||
'generated', | ||
'taxonomy', | ||
'taxonomy_data', | ||
'chatlogs', | ||
'checkpoints', | ||
'datasets', | ||
'internal', | ||
'phased', | ||
] | ||
|
||
# If containerized, run commands in containers | ||
try: | ||
ilab_con = self.get_all_containers_by_regex("instructlab*")[0][1] | ||
except Exception: # pylint: disable=broad-except | ||
ilab_con = None | ||
|
||
self.add_copy_spec( | ||
[f"{cont_share_conf_path}/rhel_ai_config.yaml", | ||
f"{cont_config_path}/config.yaml"], | ||
container=ilab_con | ||
) | ||
self.add_copy_spec( | ||
[self.path_join(cont_local_path, data_dir) | ||
for data_dir in data_dirs], | ||
container=ilab_con | ||
) | ||
self.add_cmd_output( | ||
[f"ilab {sub}" for sub in subcmds], | ||
container=ilab_con | ||
) | ||
self.add_dir_listing( | ||
cont_cache_path, | ||
recursive=True, | ||
container=ilab_con | ||
) | ||
if self.get_option('get-cache'): | ||
self.add_copy_spec( | ||
f"{cont_cache_path}", | ||
container=ilab_con | ||
) | ||
self.add_container_logs(list(self.containers)) | ||
|
||
ilab_user = self.get_option("ilab-user") | ||
try: | ||
user_pwd = pwd.getpwnam(ilab_user) | ||
except KeyError: | ||
# The user doesn't exist, this will skip the rest | ||
self._log_warn( | ||
f'User "{ilab_user}" does not exist, will not ' | ||
'collect Instructlab information. Use ' | ||
'`-k instructlab.ilab-user` option to define ' | ||
'the user to use to collect data for Instructlab') | ||
return | ||
|
||
if user_pwd: | ||
ilab_dir = user_pwd.pw_dir | ||
if self.get_option("ilab-conf-dir"): | ||
ilab_dir = self.path_join( | ||
ilab_dir, self.get_option('ilab-conf-dir') | ||
) | ||
data_dirs_base = self.path_join(ilab_dir, local_share_dir) | ||
|
||
self.add_copy_spec(self.path_join(ilab_dir, config_dir)) | ||
self.add_copy_spec([ | ||
self.path_join(data_dirs_base, data_dir) | ||
for data_dir in data_dirs | ||
]) | ||
self.add_dir_listing( | ||
self.path_join(ilab_dir, cache_dir), | ||
recursive=True | ||
) | ||
|
||
if self.get_option("get-cache"): | ||
self.add_copy_spec( | ||
self.path_join(ilab_dir, cache_dir) | ||
) | ||
|
||
# vim: set et ts=4 sw=4 : |