From 82cc01b322f6d3c034ec8b1e36858d02738ac8a8 Mon Sep 17 00:00:00 2001 From: David Butenhof Date: Thu, 6 Apr 2023 07:51:44 -0400 Subject: [PATCH] API documentation improvements (#3365) * API documentation improvements PBENCH-829 Fill out and (to some extent update) API documentation. This also updates the dashboard `uriTemplate` method to use a `reduce` pattern instead of a loop. --- dashboard/src/utils/helper.js | 9 +- docs/API/V1/README.md | 32 +---- docs/API/V1/contents.md | 12 +- docs/API/V1/daterange.md | 52 ++++++- docs/API/V1/delete.md | 58 +++++++- docs/API/V1/detail.md | 122 ++++++++++++++++- docs/API/V1/endpoints.md | 233 +++++++++++++++++++++++++++++++- docs/API/V1/inventory.md | 8 +- docs/API/V1/list.md | 227 ++++++++++++++++++++++++++++--- docs/API/V1/login.md | 6 - docs/API/V1/logout.md | 5 - docs/API/V1/publish.md | 7 - docs/API/V1/register.md | 5 - docs/API/V1/server_audit.md | 238 +++++++++++++++++++++++++++++++++ docs/API/V1/server_settings.md | 27 ++-- docs/API/V1/update.md | 79 +++++++++++ docs/API/V1/upload.md | 106 +++++++++++++++ docs/API/V1/user.md | 7 - docs/API/access_model.md | 19 +-- docs/API/metadata.md | 134 +++++++++++-------- 20 files changed, 1202 insertions(+), 184 deletions(-) delete mode 100644 docs/API/V1/login.md delete mode 100644 docs/API/V1/logout.md delete mode 100644 docs/API/V1/publish.md delete mode 100644 docs/API/V1/register.md create mode 100644 docs/API/V1/server_audit.md create mode 100644 docs/API/V1/update.md create mode 100644 docs/API/V1/upload.md delete mode 100644 docs/API/V1/user.md diff --git a/dashboard/src/utils/helper.js b/dashboard/src/utils/helper.js index 271cee3c8f..e6adc7a165 100644 --- a/dashboard/src/utils/helper.js +++ b/dashboard/src/utils/helper.js @@ -14,9 +14,8 @@ export const uid = () => { * @return {string} - formatted URI */ export const uriTemplate = (endpoints, name, args = {}) => { - let uri = endpoints.uri[name].template; - for (const [key, value] of Object.entries(args)) { - uri = uri.replace(`{${key}}`, value); - } - return uri; + return Object.entries(args).reduce( + (uri, [key, value]) => uri.replace(`{${key}}`, value), + endpoints.uri[name].template + ); }; diff --git a/docs/API/V1/README.md b/docs/API/V1/README.md index 2ac60f874d..25fe93f24d 100644 --- a/docs/API/V1/README.md +++ b/docs/API/V1/README.md @@ -35,18 +35,6 @@ artifacts, and the archived tarball. The resource ID becomes invalid subsequentl unless a new dataset is created with the same checksum value. (This is highly unlikely, unless the same Pbench Agent tarball is uploaded again.) -### Users - -User resources are identified by a `username` property, which must be unique -among all users registered on the Pbench Server. The user resource has a -"user profile" that includes first and last name, and a contact email. - -A user resource is the "owner" of each [dataset](#datasets) managed by the -Pbench Server. If a user is deleted, then any datasets owned by that user -become orphaned; datasets with PUBLIC access are still accessible to other -users, and PRIVATE datasets are accessible through the `ADMIN` user role (see -[access model](../access_model.md)). - ### Metadata Metadata resources are secondary resources tied to a dataset resource and, for @@ -67,22 +55,6 @@ is made PRIVATE, or if the user relies on a role or group (see removed. In this case, however, the metadata values remain, and will become visible again if READ access is restored. 
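As a concrete illustration of working with these metadata resources, here is a
minimal sketch of a client reading a dataset's metadata with the Python
`requests` package. The server address, authentication token, and resource ID
are placeholders, and the URI and query parameter shape follow the
`datasets_metadata` template advertised by the [endpoints](endpoints.md) API,
so treat this as a sketch rather than a definitive reference:

```python
import requests

SERVER = "https://pbench.example.com"               # placeholder server address
TOKEN = "<authentication token>"                    # omit for public datasets
RESOURCE_ID = "001ab7f04079f620f6f624b6eea913df"    # placeholder resource ID

# Ask for a few dataset properties plus the owner-controlled "global" namespace.
response = requests.get(
    f"{SERVER}/api/v1/datasets/{RESOURCE_ID}/metadata",
    params={"metadata": "dataset.name,dataset.access,global"},
    headers={"authorization": f"bearer {TOKEN}"},
)
response.raise_for_status()
print(response.json())
```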
-## Login and registration - -You can register a new user (depending on the administration policy of the -server) using the [register](register.md) API. If this succeeds, you can log in -using the new username and password. - -You can log in as a registered user by calling the [login](login.md) API, which -returns a bearer schema authentication token that should be provided to -subsequent API calls using the `authorization` header. - -You can log out an active authentication token by passing it as the -`authorization` header to the [logout](logout.md) API. - -While logged in, you can retrieve (`GET`) and modify (`PUT`) your user profile -through the [user](user.md) API. - ## Dataset metadata You can read a more complete specification of Pbench Server metadata at @@ -96,8 +68,8 @@ associated with the authorization token given to the Pbench Agent calculate a default deletion date for the dataset based on the owner's retention policy and the server administrator's retention policy. -Clients can also set arbitrary metadata through the `dashboard` and `user` -metadata namespaces. The `dashboard` namespace can only be modified by the +Clients can also set arbitrary metadata in the `global` and `user` +metadata namespaces. The `global` namespace can only be modified by the owner of the dataset, and is visible to anyone with read access to the dataset. The `user` namespace is private to each authenticated user, and even if you don't own a dataset you can set your own private `user` metadata to help you diff --git a/docs/API/V1/contents.md b/docs/API/V1/contents.md index 12c3ced745..e215065fcf 100644 --- a/docs/API/V1/contents.md +++ b/docs/API/V1/contents.md @@ -1,4 +1,4 @@ -# `GET /api/v1/datasets/contents//` +# `GET /api/v1/datasets//contents/[]` This API returns an `application/json` document describing a file or the content of a directory at a specified `` within the `` tarball @@ -13,8 +13,8 @@ The resource ID of a dataset on the Pbench Server. The path of an item in the dataset inventory, as captured by the Pbench Agent packaging. Note that the `/` separating the two parameters serves to mark the relative root directory of the tarball. For example -`/api/v1/datasets/contents//` represents the root, and -`/api/v1/datasets/contents//directory/` represents a directory named +`/api/v1/datasets//contents/` represents the root, and +`/api/v1/datasets//contents/directory/` represents a directory named `directory` at the root level. ## Request headers @@ -36,9 +36,11 @@ See [Access model](../access_model.md) ## Response status +`200` **OK** \ +Successful request. + `401` **UNAUTHORIZED** \ -The client is not authenticated and does not have READ access to the specified -dataset. +The client is not authenticated. `403` **FORBIDDEN** \ The authenticated client does not have READ access to the specified dataset. diff --git a/docs/API/V1/daterange.md b/docs/API/V1/daterange.md index 0f46e9150f..6bd44ec47a 100644 --- a/docs/API/V1/daterange.md +++ b/docs/API/V1/daterange.md @@ -5,4 +5,54 @@ authenticated client, optionally filtered by owner and/or access policy. For example, this can be used to initialize a date picker. -__TBD__ +## Query parameters + +`access` string \ +Select whether only `private` or only `public` access datasets will be included +in the list. By default, all datasets readable by the authenticated user are +included. 
For example, without constraints `/datasets/daterange` for an +authenticated user will include all `public` datasets plus all datasets owned +by the authenticated user; specifying `private` will show only the authenticated +user's private datasets, while specifying `public` will show only `public` +datasets (regardless of ownership). + +`owner` string \ +Select only datasets owned by the specified username. Unless the username +matches the authenticated user, only "public" datasets can be selected. + +## Response status + +`200` **OK** \ +Successful request. + +`401` **UNAUTHORIZED** \ +The client did not provide an authentication token but asked to filter datasets +by `owner` or `access=private`. + +`403` **FORBIDDEN** \ +The client asked to filter `access=private` datasets or by `owner` for which +the client does not have READ access. + +`503` **SERVICE UNAVAILABLE** \ +The server has been disabled using the `server-state` server configuration +setting in the [server configuration](./server_config.md) API. The response +body is an `application/json` document describing the current server state, +a message, and optional JSON data provided by the system administrator. + +## Response headers + +`content-type: application/json` \ +The return is a JSON document containing the date range of datasets on the +Pbench Server. + +## Response body + +The `application/json` response body is a JSON object describing the earliest +and most recent dataset upload time on the Pbench Server. + +```json +{ + "from": "2023-03-17T03:14:02.013184+00:00", + "to": "2023-04-05T11:29:02.585772+00:00" +} +``` diff --git a/docs/API/V1/delete.md b/docs/API/V1/delete.md index 4baaf57c21..ca64a7f63f 100644 --- a/docs/API/V1/delete.md +++ b/docs/API/V1/delete.md @@ -4,4 +4,60 @@ This API completely deletes a dataset resource, erasing the dataset resource ID, the dataset tarball and unpacked artifacts, and all backend data related to the dataset. -__TBD__ +## URI parameters + +`` string \ +The resource ID of a dataset on the Pbench Server. + +## Request headers + +`authorization: bearer` token \ +*Bearer* schema authorization is required to access any non-public dataset. +E.g., `authorization: bearer ` + +## Response headers + +`content-type: application/json` \ +The return is a serialized JSON object with status feedback. + +## Resource access + +* Requires `DELETE` access to the `` resource + +See [Access model](../access_model.md) + +## Response status + +`200` **OK** \ +Successful request. + +`401` **UNAUTHORIZED** \ +The client is not authenticated. + +`403` **FORBIDDEN** \ +The authenticated client does not have `DELETE` access to the specified dataset. + +`404` **NOT FOUND** \ +The `` resource ID does not exist. + +`503` **SERVICE UNAVAILABLE** \ +The server has been disabled using the `server-state` server configuration +setting in the [server configuration](./server_config.md) API. The response +body is an `application/json` document describing the current server state, +a message, and optional JSON data provided by the system administrator. + +## Response body + +The `application/json` response body consists of a JSON object summarizing the +Elasticsearch index deletion. For example, if the dataset has 9 Elasticsearch +index documents and all are deleted successfully, + +```json +{ + "failure": 0, + "ok": 9 +} +``` + +If the dataset had not been indexed, both numbers will be 0. A non-zero +`"failure"` indicates a partial success, which can be retried. 
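A minimal sketch of a client invoking this API with the Python `requests`
package; the server address, authentication token, and resource ID are
placeholders:

```python
import requests

SERVER = "https://pbench.example.com"               # placeholder server address
TOKEN = "<authentication token>"
RESOURCE_ID = "001ab7f04079f620f6f624b6eea913df"    # placeholder resource ID

response = requests.delete(
    f"{SERVER}/api/v1/datasets/{RESOURCE_ID}",
    headers={"authorization": f"bearer {TOKEN}"},
)
if response.ok:
    summary = response.json()
    # A non-zero "failure" count indicates a partial success that can be retried.
    print(f"deleted: ok={summary['ok']} failure={summary['failure']}")
else:
    print(f"delete failed: {response.status_code} {response.text}")
```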
diff --git a/docs/API/V1/detail.md b/docs/API/V1/detail.md index f2a0e6ed83..6fe248079c 100644 --- a/docs/API/V1/detail.md +++ b/docs/API/V1/detail.md @@ -1,6 +1,120 @@ -# `GET /api/v1/datasets/detail/` +# `GET /api/v1/datasets//detail` -This API returns detailed information about a dataset's run environment. It -can also return Pbench Server [metadata](../metadata.md). +This API returns detailed information about a dataset's run environment from the +Elasticsearch index. It can also return Pbench Server [metadata](../metadata.md). -__TBD__ +Note that this information is mostly acquired from the dataset's `metadata.log` +file which is also directly accessible as metadata through `dataset.metalog`. + +## URI parameters + +`` string \ +The resource ID of a Pbench dataset on the server. + +## Query parameters + +`metadata` requested metadata keys \ +A list of server metadata tags; see [Metadata](../metadata.md). For example, +`?metadata=dataset.access,global.server.legacy` will return the value of the +two metadata keys `dataset.access` (the dataset's access scope) and +`global.server.legacy` (a user-defined global value). + +## Request headers + +`authorization: bearer` token \ +*Bearer* schema authorization is required to access any non-public dataset. +E.g., `authorization: bearer ` + +## Response headers + +`content-type: application/json` \ +The return is a JSON document containing the summary "run" data from the +dataset index. + +## Resource access + +* Requires `READ` access to the `` resource + +See [Access model](../access_model.md) + +## Response status + +`200` **OK** \ +Successful request. + +`400` **BAD_REQUEST** \ +One or more metadata keys specified were unacceptable. + +`401` **UNAUTHORIZED** \ +The client is not authenticated. + +`403` **FORBIDDEN** \ +The authenticated client does not have READ access to the specified dataset. + +`404` **NOT FOUND** \ +The `` does not exist. + +`503` **SERVICE UNAVAILABLE** \ +The server has been disabled using the `server-state` server configuration +setting in the [server configuration](./server_config.md) API. The response +body is an `application/json` document describing the current server state, +a message, and optional JSON data provided by the system administrator. + +## Response body + +The `application/json` response body is a JSON object containing the dataset +index "run" data and any requested server metadata, as follows. + +The following example shows server metadata from the query parameter +`?metadata=dataset.access`. 
+ +```json +{ + "hostTools": [ + { + "hostname": "controller.example.com", + "tools": { + "hostname-alias": "", + "hostname-all-fqdns": "host.containers.internal controller.example.com controller.example.com controller.example.com", + "hostname-all-ip-addresses": "10.1.36.93 172.21.63.246 10.1.63.92 192.168.122.1", + "hostname-domain": "rdu2.scalelab.redhat.com", + "hostname-fqdn": "controller.example.com", + "hostname-ip-address": "10.1.36.93", + "hostname-nis": "hostname: Local domain name not set", + "hostname-short": "controller", + "rpm-version": "v0.71.0-3g85910732a", + "tools": "vmstat", + "vmstat": "--interval=3" + } + } + ], + "runMetadata": { + "controller": "controller.example.com", + "controller_dir": "controller.example.com", + "date": "2023-03-23T20:26:03", + "end": "2023-03-23T20:26:13.177673", + "file-date": "2023-03-23T20:27:12.376720", + "file-name": "/srv/pbench/archive/fs-version-001/controller.example.com/pbench-user-benchmark__2023.03.23T20.26.03.tar.xz", + "file-size": 12804, + "hostname_f": "controller.example.com", + "hostname_ip": "10.1.36.93, 172.21.63.246, 10.1.63.92, 192.168.122.1", + "hostname_s": "f09-h29-b01-5039ms", + "id": "001ab7f04079f620f6f624b6eea913df", + "iterations": "1-default", + "md5": "001ab7f04079f620f6f624b6eea913df", + "name": "pbench-user-benchmark__2023.03.23T20.26.03", + "pbench-agent-version": "v0.71.0-3g85910732a", + "raw_size": 265692, + "result-prefix": "spc", + "script": "pbench-user-benchmark", + "start": "2023-03-23T20:26:05.949697", + "tar-ball-creation-timestamp": "2023-03-23T20:26:16.755310", + "toc-prefix": "pbench-user-benchmark__2023.03.23T20.26.03", + "toolsgroup": "default", + "user": "agent" + }, + "serverMetadata": { + "dataset.access": "public" + } +} +``` diff --git a/docs/API/V1/endpoints.md b/docs/API/V1/endpoints.md index ccd8de7560..103c1dd179 100644 --- a/docs/API/V1/endpoints.md +++ b/docs/API/V1/endpoints.md @@ -1,6 +1,235 @@ # `GET /api/v1/endpoints` This API describes the set of URI endpoints available under the Pbench Server -V1 API. +V1 API, the Keycloak broker configuration for authorization, and the current +Pbench Server version identification. -__TBD__ +This API does not require authentication and has no access restrictions. + +## Response status + +`200` **OK** \ +Successful request. + +## Response headers + +`content-type: application/json` \ +The return is a JSON document containing the summary "run" data from the +dataset index. + +## Response body + +The `application/json` response body is a JSON object describing the Pbench +Server configuration. + +The information is divided into four sections, as described below. + +### `identification` + +This identifies the name and version of the Pbench Server. + +### `openid` + +The Pench Server authenticates through an OIDC broker (e.g., Keycloak). In order +to authenticate and receive an authorization token to present to server APIs, the +client must redirect to the broker login page using the `server_url` given here, +with the Pbench Server `realm` and `client` ID. + +### `uri` + +A representation of the Pbench Server APIs supported on this server. + +#### Name + +The "name" of the API. For example, to query or set metadata for a dataset, +`endpoints.uri.dataset_metadata` would return a JSON object describing the +URI template and parameters for the API. + +##### `template` + +The API's URI template pattern, with URI parameters in the form `{}`, as in +`http://host:port/api/v1/datasets/{dataset}/metadata`. 
+ +##### `params` + +A sub-object describing the URI parameters referenced in the URI template. Each +param has a name and type. Note that "type" refers to the Flask URI parsing, and +the main useful distinction here is that `string` means a simple undeliminated +string whereas `path` refers to a `/`-separated string that resembles a UNIX file +path. + +Each param name appears in the template in the form `{}`, which is a convenient +format for the Python `format` function. + +```python + uri = endpoints["uri"]["datasets_metadata"]["template"].format(dataset=id) +``` + +A similar formatter can be built easily for Javascript: + +```javascript +/** + * Expand a templated API URI like a Python `.format` + * + * @param {Object} endpoints - endpoint object from server + * @param {string} name - name of the API to expand + * @param {Object} args - value for each templated parameter + * @return {string} - formatted URI + */ +export const uriTemplate = (endpoints, name, args) => { + return Object.entries(args).reduce( + (uri, [key, value]) => uri.replace(`{${key}}`, value), + endpoints.uri[name].template + ); +}; + +let uri = uriTemplate( + endpoints, + 'datasets_metadata', + {dataset: resource_id} + ); +``` + +```json +{ + "identification": "Pbench server 1.0.0-85189370c", + "openid": { + "uri": "openid.example.com", + "pbench-client": "client name" + }, + "uri": { + "datasets": { + "params": { + "dataset": { + "type": "string" + } + }, + "template": "http://10.1.1.1:8080/api/v1/datasets/{dataset}" + }, + "datasets_contents": { + "params": { + "dataset": { + "type": "string" + }, + "target": { + "type": "path" + } + }, + "template": "http://10.1.1.1:8080/api/v1/datasets/{dataset}/contents/{target}" + }, + "datasets_daterange": { + "params": {}, + "template": "http://10.1.1.1:8080/api/v1/datasets/daterange" + }, + "datasets_detail": { + "params": { + "dataset": { + "type": "string" + } + }, + "template": "http://10.1.1.1:8080/api/v1/datasets/{dataset}/detail" + }, + "datasets_inventory": { + "params": { + "dataset": { + "type": "string" + }, + "target": { + "type": "path" + } + }, + "template": "http://10.1.1.1:8080/api/v1/datasets/{dataset}/inventory/{target}" + }, + "datasets_list": { + "params": {}, + "template": "http://10.1.1.1:8080/api/v1/datasets" + }, + "datasets_mappings": { + "params": { + "dataset_view": { + "type": "string" + } + }, + "template": "http://10.1.1.1:8080/api/v1/datasets/mappings/{dataset_view}" + }, + "datasets_metadata": { + "params": { + "dataset": { + "type": "string" + } + }, + "template": "http://10.1.1.1:8080/api/v1/datasets/{dataset}/metadata" + }, + "datasets_namespace": { + "params": { + "dataset": { + "type": "string" + }, + "dataset_view": { + "type": "string" + } + }, + "template": "http://10.1.1.1:8080/api/v1/datasets/{dataset}/namespace/{dataset_view}" + }, + "datasets_search": { + "params": {}, + "template": "http://10.1.1.1:8080/api/v1/datasets/search" + }, + "datasets_values": { + "params": { + "dataset": { + "type": "string" + }, + "dataset_view": { + "type": "string" + } + }, + "template": "http://10.1.1.1:8080/api/v1/datasets/{dataset}/values/{dataset_view}" + }, + "endpoints": { + "params": {}, + "template": "http://10.1.1.1:8080/api/v1/endpoints" + }, + "login": { + "params": {}, + "template": "http://10.1.1.1:8080/api/v1/login" + }, + "logout": { + "params": {}, + "template": "http://10.1.1.1:8080/api/v1/logout" + }, + "register": { + "params": {}, + "template": "http://10.1.1.1:8080/api/v1/register" + }, + "server_audit": { + "params": {}, + 
"template": "http://10.1.1.1:8080/api/v1/server/audit" + }, + "server_settings": { + "params": { + "key": { + "type": "string" + } + }, + "template": "http://10.1.1.1:8080/api/v1/server/settings/{key}" + }, + "upload": { + "params": { + "filename": { + "type": "string" + } + }, + "template": "http://10.1.1.1:8080/api/v1/upload/{filename}" + }, + "user": { + "params": { + "target_username": { + "type": "string" + } + }, + "template": "http://10.1.1.1:8080/api/v1/user/{target_username}" + } + } +} +``` diff --git a/docs/API/V1/inventory.md b/docs/API/V1/inventory.md index 26f1b462f4..2d9d66934c 100644 --- a/docs/API/V1/inventory.md +++ b/docs/API/V1/inventory.md @@ -1,4 +1,4 @@ -# `GET /api/v1/datasets/inventory/` +# `GET /api/v1/datasets//inventory/[]` This API returns an `application/octet-stream` document containing the raw byte stream of a regular file at the `` within the `` tarball @@ -33,9 +33,11 @@ See [Access model](../access_model.md) ## Response status +`200` **OK** \ +Successful request. + `401` **UNAUTHORIZED** \ -The client is not authenticated and does not have READ access to the specified -dataset. +The client is not authenticated. `403` **FORBIDDEN** \ The authenticated client does not have READ access to the specified dataset. diff --git a/docs/API/V1/list.md b/docs/API/V1/list.md index 78c423b826..2aa7290365 100644 --- a/docs/API/V1/list.md +++ b/docs/API/V1/list.md @@ -1,11 +1,15 @@ -# `GET /api/v1/datasets/list` +# `GET /api/v1/datasets` -This API returns an `application/json` document describing the set of datasets -accessible to the client. (An unauthenticated client can only access "public" -datasets.) +This API returns an `application/json` document describing a filtered +collection of datasets accessible to the client. (An unauthenticated client +can only list datasets with access `public`.) -The list of datasets may be further filtered by owner, access, name substring, -or by creation date range using the query parameters. +The collection of datasets may be filtered using any combination of a number +of query parameters, including `owner`, `access`, `name` substring, date range, +and arbitrary metadata filter expressions. + +Large collections can be paginated for efficiency using the `limit` and `offset` +query parameters. ## Query parameters @@ -25,6 +29,37 @@ If the timezone offset is omitted it will be assumed to be UTC (`+00:00`); if the time is omitted it will be assumed as midnight (`00:00:00`) on the specified date. +`filter` metadata filtering \ +Select datasets matching the metadata expressions specified via `filter` +query parameters. Each expression is the name of a metadata key (for example, +`dataset.name`), followed by a colon (`:`) and the comparison string. The +comparison string may be prefixed with a tilde (`~`) to make it a partial +("contains") comparison instead of an exact match. For example, +`dataset.name:foo` looks for datasets with the name "foo" exactly, whereas +`dataset.name:~foo` looks for datasets with a name containing the substring +"foo". + +These may be combined across multiple `filter` query parameters or as +comma-separated lists in a single query parameter. Multiple filter expressions +form an `AND` expression, however consecutive filter expressions can be joined +in an `OR` expression by using the circumflex (`^`) character prior to each. +(The first expression with `^` begins an `OR` list while the first subsequent +expression outout `^` ends the `OR` list and is combined with an `AND`.) 
+ +For example, +- `filter=dataset.name:a,server.origin:EC2` returns datasets with a name of +"a" and an origin of "EC2". +- `filter=dataset.name:a,^server.origin:EC2,^dataset.metalog.pbench.script:fio` +returns datasets with a name of "a" and *either* an origin of "EC2" or generated +from the "pbench-fio" script. + +_NOTE_: `filter` expression values, like the `true` in +`GET /api/v1/datasets?filter=server.archiveonly:true`, are always interpreted +as strings, so be careful about the string representation of the value (in this +case, a boolean, which is represented in JSON as `true` or `false`). Beware +especially when attempting to match a JSON document (such as +`dataset.metalog.pbench`). + `limit` integer \ "Paginate" the selected datasets by returning at most `limit` datasets. This can be used in conjunction with `offset` to progress through the full list in @@ -40,6 +75,11 @@ example, the following are all equivalent: * `?metadata=dataset.created&metadata=dataset.deletion,user` * `?metadata=dataset.created&metadata=dataset.deletion&metadata=user` +`mine` boolean \ +Allows filtering for datasets owned by the authenticated client (if the value +is omitted, e.g., `?mine` or `?mine=true`) or owned by *other* users (e.g., +`?mine=false`). + `offset` integer \ "Paginate" the selected datasets by skipping the first `offset` datasets that would have been selected by the other query terms. This can be used with @@ -57,6 +97,15 @@ If the timezone offset is omitted it will be assumed to be UTC (`+00:00`); if the time is omitted it will be assumed as midnight (`00:00:00`) on the specified date. +`keysummary` boolean \ +Instead of displaying a list of selected datasets and metadata, use the set of +specified filters to accumulate a nested report on the metadata key namespace +for the set of datasets. See [metadata](../metadata.md) for deails on the +Pbench Server metadata namespaces. Because the `global` and `user` namespaces +are completely dynamic, and the `dataset.metalog` sub-namespace varies greatly +across Pbench Agent benchmark scripts, this mode provides a mechanism for a +metadata visualizer to understand what's available for a set of datasets. + ## Request headers `authorization: bearer` token [_optional_] \ @@ -79,9 +128,12 @@ See [Access model](../access_model.md) ## Response status +`200` **OK** \ +Successful request. + `401` **UNAUTHORIZED** \ The client did not provide an authentication token but asked to filter datasets -by `owner` or `access=private`. +by `owner`, `access=private`, `mine`, or asked for `user` namespace metadata. `403` **FORBIDDEN** \ The client asked to filter `access=private` datasets for an `owner` for which @@ -95,8 +147,27 @@ a message, and optional JSON data provided by the system administrator. ## Response body +### Dataset list + The `application/json` response body contains a list of objects which describe -the datasets selected by the specified query criteria. +the datasets selected by the specified query criteria, along with the total +number of matching datasets and a `next_url` to support pagination. + +#### next_url + +When pagination is used, this gives the full URI to acquire the next page using +the same `metadata` and `limit` values. The client can simply `GET` this URI for +the next page. When the entire collection has been returned, `next_url` will be +null. + +#### total + +The total number of datasets matching the filter criteria regardless of the +pagination settings. + +#### results + +The paginated dataset collection. 
Each of these objects contains the following fields: * `resource_id`: The internal unique ID of the dataset within the Pbench Server. @@ -109,20 +180,138 @@ display purposes and must not be assumed to be unique or definitive. JSON object in this field. For example, the query -`GET http://host/api/v1/datasets/list?metadata=user.dashboard.favorite` +`GET http://host/api/v1/datasets/list?metadata=user.dashboard.favorite&limit=3` might return: ```json -[ - { - "name": "pbench-fio_config_2022-06-29:00:00:00", - "resource_id": "07f0a9cb817e258a54dbf3444abcd3aa", - "metadata": {"user.dashboard.favorite": true} +{ + "next_url": "http://pbench.example.com/api/v1/datasets?limit=3&metadata=user.dashboard.favorite&offset=3", + "results": [ + { + "metadata": { + "user.dashboard.favorite": null + }, + "name": "pbench-user-benchmark__2023.03.23T20.26.03", + "resource_id": "001ab7f04079f620f6f624b6eea913df" + }, + { + "metadata": { + "user.dashboard.favorite": null + }, + "name": "pbench-user-benchmark__2023.03.18T19.07.42", + "resource_id": "006fab853eb42907c6c202af1d6b750b" + }, + { + "metadata": { + "user.dashboard.favorite": null + }, + "name": "fio__2023.03.28T03.58.19", + "resource_id": "009ad5f818d9a32af6128dd2b0255161" + } + ], + "total": 722 +} +``` + +### Key namespace summary + +When the `keysummary` query parameter is `true` (e.g., either `?keysummary` or +`?keysummary=true`), instead of reporting a list of datasets and metadata for +each dataset, report a hierarchical representation of the aggregate metadata +namespace across all selected datasets. This returns much less data and is not +subject to pagination. + +"Leaf" nodes in the metadata tree are represented by `null` values while any +key with children will be represented as a nested JSON object showing those +child keys. From the example output below a client can identify many key paths +including `dataset.access` and `dataset.metalog.controller.hostname`. + +Any of the partial or complete key paths represented in the output document are +valid targets for metadata queries: for example `dataset.metalog.pbench.script` +is a "leaf" node, but `GET /api/v1/datasets?metadata=dataset.metalog.pbench` +will return a JSON document with the keys `config`, `date`, `hostname_f`, +`hostname_ip`, `hostname_s`, `iterations`, `name`, `rpm-version`, `script`, and +`tar-ball-creation-timestamp`. 
+ +```json +{ + "dataset": { + "access": null, + "id": null, + "metalog": { + "controller": { + "hostname": null, + "hostname-alias": null, + "hostname-all-fqdns": null, + "hostname-all-ip-addresses": null, + "hostname-domain": null, + "hostname-fqdn": null, + "hostname-ip-address": null, + "hostname-nis": null, + "hostname-short": null, + "ssh_opts": null + }, + "iterations/1-default": { + "iteration_name": null, + "iteration_number": null, + "user_script": null + }, + "pbench": { + "config": null, + "date": null, + "hostname_f": null, + "hostname_ip": null, + "hostname_s": null, + "iterations": null, + "name": null, + "rpm-version": null, + "script": null, + "tar-ball-creation-timestamp": null + }, + "run": { + "controller": null, + "end_run": null, + "raw_size": null, + "start_run": null + }, + "tools": { + "group": null, + "hosts": null, + "trigger": null + }, + "tools/dbutenho.bos.csb": { + "hostname-alias": null, + "hostname-all-fqdns": null, + "hostname-all-ip-addresses": null, + "hostname-domain": null, + "hostname-fqdn": null, + "hostname-ip-address": null, + "hostname-nis": null, + "hostname-short": null, + "label": null, + "rpm-version": null, + "tools": null, + "vmstat": null + }, + "tools/dbutenho.bos.csb/vmstat": { + "install_check_output": null, + "install_check_status_code": null, + "options": null + } + }, + "name": null, + "owner_id": null, + "resource_id": null, + "uploaded": null }, - { - "name": "the dataset I created for fun", - "resource_id": "8322d8043755ccd33dc6d7091d1f9ff9", - "metadata": {"user.dashboard.favorite": false} + "server": { + "deletion": null, + "index-map": { + "container-pbench.v6.run-data.2023-03": null, + "container-pbench.v6.run-toc.2023-03": null + }, + "origin": null, + "tarball-path": null } -] +} ``` diff --git a/docs/API/V1/login.md b/docs/API/V1/login.md deleted file mode 100644 index 73afd13bf8..0000000000 --- a/docs/API/V1/login.md +++ /dev/null @@ -1,6 +0,0 @@ -# `POST /api/v1/login` - -This API generates an HTTP authentication token for a registered Pbench Server -user. - -__TBD__ diff --git a/docs/API/V1/logout.md b/docs/API/V1/logout.md deleted file mode 100644 index f9ebf8f82d..0000000000 --- a/docs/API/V1/logout.md +++ /dev/null @@ -1,5 +0,0 @@ -# `POST /api/v1/logout` - -This API invalidates an active HTTP authentication token. - -__TBD__ diff --git a/docs/API/V1/publish.md b/docs/API/V1/publish.md deleted file mode 100644 index ed5d61a648..0000000000 --- a/docs/API/V1/publish.md +++ /dev/null @@ -1,7 +0,0 @@ -# `GET /api/v1/datasets/publish/` - -This API sets the access property of the identified dataset to either "private" -(allowing access only by the owner and users with administrator role) or -"public" (allowing access by any client). - -__TBD__ diff --git a/docs/API/V1/register.md b/docs/API/V1/register.md deleted file mode 100644 index 8754fab2cb..0000000000 --- a/docs/API/V1/register.md +++ /dev/null @@ -1,5 +0,0 @@ -# `POST /api/v1/register` - -This API registers a new Pbench Server V1 user. - -__TBD__ diff --git a/docs/API/V1/server_audit.md b/docs/API/V1/server_audit.md new file mode 100644 index 0000000000..b43fa5d39d --- /dev/null +++ b/docs/API/V1/server_audit.md @@ -0,0 +1,238 @@ +# `GET /api/v1/server/audit` + +This API returns the Pbench Server audit log as an `application/json` document. +Various query parameters are available to filter the returned records. + +## Query parameters + +### `end` +The latest date to return. + +### `start` +The earliest date to return. 
+ +### `dataset` +This is an alias for specifying [#object_id] and [#object_type] to select all +audit records for a specific dataset. + +### `name` +Each type of Pbench Server "actor" has a simple name, so it's easy to select +all upload or index operations. +* `config`: Server configuration values were modified. +* `metadata`: Dataset metadata values were modified. +* `upload`: A dataset was uploaded to the server. + +### `object_id` +Select by the object ID: the `resource_id` for datasets, or the OIDC ID for +users. (Server configuration settings have no ID.) This allows +selecting datasets or users that no longer exist, or have been renamed. + +### `object_name` +Select by the name of an object at the time the audit record was generated. If +an object is deleted, or the object name is changed, older audit records retain +the previous name and can be used to track "phases in the object's evolution". +To track a dataset across name changes, use `object_id` and `object_type`, or +`dataset`. + +### `object_type` +Select by the object type. +* `DATASET`: Dataset objects. +* `CONFIG`: Server config settings. +* `TEMPLATE`: Elasticsearch templates. +* `NONE`: Unspecified. +* `TOKEN`: API Key tokens. + +### `operation` +The CRUD operation type associated with the audit records. + +* `CREATE`: A resource was created. +* `READ`: A resource was read. (The Pbench Server does not generally audit read operations.) +* `UPDATE`: A resource was updated. +* `DELETE`: A resource was deleted. + +### `reason` +Failure reason codes: additional information will be encoded in the `attributes` +JSON object, but can't be filtered directly. + +* `PERMISSION`: The operation failed due to a permission failure. +* `INTERNAL`: The operation failed due to internal Pbench Server processing errors. +* `CONSISTENCY`: The operation failed due to resource or process consistency issues. + +### `status` +Each linked set of audit records begins with a `BEGIN` record; the status of the +finalization record reflects the completion status. + +* `BEGIN`: Begin an operation. +* `SUCCESS`: Successful completion of an operation. +* `FAILURE`: Total failure of an operation. +* `WARNING`: Partial failure of an operation. + +### `user_id` +The OIDC ID of the user responsible for the operation. + +### `user_name` +The username of the user responsible for the operation, or `BACKGROUND` when there's +no active user. + +## Request headers + +`authorization: bearer` token \ +*Bearer* schema authorization for a user holding the `ADMIN` role is required +to access audit log data. + +E.g., `authorization: bearer ` + +## Response headers + +`content-type: application/json` \ +The return is a serialized JSON object with the selected audit log records. + +## Response status + +`200` **OK** \ +Successful request. + +`401` **UNAUTHORIZED** \ +The client is not authenticated. + +`403` **FORBIDDEN** \ +The authenticated client does not hold the `ADMIN` role required to access the +audit log. + +`503` **SERVICE UNAVAILABLE** \ +The server has been disabled using the `server-state` server configuration +setting in the [server configuration](./server_config.md) API. The response +body is an `application/json` document describing the current server state, +a message, and optional JSON data provided by the system administrator. + +## Response body + +The `application/json` response body is a JSON document containing the selected +audit records. + +### Examples + +The `root_id` links multiple audit records from the `id` of the `BEGIN` operation +record. 
+ +The `attributes` JSON provides any additional information on the operation, +including at least a `message` field on failure. + +The absolute UTC `timestamp` when the audit record was generated. + +```python +GET /api/v1/server/audit?start=2023-03-26&name=upload&status=success + +[ + { + "attributes": { + "access": "public", + "metadata": { + "global.server.legacy.hostname": "n010.intlab.redhat.com", + "global.server.legacy.sha1": "9a54d5281", + "global.server.legacy.version": "0.69.11" + } + }, + "id": 24156, + "name": "upload", + "object_id": "15a047579afab000606769e35e6aa478", + "object_name": "fio__2023.03.26T00.14.30", + "object_type": "DATASET", + "operation": "CREATE", + "reason": null, + "root_id": 24155, + "status": "SUCCESS", + "timestamp": "2023-03-26T00:29:13.640724+00:00", + "user_id": "3", + "user_name": "legacy" + }, + { + "attributes": { + "access": "public", + "metadata": { + "global.server.legacy.hostname": "n010.intlab.redhat.com", + "global.server.legacy.sha1": "9a54d5281", + "global.server.legacy.version": "0.69.11" + } + }, + "id": 24192, + "name": "upload", + "object_id": "f71a5a714e64649df9de0e5d68d52af9", + "object_name": "uperf__2023.03.26T00.28.47", + "object_type": "DATASET", + "operation": "CREATE", + "reason": null, + "root_id": 24191, + "status": "SUCCESS", + "timestamp": "2023-03-26T00:33:12.407221+00:00", + "user_id": "3", + "user_name": "legacy" + }, + { + "attributes": { + "access": "public", + "metadata": { + "global.server.legacy.hostname": "n010.intlab.redhat.com", + "global.server.legacy.sha1": "9a54d5281", + "global.server.legacy.version": "0.69.11" + } + }, + "id": 24236, + "name": "upload", + "object_id": "d1993694695a5eb3cb9f34902f0e31ce", + "object_name": "uperf__2023.03.26T00.36.50", + "object_type": "DATASET", + "operation": "CREATE", + "reason": null, + "root_id": 24235, + "status": "SUCCESS", + "timestamp": "2023-03-26T00:41:12.851840+00:00", + "user_id": "3", + "user_name": "legacy" + }, + { + "attributes": { + "access": "public", + "metadata": { + "global.server.legacy.hostname": "n010.intlab.redhat.com", + "global.server.legacy.sha1": "9a54d5281", + "global.server.legacy.version": "0.69.11" + } + }, + "id": 24450, + "name": "upload", + "object_id": "d69af9c9d827f2cd553f5ee535be4649", + "object_name": "fio__2023.03.26T00.44.42", + "object_type": "DATASET", + "operation": "CREATE", + "reason": null, + "root_id": 24449, + "status": "SUCCESS", + "timestamp": "2023-03-26T02:14:14.539689+00:00", + "user_id": "3", + "user_name": "legacy" + }, + { + "attributes": { + "access": "public", + "metadata": { + "global.server.legacy.hostname": "n010.intlab.redhat.com", + "global.server.legacy.sha1": "9a54d5281", + "global.server.legacy.version": "0.69.11" + } + }, + "id": 24534, + "name": "upload", + "object_id": "141b8c75d66a0e0d1e13eb9a7face6b9", + "object_name": "uperf__2023.03.26T02.26.33", + "object_type": "DATASET", + "operation": "CREATE", + "reason": null, + "root_id": 24533, + "status": "SUCCESS", + "timestamp": "2023-03-26T02:42:13.794463+00:00", + "user_id": "3", + "user_name": "legacy" + }, +] +``` diff --git a/docs/API/V1/server_settings.md b/docs/API/V1/server_settings.md index b9b39b3fbb..c17b72952e 100644 --- a/docs/API/V1/server_settings.md +++ b/docs/API/V1/server_settings.md @@ -34,7 +34,7 @@ server settings. ### Examples -``` +```python GET /api/v1/server/settings/dataset-lifetime { "dataset-lifetime": "4" @@ -83,7 +83,7 @@ the `value` query parameter. 
You can do this even if the value is a simple string, although it's more useful when you need to specify a JSON object value. For example, -``` +```python PUT /api/v1/server/settings/server-state { "value": {"status": "enabled"} @@ -94,7 +94,7 @@ If you omit the `{key}` value from the URI, specify all server settings you wish to change in the `application/json` request body. You can specify a single server setting, or any group of server settings at once. For example, -``` +```python PUT /api/v1/server/settings/ { "server-state": {"status": "disabled", "message": "down for maintenance"}, @@ -116,11 +116,12 @@ The response body is a serialized JSON object with the selected server settings. ## Response status `401` **UNAUTHORIZED** \ -The client did not provide an authentication token. +The client is attempting to change server settings with `PUT` and did not +provide an authentication token. `403` **FORBIDDEN** \ -The client's authentication token does not correspond to a user with `ADMIN` -role. +The client is attempting to change server settings with `PUT` and the provided +authentication token does not correspond to a user with `ADMIN` role. ## Response body @@ -129,12 +130,12 @@ The `application/json` response body for `PUT` is exactly the same as for only the server settings that were changed in the `PUT`. _This request:_ -``` +```python PUT /api/v1/server/settings/dataset-lifetime?value=4 ``` _returns this response:_ -``` +```python { "dataset-lifetime": "4" } @@ -142,7 +143,7 @@ _returns this response:_ _And this request:_ -``` +```python PUT /api/v1/server/settings { "dataset-lifetime": "4 days", @@ -151,7 +152,7 @@ PUT /api/v1/server/settings ``` _returns this response:_ -``` +```python { "dataset-lifetime": "4", "server-state": {"status": "enabled"} @@ -174,7 +175,7 @@ The number of days is specified as an string representing an integer, optionally followed by a space and `day` or `days`. For example, "4" or "4 days" or "4 day" are equivalent. -``` +```python { "dataset-lifetime": "4" } @@ -194,7 +195,7 @@ any information in this JSON object. For example, the following are examples of valid banners: -``` +```python { "server-banner": { "message": "Have a Happy Pbench Day" @@ -202,7 +203,7 @@ For example, the following are examples of valid banners: } ``` -``` +```python { "server-banner": { "message": "The server will be down for 2 hours on Monday, July 31", diff --git a/docs/API/V1/update.md b/docs/API/V1/update.md new file mode 100644 index 0000000000..b70ac656de --- /dev/null +++ b/docs/API/V1/update.md @@ -0,0 +1,79 @@ +# `POST /api/v1/datasets/?access=&owner=` + +This API sets the access and/or name property of the identified dataset. The +specified `` can be either `private` or `public`, or the `access` +query parameter can be omitted to set only the owner. The `` can be +any username known to the Pbench Server, or the `owner` query parameter can +be omitted to set only the access. + +## URI parameters + +`` string \ +The resource ID of a dataset on the Pbench Server. + +## Query parameters + +`access` [`private` | `public` ] \ +The desired access scope of the dataset. This requires that the authenticated +user have `UPDATE` access to the dataset. Select `public` to make the dataset +accessible to all clients, or `private` to make the dataset accessible only +to the owner. + +`owner` valid username \ +A valid Pbench Server username to be given ownership of the specified dataset. 
+This requires the authenticated user to hold `ADMIN` role establishing +full access to both the current and new owners. + +## Request headers + +`authorization: bearer` token \ +*Bearer* schema authorization is required to update a dataset. +E.g., `authorization: bearer ` + +## Response headers + +`content-type: application/json` \ +The return is a serialized JSON object with status feedback. + +## Resource access + +* Requires `UPDATE` access to the `` resource, and, for `owner`, the +`ADMIN` role. + +See [Access model](../access_model.md) + +## Response status + +`200` **OK** \ +Successful request. + +`401` **UNAUTHORIZED** \ +The client is not authenticated. + +`403` **FORBIDDEN** \ +The authenticated client does not have `UPDATE`` access to the specified dataset. + +`404` **NOT FOUND** \ +The `` resource ID does not exist. + +`503` **SERVICE UNAVAILABLE** \ +The server has been disabled using the `server-state` server configuration +setting in the [server configuration](./server_config.md) API. The response +body is an `application/json` document describing the current server state, +a message, and optional JSON data provided by the system administrator. + +## Response body + +The `application/json` response body consists of a JSON object summarizing the +Elasticsearch index updates. For example, if the dataset has 9 Elasticsearch +index documents and all are updated successfully, + +```json +{ + "failure": 0, + "ok": 9 +} +``` + +If the dataset had not been indexed, both numbers will be 0. A non-zero +`"failure"` indicates a partial success, which can be retried. diff --git a/docs/API/V1/upload.md b/docs/API/V1/upload.md new file mode 100644 index 0000000000..031c7382c4 --- /dev/null +++ b/docs/API/V1/upload.md @@ -0,0 +1,106 @@ +# `PUT /api/v1/upload/` + +This API creates a dataset resource by uploading a tarball to the Pbench Server. +The tarball must be compressed with the `xz` program, and have the compound +file type suffix of ".tar.xz". + +Primarily this is expected to be a native Pbench Agent tarball with a specific +structure; however with the `server.archiveonly` metadata key the Pbench Server +can be used to archive and manage metadata for any tarball. + +## URI parameters + +`` string \ +The initial name of the dataset; if `server.archiveonly` is not set, the name must +match the internal name recorded by the Pbench Agent. + +## Query parameters + +`access` [ `private` | `public` ] \ +The desired initial access scope of the dataset. Select `public` to make the dataset +accessible to all clients, or `private` to make the dataset accessible only +to the owner. The default is `private`. + +For example, `?access=public` + +`metadata` metadata keys \ +A set of desired Pbench Server metadata keys to be assigned to the new dataset. +You can set the initial resource name (`dataset.name`), for example, as well as +assigning any keys in the `global` and `user` namespaces. See +[metadata](../metadata.md) for more information. + +In particular the client can set any of: +* `dataset.name`: [default dataset name](../metadata.md#datasetname) +* `server.origin`: [dataset origin](../metadata.md#serverorigin) +* `server.archiveonly`: [suppress indexing](../metadata.md#serverarchiveonly) +* `server.deletion`: [default dataset expiration time](../metadata.md#serverdeletion). 
+ +For example, `?metadata=server.archiveonly:true,global.project:oidc` + +## Request headers + +`authorization: bearer` token \ +*Bearer* schema authorization assigns the ownership of the new dataset to the +authenticated user. E.g., `authorization: bearer ` + +`content-length` tarball size \ +The size of the request octet stream in bytes. Generally supplied automatically by +an upload agent such as Python `requests` or `curl`. + +`content-md5` MD5 hash \ +The MD5 hash of the compressed tarball file. This must match the actual tarball +octet stream provided as the request body. + +## Response headers + +`content-type: application/json` \ +The return is a serialized JSON object with status information. + +## Response status + +`200` **OK** \ +Successful request. The dataset MD5 hash is identical to that of a dataset +previously uploaded to the Pbench Server. This is assumed to be an identical +tarball. + +`201` **CREATED** \ +The tarball was successfully uploaded and the dataset has been created. + +`400` **BAD_REQUEST** \ +One of the required headers is missing or incorrect, invalid query parameters +were specified, or a bad value was specified for a query parameter. The return +payload will be a JSON document with a `message` field containing details. + +`401` **UNAUTHORIZED** \ +The client is not authenticated. + +`503` **SERVICE UNAVAILABLE** \ +The server has been disabled using the `server-state` server configuration +setting in the [server configuration](./server_config.md) API. The response +body is an `application/json` document describing the current server state, +a message, and optional JSON data provided by the system administrator. + +## Response body + +The `application/json` response body consists of a JSON object giving a detailed +message on success or failure: + +```json +{ + "message": "Dataset already exists", + "errors": [ ] +} +``` + +or + +```json +{ + "message": "at least one specified metadata key is invalid", + "errors": [ + "Metadata key 'server.archiveonly' value 'abc' for dataset must be a boolean", + "improper metadata syntax dataset.name=test must be 'k:v'", + "Key test.foo is invalid or isn't settable", + ], +} +``` diff --git a/docs/API/V1/user.md b/docs/API/V1/user.md deleted file mode 100644 index 01ab64ae14..0000000000 --- a/docs/API/V1/user.md +++ /dev/null @@ -1,7 +0,0 @@ -# `GET/PUT/DELETE /api/v1/user/` - -This API allows viewing, changing, or deleting the Pbench Server profile for -a designated user. The authenticated user must be the user being accessed, or -must hold an administrator role. - -__TBD__ diff --git a/docs/API/access_model.md b/docs/API/access_model.md index f25ae3cafa..a1a47396d6 100644 --- a/docs/API/access_model.md +++ b/docs/API/access_model.md @@ -19,20 +19,11 @@ the administrator *role*, can UPDATE or DELETE the dataset. ## Roles -The Pbench Server user model allows assigning an `ADMIN` role to one or more -user accounts on the server. These users will be granted full CRUD access to -all server data, including - -- All datasets -- All user profiles -- Server configuration settings - -A user with the `ADMIN` role can use the [user](V1/user.md) (profile) API to -assign the `ADMIN` role to other users. On installation of the Pbench Server -there are no users with `ADMIN` role, so the server management CLI (described -elsewhere) must be used to create an administrator account or assign the `ADMIN` -role to some user. 
+The Pbench Server access model allows assigning an `ADMIN` role to one or more +user accounts through the OIDC identity provider. These users will be granted +full CRUD access to all server data, including all datasets, server settings, +and audit logs. ## Groups -_TBD_ \ No newline at end of file +_TBD_ diff --git a/docs/API/metadata.md b/docs/API/metadata.md index 5ae968e596..f7c4e634d2 100644 --- a/docs/API/metadata.md +++ b/docs/API/metadata.md @@ -7,9 +7,33 @@ authenticated user, while other metadata is maintained internally by the server and can't be changed. Authenticated users can also add any additional metadata that might be of use. ->__NOTE__: right now the ability to search and filter using metadata is -limited, but our intent is to be able to use any defined metadata value both -for searches and to filter the results of [datasets/list](V1/list.md). +Dataset metadata is represented as a set of nested JSON objects. There are four +distinct key namespaces. These can be addressed (read or changed) at any level +of the hierarchy using a dotted name path, for example `dataset.resource_id` +for a dataset's resource ID, or `global.environment.cluster.ip`. The keys are +lowercase alphabetic, plus digits, hyphen, and underscore: so `global.u-7` is +OK, but `global.Frank` isn't. + +The four namespaces are: +* `dataset` provides inherent attributes of the dataset, including the full +`metadata.log` as `dataset.metalog`. Most of these attributes cannot be changed +after creation. +* `server` provides server management state about a dataset. Most of these +cannot be changed by the user. While many may not be directly meaningful to the +user, the Pbench Server does not hide them. (Beware that retrieving the entire +`server` namespace may result in a substantial amount of data that's of little +use to a client.) +* `global` provides user-controlled dataset metadata which can only be modified +by the owner of the dataset, but is visible to anyone with read access to the +dataset. By convention, a client should use a unique second-level key to avoid +conflicting paths. For example, the Pbench Dashboard uses `global.dashboard`. +* `user` provides a metadata namespace for each dataset that's private to the +authenticated user: each user will see their own set of nested object structure +and values, and these are not shareable. Even if you don't own a dataset you +can set your own private `user` metadata to help you categorize that dataset +and to find it again. By convention, a client should use a unique second-level +key to avoid conflicting paths. For example, the Pbench Dashboard uses +`user.dashboard`. When a dataset is first processed, the Pbench Server will populate basic metadata, including the creation timestamp, the owner of the dataset (the @@ -21,26 +45,11 @@ under the `dataset` metadata key namespace. The Pbench Server will also calculate a default deletion date for the dataset based on the owner's retention policy and the server administrator's retention policy along with some other internal management context. The expected deletion -date is accessible under the `server` metadata key namespace. - -Clients can also set arbitrary metadata through the "global" and "user" -metadata namespaces: -* The "global" namespace can only be modified by the owner of the dataset, -and is visible to anyone with read access to the dataset. 
-* The "user" namespace is private to each authenticated user, and even if you -don't own a dataset you can set your own private "user" metadata to help you -categorize that dataset and to find it again. - -Metadata namespaces are hierarchical, and are exposed as nested JSON objects. -You can address an entire namespace, e.g., `global` or `dataset` and -retrieve the entire JSON object, or you can address nested objects or values -using a dotted metadata key path like `global.contact.email` or -`dataset.metalog.pbench.script`. +date is accessible under the `server` metadata key namespace as +`server.deletion` -By convention, a Pbench Server client should create a sub-namespace to minimize -the risk of key collisions within the `global` and `user` namespaces. The -Pbench Dashboard client, for example, uses `global.dashboard.seen` and -`user.dashboard.favorite`. +Clients can also set arbitrary metadata through the `global` and `user` +metadata namespaces. For example, given the following hypothetical `user` JSON value: @@ -54,7 +63,7 @@ For example, given the following hypothetical `user` JSON value: requesting the metadata `user` (e.g., with `/api/v1/datasets/list?metadata=user`) would return the entire JSON value. In addition: -* `project` would return `["OCP", "customer"]` +* `user.project` would return `["OCP", "customer"]` * `user.tracker.examined` would return `"2022-05-15"` * `user.analysis` would return `{"cpu": "high", "memory": "nominal"}` @@ -80,19 +89,22 @@ to exist if the associated dataset is deleted. ### Dataset namespace This defines the dataset resource, and contains metadata received from the -Pbench Agent, including the full contents of a `metadata.log` file created -while gathering results and during dataset packaging. +Pbench Agent, including the full contents of a `metadata.log` file if one is +present in the tarball. (Support for additional tarball formats is TBD.) + +The `metadata.log` data is represented under the key `dataset.metalog` and can +be queried as part of the entire dataset using the `dataset` key, as a discrete +sub-document using `dataset.metalog` in specific "sections" such as +`dataset.metalog.pbench` or targeting a specific value like +`dataset.metalog.pbench.script`. + +#### `dataset.name` This namespace includes the resource name, which can be modified by the owner of the dataset by setting the metadata key `dataset.name`. All other key values in this namespace are controlled by the server and cannot be changed by the client. -The `metadata.log` data is represented under the key `dataset.metalog` and can -be queried as part of the entire dataset using the `dataset` key, as a discrete -subset using `dataset.metalog` or in specific subsets like -`dataset.metalog.pbench`. - ### Server namespace This defines internal Pbench Server management state related to a dataset @@ -100,22 +112,29 @@ that's not inherent to the representation of the user's performance metrics. These are generally not useful to clients, and some can be large. There are three values in this namespace that clients can modify: -* `server.deletion` is a date after which the Pbench Server may choose to -delete the dataset. This is computed when a dataset is received based on user -profile preferences and server configuration; but it can be modified by the -owner of the dataset, as long as the new timestamp remains within the maximum -allowed server data retention period. 
-* `server.archiveonly` is a boolean that can be set to a boolean True when a -dataset is first uploaded to prevent the Pbench Server from unpacking or -indexing the dataset. That is, the server will archive the dataset and it can -be retrieved for offline analysis but the server will do nothing else with it. -The value can be specified as "t", "true", "y" or "yes" (case insensitive) for -True, and "f", "false", "n", or "no" (also case insensitive) for False. Note -that this is currently only interpreted by the Pbench Server when a dataset is -first uploaded, and will inhibit unpacking and indexing the dataset. It can be -changed later, but the server currently takes no action on such changes. -* `server.origin` is a way to record the origin of a dataset. This is a string -value, and the Pbench Server does not interpret it. +#### `server.deletion` + +This is a date after which the Pbench Server may choose to delete the dataset. +This is computed when a dataset is received based on user profile preferences +and server configuration; but it can be modified by the owner of the dataset, +as long as the new timestamp remains within the maximum allowed +[server data retention period](./V1/server_settings.md#dataset-lifetime). + +#### `server.archiveonly` + +This is a boolean that can be set to a boolean True when a dataset is first +uploaded to prevent the Pbench Server from unpacking or indexing the dataset. +The server will archive the dataset and it can be retrieved for offline +analysis but the server will do nothing else with it. The value can be +specified as "t", "true", "y" or "yes" (case insensitive) for True, and "f", +"false", "n", or "no" (also case insensitive) for False. Note that this is +currently only interpreted by the Pbench Server when a dataset is first +uploaded, and has no effect if modified later. + +#### `server.origin` + +This is defined to provide a common mechanism to record the origin of a +dataset. This is a string value, and the Pbench Server does not interpret it. ### Global namespace @@ -131,18 +150,18 @@ project, for example, will store all client metadata under the `global.dashboard sub-namespace, for example `global.dashboard.seen`. A hypothetical client named "clienta" might use `global.clienta`, for example `global.clienta.configuration`. -__NOTE__: The server will in the future be able to use these values to filter -the selected datasets for [datasets/list](V1/list.md). +Pbench Server clients can use metadata to filter selected datasets in the +collection browser, [datasets](V1/list.md). ### User namespace The server will never modify or directly interpret values in this namespace. An -authenticated client representing the owner of a dataset can set any keys -within this namespace to any valid JSON values (string, number, boolean, list, -or nested objects) for retrieval later. Each authenticated client may set -distinct values for the same keys, or use completely different keys, and can -retrieve those values later. A client authenticated for another user has -its own comletely unique `user` namespace. +authenticated client able to see a dataset can set metadata keys within this +namespace to any valid JSON values (string, number, boolean, list, or nested +objects) for retrieval later. Each authenticated client may set distinct values +for the same keys, or use completely different keys, and can retrieve those +values later. A client authenticated for another user has its own completely +unique `user` namespace. 
The `user` metadata namespace behaves as a user-specific sub-resource under the
dataset. Any authenticated client has UPDATE and DELETE access to this private
metadata sub-resource, even with only READ access to the dataset itself under
the normal Pbench Server access controls.

The recommended best practice is to select a project sub-key that will be unique
to minimize the risk of collisions between various clients. The Pbench Dashboard
project, for example, will store all user-specific client metadata under the
`user.dashboard` sub-namespace, for example `user.dashboard.favorite`. A
hypothetical client named "clienta" might use `user.clienta`, for example
`user.clienta.configuration`.

An unauthenticated client can neither set nor retrieve any `user` namespace
values; such a client will always see the `user` namespace as empty.
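As an illustration of the `user` namespace in practice, a client might store
and later retrieve a private annotation as sketched below with the Python
`requests` package. The server address, token, resource ID, and the exact
request-body shape accepted by the metadata API are assumptions here; the URI
follows the `datasets_metadata` template advertised by the
[endpoints](V1/endpoints.md) API:

```python
import requests

SERVER = "https://pbench.example.com"               # placeholder server address
TOKEN = "<authentication token>"
RESOURCE_ID = "001ab7f04079f620f6f624b6eea913df"    # placeholder resource ID
HEADERS = {"authorization": f"bearer {TOKEN}"}

# Record a private annotation under a client-specific sub-key (assumed body shape).
response = requests.put(
    f"{SERVER}/api/v1/datasets/{RESOURCE_ID}/metadata",
    headers=HEADERS,
    json={"metadata": {"user.clienta.favorite": True}},
)
response.raise_for_status()

# Read it back: each authenticated user sees only their own "user" values.
response = requests.get(
    f"{SERVER}/api/v1/datasets/{RESOURCE_ID}/metadata",
    params={"metadata": "user.clienta"},
    headers=HEADERS,
)
print(response.json())
```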