Skip to content

Commit

Permalink
address feedback (other than LirMapping fix)
Browse files Browse the repository at this point in the history
  • Loading branch information
mgree committed Nov 4, 2024
1 parent cce4908 commit 7d11230
Show file tree
Hide file tree
Showing 17 changed files with 262 additions and 161 deletions.
41 changes: 37 additions & 4 deletions doc/user/content/sql/system-catalog/mz_introspection.md
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,19 @@ The `mz_dataflow_channel_operators` view associates [dataflow] channels with the

<!-- RELATION_SPEC_UNDOCUMENTED mz_introspection.mz_dataflow_channel_operators_per_worker -->

## `mz_dataflow_global_ids`

The `mz_dataflow_global_ids` view associates [dataflow] ids with global ids (ids of the form `u8` or `t5`).

<!-- RELATION_SPEC mz_introspection.mz_dataflow_global_ids -->

| Field | Type | Meaning |
|------------- | ------- | -------- |
| `id` | [`uint8`] | The dataflow ID. |
| `global_id` | [`text`] | A global ID associated with that dataflow. |

<!-- RELATION_SPEC_UNDOCUMENTED mz_introspection.mz_compute_dataflow_global_ids_per_worker -->

## `mz_dataflow_operators`

The `mz_dataflow_operators` view describes the [dataflow] operators in the system.
Expand Down Expand Up @@ -292,6 +305,29 @@ through a hierarchical scheme for either aggregation or Top K computations.
| `savings` | [`numeric`] | A conservative estimate of the amount of memory in bytes to be saved by applying the hint. |
| `hint` | [`double precision`] | The hint value that will eliminate `to_cut` levels from the region's hierarchy. |

## `mz_lir_mapping`

The `mz_lir_mapping` view describes the low-level internal representation (LIR) plan that corresponds to global ids.
LIR is a higher-level representation than dataflows; this view is used for profiling and debugging indices and materialized views.
Note that LIR is not a stable interface and may change at any time.
In particular, you should not attempt to parse `operator` descriptions.
LIR nodes are implemented by zero or more dataflow operators with sequential ids.
We use the range `[operator_id_start, operator_id_end)` to record this information.
If an LIR node was implemented without any dataflow operators, `operator_id_start` will be equal to `operator_id_end`.

<!-- RELATION_SPEC mz_introspection.mz_lir_mapping -->
| Field | Type | Meaning |
| --------- | -------- | ----------- |
| `global_id` | [`text`] | The global ID. |
| `lir_id` | [`uint8`] | The LIR node ID. |
| `operator` | [`text`] | The LIR operator, in the format `OperatorName INPUTS [OPTIONS]`. |
| `parent_lir_id` | [`uint8`] | The parent of this LIR node. May be `NULL`. |
| `nesting` | [`uint2`] | The nesting level of this LIR node. |
| `operator_id_start` | [`uint8`] | The first dataflow operator ID implementing this LIR operator (inclusive). |
| `operator_id_end` | [`uint8`] | The first dataflow operator ID _after_ this LIR operator (exclusive). |

<!-- RELATION_SPEC_UNDOCUMENTED mz_introspection.mz_compute_lir_mapping_per_worker -->

## `mz_message_counts`

The `mz_message_counts` view describes the messages and message batches sent and received over the [dataflow] channels in the system.
Expand Down Expand Up @@ -395,6 +431,7 @@ The `mz_scheduling_parks_histogram` view describes a histogram of [dataflow] wor
[`numeric`]: /sql/types/numeric
[`text`]: /sql/types/text
[`uuid`]: /sql/types/uuid
[`uint2`]: /sql/types/uint2
[`uint8`]: /sql/types/uint8
[`uint8 list`]: /sql/types/list
[arrangement]: /get-started/arrangements/#arrangements
Expand All @@ -408,7 +445,3 @@ The `mz_scheduling_parks_histogram` view describes a histogram of [dataflow] wor
<!-- RELATION_SPEC_UNDOCUMENTED mz_introspection.mz_dataflow_operator_reachability -->
<!-- RELATION_SPEC_UNDOCUMENTED mz_introspection.mz_dataflow_operator_reachability_per_worker -->
<!-- RELATION_SPEC_UNDOCUMENTED mz_introspection.mz_dataflow_operator_reachability_raw -->
<!-- RELATION_SPEC_UNDOCUMENTED mz_introspection.mz_compute_lir_mapping_per_worker -->
<!-- RELATION_SPEC_UNDOCUMENTED mz_introspection.mz_compute_dataflow_globalids_per_worker -->
<!-- RELATION_SPEC_UNDOCUMENTED mz_introspection.mz_lir_mapping -->
<!-- RELATION_SPEC_UNDOCUMENTED mz_introspection.mz_dataflow_globalids -->
14 changes: 7 additions & 7 deletions src/catalog/src/builtin.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1911,11 +1911,11 @@ pub static MZ_COMPUTE_EXPORTS_PER_WORKER: LazyLock<BuiltinLog> = LazyLock::new(|
access: vec![PUBLIC_SELECT],
});

pub static MZ_COMPUTE_DATAFLOW_GLOBALIDS_PER_WORKER: LazyLock<BuiltinLog> =
pub static MZ_COMPUTE_DATAFLOW_GLOBAL_IDS_PER_WORKER: LazyLock<BuiltinLog> =
LazyLock::new(|| BuiltinLog {
name: "mz_compute_dataflow_globalids_per_worker",
name: "mz_compute_dataflow_global_ids_per_worker",
schema: MZ_INTROSPECTION_SCHEMA,
oid: oid::LOG_MZ_COMPUTE_DATAFLOW_GLOBALIDS_PER_WORKER_OID,
oid: oid::LOG_MZ_COMPUTE_DATAFLOW_GLOBAL_IDS_PER_WORKER_OID,
variant: LogVariant::Compute(ComputeLog::DataflowGlobal),
access: vec![PUBLIC_SELECT],
});
Expand Down Expand Up @@ -4072,13 +4072,13 @@ WHERE worker_id = 0",
});

pub static MZ_DATAFLOW_GLOBAL_IDS: LazyLock<BuiltinView> = LazyLock::new(|| BuiltinView {
name: "mz_dataflow_globalids",
name: "mz_dataflow_global_ids",
schema: MZ_INTROSPECTION_SCHEMA,
oid: oid::VIEW_MZ_DATAFLOW_GLOBALIDS_OID,
oid: oid::VIEW_MZ_DATAFLOW_GLOBAL_IDS_OID,
column_defs: None,
sql: "
SELECT id, global_id
FROM mz_introspection.mz_compute_dataflow_globalids_per_worker
FROM mz_introspection.mz_compute_dataflow_global_ids_per_worker
WHERE worker_id = 0",
access: vec![PUBLIC_SELECT],
});
Expand Down Expand Up @@ -9136,7 +9136,7 @@ pub static BUILTINS_STATIC: LazyLock<Vec<Builtin<NameReference>>> = LazyLock::ne
Builtin::Log(&MZ_DATAFLOW_ADDRESSES_PER_WORKER),
Builtin::Log(&MZ_DATAFLOW_OPERATOR_REACHABILITY_RAW),
Builtin::Log(&MZ_COMPUTE_EXPORTS_PER_WORKER),
Builtin::Log(&MZ_COMPUTE_DATAFLOW_GLOBALIDS_PER_WORKER),
Builtin::Log(&MZ_COMPUTE_DATAFLOW_GLOBAL_IDS_PER_WORKER),
Builtin::Log(&MZ_MESSAGE_COUNTS_RECEIVED_RAW),
Builtin::Log(&MZ_MESSAGE_COUNTS_SENT_RAW),
Builtin::Log(&MZ_MESSAGE_BATCH_COUNTS_RECEIVED_RAW),
Expand Down
4 changes: 2 additions & 2 deletions src/compute-client/src/logging.rs
Original file line number Diff line number Diff line change
Expand Up @@ -543,14 +543,14 @@ impl LogVariant {
.with_column("nesting", ScalarType::UInt16.nullable(false))
.with_column("operator_id_start", ScalarType::UInt64.nullable(true))
.with_column("operator_id_end", ScalarType::UInt64.nullable(true))
.with_key(vec![0, 1])
.with_key(vec![0, 1, 2])
.finish(),

LogVariant::Compute(ComputeLog::DataflowGlobal) => RelationDesc::builder()
.with_column("id", ScalarType::UInt64.nullable(false))
.with_column("worker_id", ScalarType::UInt64.nullable(false))
.with_column("global_id", ScalarType::String.nullable(false))
.with_key(vec![0])
.with_key(vec![0, 1])
.finish(),
}
}
Expand Down
12 changes: 12 additions & 0 deletions src/compute-types/src/plan.rs
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,8 @@ impl AvailableCollections {
}

/// An identifier for an LIR node.
///
/// Backed by a `NonZeroU64`, so zero is never a valid id.
/// LirIds start at 1, not 0, which lets us get a better struct packing in `ComputeEvent::LirMapping`.
#[derive(Clone, Copy, Debug, Deserialize, Eq, Ord, PartialEq, PartialOrd, Serialize)]
pub struct LirId(NonZeroU64);

Expand Down Expand Up @@ -1147,6 +1149,16 @@ mod tests {

use super::*;

#[mz_ore::test]
fn test_option_lirid_fits_in_usize() {
    use std::mem::size_of;

    // Checks that wrapping a `LirId` in `Option` does not make it larger than
    // a `usize`.
    let (option_lirid_size, usize_size) = (size_of::<Option<LirId>>(), size_of::<usize>());
    assert!(
        option_lirid_size <= usize_size,
        "Option<LirId> (size {option_lirid_size}) should fit in usize (size {usize_size})"
    );
}

proptest! {
#![proptest_config(ProptestConfig::with_cases(10))]
#[mz_ore::test]
Expand Down
Loading

0 comments on commit 7d11230

Please sign in to comment.