From a19d92f80af330e10c57bf5a50e961785058b963 Mon Sep 17 00:00:00 2001 From: Jake Lishman Date: Sun, 11 Aug 2024 23:12:39 +0100 Subject: [PATCH] Use `OnceCell` to cache Python operations We were previously using `RefCell` to handle the interior mutability for the Python-operation caches. This works fine, but has two problems: - `RefCell>` is two pointers wide - we had to do heavier dynamic runtime checks for the borrow status on every access, including simple gets. This commit moves the caching to instead use `OnceCell`. The internal tracking for a `OnceCell` is just whether an internal `Option` is in the `Some` variant, and since `Option>` will use the null-pointer optimisation for space, this means that `OnceCell>` is only a single pointer wide. Since it's not permissible to take out a mutable reference to the interior, there is also no dynamic runtime checking---just the null-pointer check that the cell is initialised---so access is faster as well. We can still clear the cache if we hold a mutable reference to the `OnceCell`, and since every method that can invalidate the cache also necessarily changes Rust-space, they certainly require mutable references to the cell, making it safe. There is a risk here, though, in `OnceCell::get_or_init`. This function is not re-entrant; one cannot attempt the initialisation while another thread is initialising it. Typically this is not an issue, since the type isn't `Sync`, however PyO3 allows types that are only `Send` and _not_ `Sync` to be put in `pyclass`es, which Python can then share between threads. This is usually safe due to the GIL mediating access to Python-space objects, so there are no data races. However, if the initialiser passed to `get_or_init` yields control at any point to the Python interpreter (such as by calling a method on a Python object), this gives it a chance to block the thread and allow another Python thread to run. The other thread could then attempt to enter the same cache initialiser, which is a violation of its contract. PyO3's `GILOnceCell` can protect against this failure mode, but this is inconvenient to use for us, because it requires the GIL to be held to access the value at all, which is problematic during `Clone` operations. Instead, we make ourselves data-race safe by manually checking for initialisation, calcuting the cache value ourselves, and then calling `set` or `get_or_init` passing the already created object. While the initialiser can still yield to the Python interpreter, the actual setting of the cell is now atomic in this sense, and thus safe. The downside is that more than one thread might do the same initialisation work, but given how comparitively rare the use of Python threads is, it's better to prioritise memory use and single-Python-threaded access times than one-off cache population in multiple Python threads. --- crates/circuit/src/circuit_data.rs | 20 ++++---- crates/circuit/src/circuit_instruction.rs | 49 ++++++++++--------- crates/circuit/src/dag_node.rs | 10 ++-- crates/circuit/src/packed_instruction.rs | 58 +++++++++++++---------- 4 files changed, 73 insertions(+), 64 deletions(-) diff --git a/crates/circuit/src/circuit_data.rs b/crates/circuit/src/circuit_data.rs index 68267121b5a8..d7e8f8427308 100644 --- a/crates/circuit/src/circuit_data.rs +++ b/crates/circuit/src/circuit_data.rs @@ -11,7 +11,7 @@ // that they have been altered from the originals. #[cfg(feature = "cache_pygates")] -use std::cell::RefCell; +use std::cell::OnceCell; use crate::bit_data::BitData; use crate::circuit_instruction::{CircuitInstruction, OperationFromPython}; @@ -162,7 +162,7 @@ impl CircuitData { params, extra_attrs: None, #[cfg(feature = "cache_pygates")] - py_op: RefCell::new(None), + py_op: OnceCell::new(), }); res.track_instruction_parameters(py, res.data.len() - 1)?; } @@ -218,7 +218,7 @@ impl CircuitData { params, extra_attrs: None, #[cfg(feature = "cache_pygates")] - py_op: RefCell::new(None), + py_op: OnceCell::new(), }); res.track_instruction_parameters(py, res.data.len() - 1)?; } @@ -280,7 +280,7 @@ impl CircuitData { params, extra_attrs: None, #[cfg(feature = "cache_pygates")] - py_op: RefCell::new(None), + py_op: OnceCell::new(), }); Ok(()) } @@ -542,7 +542,7 @@ impl CircuitData { params: inst.params.clone(), extra_attrs: inst.extra_attrs.clone(), #[cfg(feature = "cache_pygates")] - py_op: RefCell::new(None), + py_op: OnceCell::new(), }); } } else if copy_instructions { @@ -554,7 +554,7 @@ impl CircuitData { params: inst.params.clone(), extra_attrs: inst.extra_attrs.clone(), #[cfg(feature = "cache_pygates")] - py_op: RefCell::new(None), + py_op: OnceCell::new(), }); } } else { @@ -650,7 +650,7 @@ impl CircuitData { inst.extra_attrs = result.extra_attrs; #[cfg(feature = "cache_pygates")] { - *inst.py_op.borrow_mut() = Some(py_op.unbind()); + inst.py_op = py_op.unbind().into(); } } Ok(()) @@ -1142,7 +1142,7 @@ impl CircuitData { params: (!inst.params.is_empty()).then(|| Box::new(inst.params.clone())), extra_attrs: inst.extra_attrs.clone(), #[cfg(feature = "cache_pygates")] - py_op: RefCell::new(inst.py_op.borrow().as_ref().map(|obj| obj.clone_ref(py))), + py_op: inst.py_op.clone(), }) } @@ -1233,7 +1233,7 @@ impl CircuitData { { // Standard gates can all rebuild their definitions, so if the // cached py_op exists, just clear out any existing cache. - if let Some(borrowed) = previous.py_op.borrow().as_ref() { + if let Some(borrowed) = previous.py_op.get() { borrowed.bind(py).setattr("_definition", py.None())? } } @@ -1302,7 +1302,7 @@ impl CircuitData { previous.extra_attrs = new_op.extra_attrs; #[cfg(feature = "cache_pygates")] { - *previous.py_op.borrow_mut() = Some(op.into_py(py)); + previous.py_op = op.into_py(py).into(); } for uuid in uuids.iter() { self.param_table.add_use(*uuid, usage)? diff --git a/crates/circuit/src/circuit_instruction.rs b/crates/circuit/src/circuit_instruction.rs index d44051745a89..ae649b5a8110 100644 --- a/crates/circuit/src/circuit_instruction.rs +++ b/crates/circuit/src/circuit_instruction.rs @@ -11,7 +11,7 @@ // that they have been altered from the originals. #[cfg(feature = "cache_pygates")] -use std::cell::RefCell; +use std::cell::OnceCell; use numpy::IntoPyArray; use pyo3::basic::CompareOp; @@ -110,7 +110,7 @@ pub struct CircuitInstruction { pub params: SmallVec<[Param; 3]>, pub extra_attrs: Option>, #[cfg(feature = "cache_pygates")] - pub py_op: RefCell>, + pub py_op: OnceCell>, } impl CircuitInstruction { @@ -122,17 +122,18 @@ impl CircuitInstruction { /// Get the Python-space operation, ensuring that it is mutable from Python space (singleton /// gates might not necessarily satisfy this otherwise). /// - /// This returns the cached instruction if valid, and replaces the cached instruction if not. - pub fn get_operation_mut(&self, py: Python) -> PyResult> { - let mut out = self.get_operation(py)?.into_bound(py); - if !out.getattr(intern!(py, "mutable"))?.extract::()? { - out = out.call_method0(intern!(py, "to_mutable"))?; - } - #[cfg(feature = "cache_pygates")] - { - *self.py_op.borrow_mut() = Some(out.to_object(py)); + /// This returns the cached instruction if valid, but does not replace the cache if it created a + /// new mutable object; the expectation is that any mutations to the Python object need + /// assigning back to the `CircuitInstruction` completely to ensure data coherence between Rust + /// and Python spaces. We can't protect entirely against that, but we can make it a bit harder + /// for standard-gate getters to accidentally do the wrong thing. + pub fn get_operation_mut<'py>(&self, py: Python<'py>) -> PyResult> { + let out = self.get_operation(py)?.into_bound(py); + if out.getattr(intern!(py, "mutable"))?.is_truthy()? { + Ok(out) + } else { + out.call_method0(intern!(py, "to_mutable")) } - Ok(out.unbind()) } } @@ -155,7 +156,7 @@ impl CircuitInstruction { params: op_parts.params, extra_attrs: op_parts.extra_attrs, #[cfg(feature = "cache_pygates")] - py_op: RefCell::new(Some(operation.into_py(py))), + py_op: operation.into_py(py).into(), }) } @@ -182,7 +183,7 @@ impl CircuitInstruction { }) }), #[cfg(feature = "cache_pygates")] - py_op: RefCell::new(None), + py_op: OnceCell::new(), }) } @@ -197,9 +198,14 @@ impl CircuitInstruction { /// The logical operation that this instruction represents an execution of. #[getter] pub fn get_operation(&self, py: Python) -> PyResult { + // This doesn't use `get_or_init` because a) the initialiser is fallible and + // `get_or_try_init` isn't stable, and b) the initialiser can yield to the Python + // interpreter, which might suspend the thread and allow another to inadvertantly attempt to + // re-enter the cache setter, which isn't safe. + #[cfg(feature = "cache_pygates")] { - if let Ok(Some(cached_op)) = self.py_op.try_borrow().as_deref() { + if let Some(cached_op) = self.py_op.get() { return Ok(cached_op.clone_ref(py)); } } @@ -215,9 +221,7 @@ impl CircuitInstruction { #[cfg(feature = "cache_pygates")] { - if let Ok(mut cell) = self.py_op.try_borrow_mut() { - cell.get_or_insert_with(|| out.clone_ref(py)); - } + self.py_op.get_or_init(|| out.clone_ref(py)); } Ok(out) @@ -337,7 +341,7 @@ impl CircuitInstruction { params: params.unwrap_or(op_parts.params), extra_attrs: op_parts.extra_attrs, #[cfg(feature = "cache_pygates")] - py_op: RefCell::new(Some(operation.into_py(py))), + py_op: operation.into_py(py).into(), }) } else { Ok(Self { @@ -347,12 +351,7 @@ impl CircuitInstruction { params: params.unwrap_or_else(|| self.params.clone()), extra_attrs: self.extra_attrs.clone(), #[cfg(feature = "cache_pygates")] - py_op: RefCell::new( - self.py_op - .try_borrow() - .ok() - .and_then(|opt| opt.as_ref().map(|op| op.clone_ref(py))), - ), + py_op: self.py_op.clone(), }) } } diff --git a/crates/circuit/src/dag_node.rs b/crates/circuit/src/dag_node.rs index 73983c35e2e8..9af82b74fa5a 100644 --- a/crates/circuit/src/dag_node.rs +++ b/crates/circuit/src/dag_node.rs @@ -11,7 +11,7 @@ // that they have been altered from the originals. #[cfg(feature = "cache_pygates")] -use std::cell::RefCell; +use std::cell::OnceCell; use crate::circuit_instruction::{CircuitInstruction, OperationFromPython}; use crate::imports::QUANTUM_CIRCUIT; @@ -170,7 +170,7 @@ impl DAGOpNode { instruction.operation = instruction.operation.py_deepcopy(py, None)?; #[cfg(feature = "cache_pygates")] { - *instruction.py_op.borrow_mut() = None; + instruction.py_op = OnceCell::new(); } } let base = PyClassInitializer::from(DAGNode { _node_id: -1 }); @@ -223,7 +223,7 @@ impl DAGOpNode { params: self.instruction.params.clone(), extra_attrs: self.instruction.extra_attrs.clone(), #[cfg(feature = "cache_pygates")] - py_op: RefCell::new(None), + py_op: OnceCell::new(), }) } @@ -240,7 +240,7 @@ impl DAGOpNode { self.instruction.extra_attrs = res.extra_attrs; #[cfg(feature = "cache_pygates")] { - *self.instruction.py_op.borrow_mut() = Some(op.into_py(op.py())); + self.instruction.py_op = op.clone().unbind().into(); } Ok(()) } @@ -399,7 +399,7 @@ impl DAGOpNode { /// Sets the Instruction name corresponding to the op for this node #[setter] fn set_name(&mut self, py: Python, new_name: PyObject) -> PyResult<()> { - let op = self.instruction.get_operation_mut(py)?.into_bound(py); + let op = self.instruction.get_operation_mut(py)?; op.setattr(intern!(py, "name"), new_name)?; self.instruction.operation = op.extract::()?.operation; Ok(()) diff --git a/crates/circuit/src/packed_instruction.rs b/crates/circuit/src/packed_instruction.rs index ac8795d664c1..9c4f19aa1ba3 100644 --- a/crates/circuit/src/packed_instruction.rs +++ b/crates/circuit/src/packed_instruction.rs @@ -11,7 +11,7 @@ // that they have been altered from the originals. #[cfg(feature = "cache_pygates")] -use std::cell::RefCell; +use std::cell::OnceCell; use std::ptr::NonNull; use pyo3::intern; @@ -421,11 +421,17 @@ pub struct PackedInstruction { pub extra_attrs: Option>, #[cfg(feature = "cache_pygates")] - /// This is hidden in a `RefCell` because, while that has additional memory-usage implications - /// while we're still building with the feature enabled, we intend to remove the feature in the - /// future, and hiding the cache within a `RefCell` lets us keep the cache transparently in our - /// interfaces, without needing various functions to unnecessarily take `&mut` references. - pub py_op: RefCell>>, + /// This is hidden in a `OnceCell` because it's just an on-demand cache; we don't create this + /// unless asked for it. A `OnceCell` of a non-null pointer type (like `Py`) is the same + /// size as a pointer and there are no runtime checks on access beyond the initialisation check, + /// which is a simple null-pointer check. + /// + /// WARNING: remember that `OnceCell`'s `get_or_init` method is no-reentrant, so the initialiser + /// must not yield the GIL to Python space. We avoid using `GILOnceCell` here because it + /// requires the GIL to even `get` (of course!), which makes implementing `Clone` hard for us. + /// We can revisit once we're on PyO3 0.22+ and have been able to disable its `py-clone` + /// feature. + pub py_op: OnceCell>, } impl PackedInstruction { @@ -471,33 +477,37 @@ impl PackedInstruction { /// containing circuit; updates to its parameters, label, duration, unit and condition will not /// be propagated back. pub fn unpack_py_op(&self, py: Python) -> PyResult> { - #[cfg(feature = "cache_pygates")] - { - if let Ok(Some(cached_op)) = self.py_op.try_borrow().as_deref() { - return Ok(cached_op.clone_ref(py)); - } - } - - let out = match self.op.view() { - OperationRef::Standard(standard) => standard - .create_py_op( + let unpack = || -> PyResult> { + match self.op.view() { + OperationRef::Standard(standard) => standard.create_py_op( py, self.params.as_deref().map(SmallVec::as_slice), self.extra_attrs.as_deref(), - )? - .into_any(), - OperationRef::Gate(gate) => gate.gate.clone_ref(py), - OperationRef::Instruction(instruction) => instruction.instruction.clone_ref(py), - OperationRef::Operation(operation) => operation.operation.clone_ref(py), + ), + OperationRef::Gate(gate) => Ok(gate.gate.clone_ref(py)), + OperationRef::Instruction(instruction) => Ok(instruction.instruction.clone_ref(py)), + OperationRef::Operation(operation) => Ok(operation.operation.clone_ref(py)), + } }; + // `OnceCell::get_or_init` and the non-stabilised `get_or_try_init`, which would otherwise + // be nice here are both non-reentrant. This is a problem if the init yields control to the + // Python interpreter as this one does, since that can allow CPython to freeze the thread + // and for another to attempt the initialisation. #[cfg(feature = "cache_pygates")] { - if let Ok(mut cell) = self.py_op.try_borrow_mut() { - cell.get_or_insert_with(|| out.clone_ref(py)); + if let Some(ob) = self.py_op.get() { + return Ok(ob.clone_ref(py)); } } - + let out = unpack()?; + #[cfg(feature = "cache_pygates")] + { + // The unpacking operation can cause a thread pause and concurrency, since it can call + // interpreted Python code for a standard gate, so we need to take care that some other + // Python thread might have populated the cache before we do. + let _ = self.py_op.set(out.clone_ref(py)); + } Ok(out) } }