From d97eb63512c65f6dc0c3e1470b0bc043b57d5c38 Mon Sep 17 00:00:00 2001
From: burgerindividual
Date: Fri, 11 Aug 2023 23:22:27 -0400
Subject: [PATCH] finish frustum and fog culling, branchless checking, start
 work on bfs (still wip)

---
 native/core/Cargo.toml          |   2 +-
 native/core/src/ffi.rs          |  27 ---
 native/core/src/graph/local.rs  | 242 +++++++++++++++++--
 native/core/src/graph/mod.rs    | 397 ++++++++++++++------------------
 native/core/src/graph/octree.rs | 146 ++++++------
 5 files changed, 467 insertions(+), 347 deletions(-)

diff --git a/native/core/Cargo.toml b/native/core/Cargo.toml
index cbce4cd858..5c8db8694e 100644
--- a/native/core/Cargo.toml
+++ b/native/core/Cargo.toml
@@ -17,7 +17,7 @@ lto = "thin"
 [profile.asm]
 inherits = "release"
 panic = "abort"
-debug = true
+debug = false
 lto = "off"
 
 [profile.production]
diff --git a/native/core/src/ffi.rs b/native/core/src/ffi.rs
index 56355d1ed6..37ebb52a23 100644
--- a/native/core/src/ffi.rs
+++ b/native/core/src/ffi.rs
@@ -186,31 +186,4 @@ impl CInlineVec {
 //     let graph = Box::from_raw(graph.into_mut_ref());
 //     std::mem::drop(graph);
 // }
-//
-// #[no_mangle]
-// pub unsafe extern "C" fn Java_me_jellysquid_mods_sodium_core_CoreLibFFI_frustumCreate(
-//     _: *mut JEnv,
-//     _: *mut JClass,
-//     out_frustum: JPtrMut<*const LocalFrustum>,
-//     planes: JPtr<[[f32; 4]; 6]>,
-//     offset: JPtr<[f32; 3]>,
-// ) {
-//     let planes = planes.as_ref().map(f32x3::from_array);
-//
-//     let offset = f32x3::from_array(*offset.as_ref());
-//
-//     let frustum = Box::new(LocalFrustum::new(planes, offset));
-//
-//     let out_frustum = out_frustum.into_mut_ref();
-//     *out_frustum = Box::into_raw(frustum);
-// }
-//
-// #[no_mangle]
-// pub unsafe extern "C" fn Java_me_jellysquid_mods_sodium_core_CoreLibFFI_frustumDelete(
-//     _: *mut JEnv,
-//     _: *mut JClass,
-//     frustum: JPtrMut<LocalFrustum>,
-// ) {
-//     std::mem::drop(Box::from_raw(frustum.into_mut_ref()));
-// }
 // }
diff --git a/native/core/src/graph/local.rs b/native/core/src/graph/local.rs
index 4bbdf16109..8b8a944d59 100644
--- a/native/core/src/graph/local.rs
+++ b/native/core/src/graph/local.rs
@@ -1,5 +1,6 @@
 use std::mem::transmute;
 use std::ops::Shr;
+use std::thread::current;
 
 use core_simd::simd::*;
 use std_float::StdFloat;
@@ -8,6 +9,212 @@ use crate::graph::octree::{LEVEL_3_COORD_LENGTH, LEVEL_3_COORD_MASK, LEVEL_3_COO
 use crate::graph::*;
 use crate::math::*;
 
+#[derive(Clone, Copy)]
+#[repr(transparent)]
+pub struct LocalNodeIndex<const LEVEL: u8>(u32);
+
+// XYZXYZXYZXYZXYZXYZXYZXYZ
+const LOCAL_NODE_INDEX_X_MASK: u32 = 0b10010010_01001001_00100100;
+const LOCAL_NODE_INDEX_Y_MASK: u32 = 0b01001001_00100100_10010010;
+const LOCAL_NODE_INDEX_Z_MASK: u32 = 0b00100100_10010010_01001001;
+
+impl<const LEVEL: u8> LocalNodeIndex<LEVEL> {
+    #[inline(always)]
+    pub fn pack(unpacked: u8x3) -> Self {
+        // allocate one byte per bit for each element.
+        // each element still has its individual bits in linear ordering, but the bytes in the
+        // vector are in morton ordering.
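+        //
+        // worked example (added for illustration, not part of the original patch):
+        // the swizzle below repeats the three coordinate bytes across 24 lanes, the
+        // per-lane shifts move bit 7, bit 6, ... bit 0 of each coordinate into that
+        // lane's sign bit, and to_bitmask() then gathers the 24 sign bits into a
+        // single interleaved index whose X/Y/Z bits line up with
+        // LOCAL_NODE_INDEX_{X,Y,Z}_MASK above.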
+        #[rustfmt::skip]
+        let expanded_linear_bits = simd_swizzle!(
+            unpacked,
+            [
+                // X, Y, Z
+                2, 1, 0,
+                2, 1, 0,
+                2, 1, 0,
+                2, 1, 0,
+                2, 1, 0,
+                2, 1, 0,
+                2, 1, 0,
+                2, 1, 0, // LSB
+            ]
+        );
+
+        // shift each bit into the sign bit for morton ordering
+        #[rustfmt::skip]
+        let expanded_morton_bits = expanded_linear_bits << Simd::<u8, 24>::from_array(
+            [
+                7, 7, 7,
+                6, 6, 6,
+                5, 5, 5,
+                4, 4, 4,
+                3, 3, 3,
+                2, 2, 2,
+                1, 1, 1,
+                0, 0, 0, // LSB
+            ],
+        );
+
+        // arithmetic shift to set each whole lane to its sign bit, then shrink all lanes into a bitmask
+        let morton_packed = unsafe {
+            Mask::<i8, 24>::from_int_unchecked(expanded_morton_bits.cast::<i8>() >> Simd::splat(7))
+        }
+        .to_bitmask();
+
+        Self(morton_packed)
+    }
+
+    #[inline(always)]
+    pub fn inc_x(self) -> Self {
+        self.inc::<{ LOCAL_NODE_INDEX_X_MASK }>()
+    }
+
+    #[inline(always)]
+    pub fn inc_y(self) -> Self {
+        self.inc::<{ LOCAL_NODE_INDEX_Y_MASK }>()
+    }
+
+    #[inline(always)]
+    pub fn inc_z(self) -> Self {
+        self.inc::<{ LOCAL_NODE_INDEX_Z_MASK }>()
+    }
+
+    #[inline(always)]
+    pub fn dec_x(self) -> Self {
+        self.dec::<{ LOCAL_NODE_INDEX_X_MASK }>()
+    }
+
+    #[inline(always)]
+    pub fn dec_y(self) -> Self {
+        self.dec::<{ LOCAL_NODE_INDEX_Y_MASK }>()
+    }
+
+    #[inline(always)]
+    pub fn dec_z(self) -> Self {
+        self.dec::<{ LOCAL_NODE_INDEX_Z_MASK }>()
+    }
+
+    #[inline(always)]
+    pub fn inc<const MASK: u32>(self) -> Self {
+        // make the other bits in the number 1
+        let mut masked = self.0 | !MASK;
+
+        // increment at this level's stride; a level-LEVEL node spans 8^LEVEL sections
+        masked = masked.wrapping_add(1_u32 << (LEVEL * 3));
+
+        // modify only the masked bits in the original number
+        Self((self.0 & !MASK) | (masked & MASK))
+    }
+
+    #[inline(always)]
+    pub fn dec<const MASK: u32>(self) -> Self {
+        // make the other bits in the number 0
+        let mut masked = self.0 & MASK;
+
+        // decrement at this level's stride
+        masked = masked.wrapping_sub(1_u32 << (LEVEL * 3));
+
+        // modify only the masked bits in the original number
+        Self((self.0 & !MASK) | (masked & MASK))
+    }
+
+    #[inline(always)]
+    pub fn as_array_offset(&self) -> usize {
+        self.0 as usize
+    }
+
+    #[inline(always)]
+    pub fn iter_lower_nodes<const LOWER_LEVEL: u8>(&self) -> LowerNodeIter<LEVEL, LOWER_LEVEL> {
+        LowerNodeIter::new(self)
+    }
+
+    // #[inline(always)]
+    // pub fn get_all_neighbors(&self) -> [Self; 6] {
+    //     const INC_MASKS: Simd = Simd::from_array([]);
+    // }
+
+    #[inline(always)]
+    pub fn unpack(&self) -> u8x3 {
+        // allocate one byte per bit for each element.
+        // each element still has its individual bits in morton ordering, but the bytes in the
+        // vector are in linear ordering.
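+        //
+        // round-trip property (added for illustration, not part of the original
+        // patch): unpack() is the exact inverse of pack(), so for any section
+        // coordinates `coords`, LocalNodeIndex::<0>::pack(coords).unpack() yields
+        // `coords` again. the swizzle gathers the three bytes of the packed index
+        // and the shifts pull each coordinate's bits back out of the interleaving.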
+        #[rustfmt::skip]
+        let expanded_linear_bits = simd_swizzle!(
+            u8x4::from_array(self.0.to_le_bytes()),
+            [
+                // X
+                2, 2, 2, 1, 1, 1, 0, 0,
+                // Y
+                2, 2, 2, 1, 1, 0, 0, 0,
+                // Z
+                2, 2, 1, 1, 1, 0, 0, 0, // LSB
+            ]
+        );
+
+        // shift each bit into the sign bit for linear ordering
+        #[rustfmt::skip]
+        let expanded_morton_bits = expanded_linear_bits << Simd::<u8, 24>::from_array(
+            [
+                // X
+                0, 3, 6,
+                1, 4, 7,
+                2, 5,
+                // Y
+                1, 4, 7,
+                2, 5, 0,
+                3, 6,
+                // Z
+                2, 5, 0,
+                3, 6, 1,
+                4, 7, // LSB
+            ],
+        );
+
+        // arithmetic shift to set each whole lane to its sign bit, then shrink all lanes into a bitmask
+        let linear_packed = unsafe {
+            Mask::<i8, 24>::from_int_unchecked(expanded_morton_bits.cast::<i8>() >> Simd::splat(7))
+        }
+        .to_bitmask();
+
+        u8x3::from_slice(&linear_packed.to_le_bytes()[0..=2])
+    }
+}
+
+pub struct LowerNodeIter<const LEVEL: u8, const LOWER_LEVEL: u8> {
+    current: u32,
+    end: u32,
+}
+
+impl<const LEVEL: u8, const LOWER_LEVEL: u8> LowerNodeIter<LEVEL, LOWER_LEVEL> {
+    fn new(index: &LocalNodeIndex<LEVEL>) -> Self {
+        assert!(LEVEL > LOWER_LEVEL);
+
+        let node_size = 1 << (LEVEL * 3);
+
+        Self {
+            current: index.0,
+            end: index.0 + node_size,
+        }
+    }
+}
+
+impl<const LEVEL: u8, const LOWER_LEVEL: u8> Iterator for LowerNodeIter<LEVEL, LOWER_LEVEL> {
+    type Item = LocalNodeIndex<LOWER_LEVEL>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        if self.current >= self.end {
+            None
+        } else {
+            let current = self.current;
+
+            let lower_node_size = 1 << (LOWER_LEVEL * 3);
+            self.current += lower_node_size;
+
+            Some(LocalNodeIndex(current))
+        }
+    }
+}
+
 pub struct LocalCoordinateContext {
     frustum: LocalFrustum,
 
@@ -24,7 +231,7 @@ pub struct LocalCoordinateContext {
     // this is the index that encompasses the corner of the view distance bounding box where the
     // coordinate for each axis is closest to negative infinity, and truncated to the origin of the
     // level 3 node it's contained in.
-    pub iter_node_origin_idx: LocalNodeIndex,
+    pub iter_node_origin_index: LocalNodeIndex<3>,
     pub iter_node_origin_coords: u8x3,
     pub level_3_node_iters: u8x3,
@@ -65,7 +272,7 @@ impl LocalCoordinateContext {
         );
 
         let iter_node_origin_coords = iter_section_origin_coords & u8x3::splat(LEVEL_3_COORD_MASK);
-        let iter_node_origin_idx = LocalNodeIndex::pack(iter_node_origin_coords);
+        let iter_node_origin_index = LocalNodeIndex::pack(iter_node_origin_coords);
 
         let view_cube_length = (section_view_distance * 2) + 1;
         // convert to i32 to avoid implicit wrapping, then explicitly wrap
@@ -88,7 +295,7 @@ impl LocalCoordinateContext {
             fog_distance_squared,
             world_bottom_section_y,
             world_top_section_y,
-            iter_node_origin_idx,
+            iter_node_origin_index,
             iter_node_origin_coords,
             level_3_node_iters,
             iter_region_origin_coords: todo!(),
@@ -102,7 +309,12 @@ impl LocalCoordinateContext {
     // }
 
     #[inline(always)]
-    pub fn check_node<const LEVEL: u8>(&self, local_node_pos: u8x3) -> BoundsCheckResult {
+    pub fn test_node<const LEVEL: u8>(
+        &self,
+        local_node_index: LocalNodeIndex<LEVEL>,
+    ) -> BoundsCheckResult {
+        let local_node_pos = local_node_index.unpack();
+
         let bounds = self.node_get_local_bounds::<LEVEL>(local_node_pos);
 
         let mut result = self.bounds_inside_fog::<LEVEL>(&bounds);
@@ -232,7 +444,8 @@ impl LocalFrustum {
         }
     }
 
-    #[inline(always)]
+    // #[inline(always)]
+    #[no_mangle]
     pub fn test_local_bounding_box(&self, bb: &LocalBoundingBox) -> BoundsCheckResult {
         unsafe {
             // These unsafe mask shenanigans just check if the sign bit is set for each lane.
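+            //
+            // note (added for illustration, not part of the original patch): this is
+            // the classic p-vertex/n-vertex trick. for each plane, the corner of the
+            // box to test is selected per axis by the sign of the plane normal: the
+            // "outside" corner is the one farthest along the normal, the "inside"
+            // corner the nearest. a box is fully outside once its farthest corner is
+            // behind any plane, and fully inside once its nearest corner is in front
+            // of all six.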
@@ -271,18 +484,13 @@ impl LocalFrustum {
                 .fast_fma(inside_bounds_y, self.plane_zs * inside_bounds_z),
             );
 
-            if outside_length_sq.simd_ge(-self.plane_ws).to_bitmask() == 0b111111 {
-                if inside_length_sq.simd_ge(-self.plane_ws).to_bitmask() == 0b111111 {
-                    BoundsCheckResult::Inside
-                } else {
-                    BoundsCheckResult::Partial
-                }
-            } else {
-                if inside_length_sq.simd_ge(-self.plane_ws).to_bitmask() == 0b111111 {
-                    panic!("BAD!!!!!");
-                }
-                BoundsCheckResult::Outside
-            }
+            // if the "outside" corner is behind any plane, the box is fully outside.
+            // if the "inside" corner is in front of all 6 planes, the box is fully inside.
+            // otherwise it straddles a plane: 0 -> Outside, 1 -> Partial, 2 -> Inside
+            let none_outside = outside_length_sq.simd_ge(-self.plane_ws).to_bitmask() == 0b111111;
+            let all_inside = inside_length_sq.simd_ge(-self.plane_ws).to_bitmask() == 0b111111;
+
+            BoundsCheckResult::from_int_unchecked(none_outside as u8 + all_inside as u8)
         }
     }
 }
diff --git a/native/core/src/graph/mod.rs b/native/core/src/graph/mod.rs
index e13d2db430..172d8a8cc3 100644
--- a/native/core/src/graph/mod.rs
+++ b/native/core/src/graph/mod.rs
@@ -1,6 +1,8 @@
 use std::collections::VecDeque;
 use std::fmt::Debug;
 use std::intrinsics::{prefetch_read_data, prefetch_write_data};
+use std::marker::PhantomData;
+use std::mem::transmute;
 use std::ops::*;
 use std::vec::Vec;
@@ -11,7 +13,7 @@
 use std_float::StdFloat;
 
 use crate::collections::ArrayDeque;
 use crate::ffi::{CInlineVec, CVec};
-use crate::graph::local::BoundsCheckResult;
+use crate::graph::local::*;
 use crate::graph::octree::LinearBitOctree;
 use crate::math::*;
@@ -22,167 +24,6 @@
 pub const REGION_COORD_MASK: u8x3 = u8x3::from_array([0b11111000, 0b11111100, 0b11111000]);
 pub const SECTIONS_IN_REGION: usize = 8 * 4 * 8;
 pub const SECTIONS_IN_GRAPH: usize = 256 * 256 * 256;
 
-#[derive(Clone, Copy)]
-#[repr(transparent)]
-pub struct LocalNodeIndex(u32);
-
-impl LocalNodeIndex {
-    // XYZXYZXYZXYZXYZXYZXYZXYZ
-    const X_MASK: u32 = 0b10010010_01001001_00100100;
-    const Y_MASK: u32 = 0b01001001_00100100_10010010;
-    const Z_MASK: u32 = 0b00100100_10010010_01001001;
-
-    #[inline(always)]
-    pub fn pack(unpacked: u8x3) -> Self {
-        // allocate one byte per bit for each element.
-        // each element is still has its individual bits in linear ordering, but the bytes in the
-        // vector are in morton ordering.
-        #[rustfmt::skip]
-        let expanded_linear_bits = simd_swizzle!(
-            unpacked,
-            [
-                // X, Y, Z
-                2, 1, 0,
-                2, 1, 0,
-                2, 1, 0,
-                2, 1, 0,
-                2, 1, 0,
-                2, 1, 0,
-                2, 1, 0,
-                2, 1, 0, // LSB
-            ]
-        );
-
-        // shift each bit into the sign bit for morton ordering
-        #[rustfmt::skip]
-        let expanded_morton_bits = expanded_linear_bits << Simd::<u8, 24>::from_array(
-            [
-                7, 7, 7,
-                6, 6, 6,
-                5, 5, 5,
-                4, 4, 4,
-                3, 3, 3,
-                2, 2, 2,
-                1, 1, 1,
-                0, 0, 0, // LSB
-            ],
-        );
-
-        // arithmetic shift to set each whole lane to its sign bit, then shrinking all lanes to bitmask
-        let morton_packed = unsafe {
-            Mask::<i8, 24>::from_int_unchecked(expanded_morton_bits.cast::<i8>() >> Simd::splat(7))
-        }
-        .to_bitmask();
-
-        Self(morton_packed)
-    }
-
-    #[inline(always)]
-    pub fn inc_x<const LEVEL: u8>(self) -> Self {
-        self.inc::<{ Self::X_MASK }, LEVEL>()
-    }
-
-    #[inline(always)]
-    pub fn inc_y<const LEVEL: u8>(self) -> Self {
-        self.inc::<{ Self::Y_MASK }, LEVEL>()
-    }
-
-    #[inline(always)]
-    pub fn inc_z<const LEVEL: u8>(self) -> Self {
-        self.inc::<{ Self::Z_MASK }, LEVEL>()
-    }
-
-    #[inline(always)]
-    pub fn dec_x<const LEVEL: u8>(self) -> Self {
-        self.dec::<{ Self::X_MASK }, LEVEL>()
-    }
-
-    #[inline(always)]
-    pub fn dec_y<const LEVEL: u8>(self) -> Self {
-        self.dec::<{ Self::Y_MASK }, LEVEL>()
-    }
-
-    #[inline(always)]
-    pub fn dec_z<const LEVEL: u8>(self) -> Self {
-        self.dec::<{ Self::Z_MASK }, LEVEL>()
-    }
-
-    #[inline(always)]
-    pub fn inc<const MASK: u32, const LEVEL: u8>(self) -> Self {
-        // make the other bits in the number 1
-        let mut masked = self.0 | !MASK;
-
-        // increment
-        masked = masked.wrapping_add(1_u32 << LEVEL);
-
-        // modify only the masked bits in the original number
-        Self((self.0 & !MASK) | (masked & MASK))
-    }
-
-    #[inline(always)]
-    pub fn dec<const MASK: u32, const LEVEL: u8>(self) -> Self {
-        // make the other bits in the number 0
-        let mut masked = self.0 & MASK;
-
-        // decrement
-        masked = masked.wrapping_sub(1_u32 << LEVEL);
-
-        // modify only the masked bits in the original number
-        Self((self.0 & !MASK) | (masked & MASK))
-    }
-
-    #[inline(always)]
-    pub fn as_array_offset(&self) -> usize {
-        self.0 as usize
-    }
-
-    #[inline(always)]
-    pub fn unpack(&self) -> u8x3 {
-        // allocate one byte per bit for each element.
-        // each element is still has its individual bits in morton ordering, but the bytes in the
-        // vector are in linear ordering.
-        #[rustfmt::skip]
-        let expanded_linear_bits = simd_swizzle!(
-            u8x4::from_array(self.0.to_le_bytes()),
-            [
-                // X
-                2, 2, 2, 1, 1, 1, 0, 0,
-                // Y
-                2, 2, 2, 1, 1, 0, 0, 0,
-                // Z
-                2, 2, 1, 1, 1, 0, 0, 0, // LSB
-            ]
-        );
-
-        // shift each bit into the sign bit for morton ordering
-        #[rustfmt::skip]
-        let expanded_morton_bits = expanded_linear_bits << Simd::<u8, 24>::from_array(
-            [
-                // X
-                0, 3, 6,
-                1, 4, 7,
-                2, 5,
-                // Y
-                1, 4, 7,
-                2, 5, 0,
-                3, 6,
-                // Z
-                2, 5, 0,
-                3, 6, 1,
-                4, 7, // LSB
-            ],
-        );
-
-        // arithmetic shift to set each whole lane to its sign bit, then shrinking all lanes to bitmask
-        let linear_packed = unsafe {
-            Mask::<i8, 24>::from_int_unchecked(expanded_morton_bits.cast::<i8>() >> Simd::splat(7))
-        }
-        .to_bitmask();
-
-        u8x3::from_slice(&linear_packed.to_le_bytes()[0..=2])
-    }
-}
-
 #[derive(Clone, Copy)]
 #[repr(transparent)]
 pub struct RegionSectionIndex(u8);
@@ -215,13 +56,14 @@ impl RegionSectionIndex {
     }
 }
 
 #[derive(Clone, Copy, PartialEq, Eq)]
+#[repr(u8)]
 pub enum GraphDirection {
-    NegX,
-    NegY,
-    NegZ,
-    PosX,
-    PosY,
-    PosZ,
+    NegX = 0,
+    NegY = 1,
+    NegZ = 2,
+    PosX = 3,
+    PosY = 4,
+    PosZ = 5,
 }
 
 impl GraphDirection {
@@ -245,9 +87,17 @@ impl GraphDirection {
             GraphDirection::PosZ => GraphDirection::NegZ,
         }
     }
+
+    /// SAFETY: `val` must be in the range `0..=5`; this is only checked by a debug assertion,
+    /// so an out-of-range value in release mode is undefined behavior.
+    #[inline(always)]
+    pub unsafe fn from_int_unchecked(val: u8) -> Self {
+        debug_assert!(val <= 5);
+        transmute(val)
+    }
 }
 
 #[derive(Clone, Copy)]
+#[repr(transparent)]
 pub struct GraphDirectionSet(u8);
@@ -281,7 +131,7 @@ impl GraphDirectionSet {
 
     #[inline(always)]
     pub fn add(&mut self, dir: GraphDirection) {
-        self.0 |= 1 << dir as usize;
+        self.0 |= 1 << dir as u8;
     }
 
     #[inline(always)]
@@ -291,7 +141,7 @@ impl GraphDirectionSet {
 
     #[inline(always)]
     pub fn contains(&self, dir: GraphDirection) -> bool {
-        (self.0 & (1 << dir as usize)) != 0
+        (self.0 & (1 << dir as u8)) != 0
     }
 
     #[inline(always)]
@@ -314,7 +164,40 @@ impl BitAnd for GraphDirectionSet {
     }
 }
 
+impl IntoIterator for GraphDirectionSet {
+    type Item = GraphDirection;
+    type IntoIter = GraphDirectionSetIter;
+
+    fn into_iter(self) -> Self::IntoIter {
+        GraphDirectionSetIter(self.0)
+    }
+}
+
+#[repr(transparent)]
+pub struct GraphDirectionSetIter(u8);
+
+impl Iterator for GraphDirectionSetIter {
+    type Item = GraphDirection;
+
+    #[inline(always)]
+    fn next(&mut self) -> Option<Self::Item> {
+        // Daniel Lemire describes this approach to iterating over set bits on his blog:
+        // https://lemire.me/blog/2018/02/21/iterating-over-set-bits-quickly/
+        if self.0 != 0 {
+            // SAFETY: trailing_zeros of a non-zero, valid GraphDirectionSet is always in 0..=5
+            let direction =
+                unsafe { GraphDirection::from_int_unchecked(self.0.trailing_zeros() as u8) };
+            // clear the lowest set bit
+            self.0 &= self.0 - 1;
+            Some(direction)
+        } else {
+            None
+        }
+    }
+}
+
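+// A small, self-contained check of the iteration order above, added for
+// illustration (it assumes the crate's normal `cargo test` harness and is not
+// part of the original patch). `trailing_zeros` always yields the lowest set
+// bit first, so directions come out in discriminant order regardless of the
+// order in which they were added.
+#[cfg(test)]
+mod graph_direction_set_tests {
+    use super::*;
+
+    #[test]
+    fn iterates_in_discriminant_order() {
+        // construct directly from a raw bit pattern: NegX (bit 0) and PosZ (bit 5)
+        let set = GraphDirectionSet(0b100001);
+
+        let mut iter = set.into_iter();
+        assert!(matches!(iter.next(), Some(GraphDirection::NegX)));
+        assert!(matches!(iter.next(), Some(GraphDirection::PosZ)));
+        assert!(matches!(iter.next(), None));
+    }
+}
+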
+// todo: should the top bit signify if it's populated or not?
 #[derive(Default, Clone, Copy)]
+#[repr(transparent)]
 pub struct VisibilityData(u16);
 
 impl VisibilityData {
@@ -393,25 +276,29 @@ impl VisibilityData {
     }
 }
 
-struct GraphSearchState {
-    incoming: [GraphDirectionSet; SECTIONS_IN_REGION],
-    queue: ArrayDeque<LocalNodeIndex, SECTIONS_IN_REGION>,
+pub struct GraphSearchState {
+    incoming: [GraphDirectionSet; SECTIONS_IN_GRAPH],
+    // TODO: figure out a way to calculate a smaller value
+    queue: ArrayDeque<LocalNodeIndex<1>, SECTIONS_IN_GRAPH>,
     enqueued: bool,
 }
 
 impl GraphSearchState {
-    // pub fn enqueue(&mut self, index: LocalNodeIndex, directions: GraphDirectionSet) {
-    //     let incoming = &mut self.incoming[index.as_array_offset()];
-    //     let should_enqueue = incoming.is_empty();
-    //
-    //     incoming.add_all(directions);
-    //
-    //     unsafe {
-    //         self.queue
-    //             .push_conditionally_unchecked(index, should_enqueue);
-    //     }
-    // }
+    pub fn enqueue(&mut self, index: LocalNodeIndex<1>, incoming_direction: GraphDirection) {
+        // SAFETY: LocalNodeIndex should never have the top 8 bits set, and the array is exactly
+        // 2^24 elements long.
+        let node_incoming_directions =
+            unsafe { self.incoming.get_unchecked_mut(index.as_array_offset()) };
+        let should_enqueue = node_incoming_directions.is_empty();
+
+        node_incoming_directions.add(incoming_direction);
+
+        unsafe {
+            self.queue
+                .push_conditionally_unchecked(index, should_enqueue);
+        }
+    }
 
     fn reset(&mut self) {
         self.queue.reset();
@@ -425,63 +312,92 @@ impl GraphSearchState {
     fn default() -> Self {
         Self {
             queue: Default::default(),
-            incoming: [GraphDirectionSet::default(); SECTIONS_IN_REGION],
+            incoming: [GraphDirectionSet::default(); SECTIONS_IN_GRAPH],
             enqueued: false,
         }
     }
 }
 
 pub struct Graph {
-    section_populated_bits: LinearBitOctree,
-    section_visibility_bits: LinearBitOctree,
+    section_is_populated_bits: LinearBitOctree,
+    section_is_visible_bits: LinearBitOctree,
+
+    section_visibility_bit_sets: [VisibilityData; SECTIONS_IN_GRAPH],
 }
 
 impl Graph {
     pub fn new() -> Self {
         Graph {
-            section_populated_bits: Default::default(),
-            section_visibility_bits: Default::default(),
+            section_is_populated_bits: Default::default(),
+            section_is_visible_bits: Default::default(),
+            section_visibility_bit_sets: [Default::default(); SECTIONS_IN_GRAPH],
         }
     }
 
-    pub fn cull(&mut self, context: LocalCoordinateContext, no_occlusion_cull: bool) {}
+    pub fn cull(&mut self, context: &LocalCoordinateContext, no_occlusion_cull: bool) {
+        self.section_is_visible_bits.clear();
+
+        self.frustum_and_fog_cull(context);
+    }
 
-    fn frustum_and_fog_cull(&mut self, context: LocalCoordinateContext) {
-        let cur_idx = context.iter_node_origin_idx;
+    // #[no_mangle]
+    fn frustum_and_fog_cull(&mut self, context: &LocalCoordinateContext) {
+        let mut level_3_index = context.iter_node_origin_index;
 
-        // figure out how to make this go linearly in the
+        // TODO: this iteration could be made more linear in memory, but it's fine for now
         for _x in 0..context.level_3_node_iters.x() {
             for _y in 0..context.level_3_node_iters.y() {
                 for _z in 0..context.level_3_node_iters.z() {
-                    unsafe {
-                        // inside of individual level 3 nodes, the cache locality is *extremely* good.
-                        const LOCALITY: i32 = 3;
-
-                        prefetch_read_data(&self.section_populated_bits, LOCALITY);
-                        prefetch_write_data(&self.section_visibility_bits, LOCALITY);
-                    }
+                    self.check_node(level_3_index, context);
 
-                    let cur_pos = cur_idx.unpack();
-
-                    match context.check_node::<3>(cur_pos) {
-                        BoundsCheckResult::Outside => {}
-                        BoundsCheckResult::Inside => {
-                            self.section_visibility_bits
-                                .copy_from::<3>(&self.section_populated_bits, cur_idx);
-                        }
-                        BoundsCheckResult::Outside | BoundsCheckResult::Inside => {
-                            cur_idx.inc_z::<3>();
-                        }
-                        BoundsCheckResult::Partial => {}
+                    level_3_index = level_3_index.inc_z();
                 }
+                level_3_index = level_3_index.inc_y();
             }
+            level_3_index = level_3_index.inc_x();
         }
     }
 
+    #[inline(always)]
+    fn check_node<const LEVEL: u8>(
+        &mut self,
+        index: LocalNodeIndex<LEVEL>,
+        context: &LocalCoordinateContext,
+    ) {
+        match context.test_node(index) {
+            BoundsCheckResult::Outside => {}
+            BoundsCheckResult::Inside => {
+                self.section_is_visible_bits
+                    .copy_from(&self.section_is_populated_bits, index);
+            }
+            BoundsCheckResult::Partial => match LEVEL {
+                3 => {
+                    for lower_node_index in index.iter_lower_nodes::<2>() {
+                        self.check_node(lower_node_index, context);
+                    }
+                }
+                2 => {
+                    for lower_node_index in index.iter_lower_nodes::<1>() {
+                        self.check_node(lower_node_index, context);
+                    }
+                }
+                1 => {
+                    for lower_node_index in index.iter_lower_nodes::<0>() {
+                        self.check_node(lower_node_index, context);
+                    }
+                }
+                0 => {
+                    self.section_is_visible_bits
+                        .copy_from(&self.section_is_populated_bits, index);
+                }
+                _ => panic!("Invalid node level: {}", LEVEL),
+            },
+        }
+    }
 
     // fn bfs_and_occlusion_cull(
     //     &mut self,
-    //     context: LocalCoordinateContext,
+    //     context: &LocalCoordinateContext,
     //     no_occlusion_cull: bool,
    // ) -> CVec<RegionDrawBatch> {
    //     let mut region_iteration_queue: VecDeque<(i32, i32, i32)> = VecDeque::new();
@@ -507,12 +423,12 @@ impl Graph {
    //     let mut search_ctx = SearchContext::create(&mut self.regions, region_coord);
    //     let mut batch: RegionDrawBatch = RegionDrawBatch::new(region_coord);
    //
-    //     while let Some(node_idx) = search_ctx.origin().search_state.queue.pop() {
-    //         let node_coord = node_idx.as_global_coord(region_coord);
+    //     while let Some(node_index) = search_ctx.origin().search_state.queue.pop() {
+    //         let node_coord = node_index.as_global_coord(region_coord);
 
-    //         let node = search_ctx.origin().nodes[node_idx.as_array_offset()];
+    //         let node = search_ctx.origin().nodes[node_index.as_array_offset()];
    //         let node_incoming =
-    //             search_ctx.origin().search_state.incoming[node_idx.as_array_offset()];
+    //             search_ctx.origin().search_state.incoming[node_index.as_array_offset()];
    //
    //         if !chunk_inside_fog(node_coord, origin_node_coord, view_distance)
    //             || !chunk_inside_frustum(node_coord, frustum)
    //         {
    //             continue;
    //         }
@@ -521,14 +437,14 @@ impl Graph {
-    //         if (node.flags & (1 << 1)) != 0 {
-    //             batch.sections.push(node_idx);
+    //         if (node.flags & (1 << 1)) != 0 {
+    //             batch.sections.push(node_index);
    //         }
    //
    //         let valid_directions = get_valid_directions(origin_node_coord, node_coord);
    //         let allowed_directions =
    //             node.connections.get_outgoing_directions(node_incoming) & valid_directions;
    //
-    //         Self::enqueue_all_neighbors(&mut search_ctx, allowed_directions, node_idx);
+    //         Self::enqueue_all_neighbors(&mut search_ctx, allowed_directions, node_index);
    //     }
@@ -553,11 +469,15 @@ impl Graph {
    //     if !batch.is_empty() {
    //         ...
    //     }
    //
    //     CVec::from_boxed_slice(sorted_batches.into_boxed_slice())
    // }
-    //
+
+    fn divide_graph_into_regions(&self) -> CVec<RegionDrawBatch> {
+        todo!()
+    }
+
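+    // Illustrative sketch (added for this write-up, not original code): how the
+    // BFS dedup in `GraphSearchState::enqueue` above is meant to behave. A node
+    // is pushed onto the queue only the first time any incoming direction is
+    // recorded for it; later arrivals from other directions just accumulate into
+    // the `incoming` set that the visibility data is later tested against:
+    //
+    //     let mut state = GraphSearchState::default();
+    //     let node = LocalNodeIndex::<1>::pack(u8x3::from_array([0, 0, 0]));
+    //     state.enqueue(node, GraphDirection::NegX); // queued here
+    //     state.enqueue(node, GraphDirection::NegY); // only adds to `incoming`
+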
     // fn enqueue_all_neighbors(
-    //     context: &mut SearchContext,
+    //     state: &mut GraphSearchState,
     //     directions: GraphDirectionSet,
-    //     origin: LocalNodeIndex,
+    //     index: LocalNodeIndex<1>,
     // ) {
     //     for direction in GraphDirection::ORDERED {
     //         if directions.contains(direction) {
     //             ...
     //         }
     //     }
     // }
 }
 
+#[inline(always)]
+pub fn get_neighbors(
+    outgoing: GraphDirectionSet,
+    index: LocalNodeIndex<1>,
+    search_state: &mut GraphSearchState,
+) {
+    for direction in outgoing {
+        let neighbor = match direction {
+            GraphDirection::NegX => index.dec_x(),
+            GraphDirection::NegY => index.dec_y(),
+            GraphDirection::NegZ => index.dec_z(),
+            GraphDirection::PosX => index.inc_x(),
+            GraphDirection::PosY => index.inc_y(),
+            GraphDirection::PosZ => index.inc_z(),
+        };
+
+        // the outgoing direction for the current node is the incoming direction for the neighbor
+        search_state.enqueue(neighbor, direction.opposite());
+    }
+}
+
+// #[no_mangle]
+pub fn get_all_neighbors(index: LocalNodeIndex<1>) -> [LocalNodeIndex<1>; 6] {
+    [
+        index.dec_x(),
+        index.dec_y(),
+        index.dec_z(),
+        index.inc_x(),
+        index.inc_y(),
+        index.inc_z(),
+    ]
+}
+
 #[repr(C)]
 pub struct RegionDrawBatch {
     region_coord: (i32, i32, i32),
diff --git a/native/core/src/graph/octree.rs b/native/core/src/graph/octree.rs
index 55f94c769c..47c6e4ff2b 100644
--- a/native/core/src/graph/octree.rs
+++ b/native/core/src/graph/octree.rs
@@ -2,6 +2,8 @@
 use std::mem::size_of;
+use std::intrinsics::{prefetch_read_data, prefetch_write_data};
 
 use core_simd::simd::*;
 
+use crate::graph::local::*;
 use crate::graph::*;
@@ -10,9 +11,9 @@
 // operations on u8x64 are faster in some cases compared to u64x8
 pub type Level3Node = u8x64;
 pub type Level2Node = u64;
 pub type Level1Node = u8;
 pub type Level0Node = bool;
 
-pub const LEVEL_3_IDX_SHIFT: usize = 9;
-pub const LEVEL_2_IDX_SHIFT: usize = 6;
-pub const LEVEL_1_IDX_SHIFT: usize = 3;
+pub const LEVEL_3_INDEX_SHIFT: usize = 9;
+pub const LEVEL_2_INDEX_SHIFT: usize = 6;
+pub const LEVEL_1_INDEX_SHIFT: usize = 3;
 
 pub const LEVEL_3_COORD_SHIFT: u8 = 3;
 pub const LEVEL_2_COORD_SHIFT: u8 = 2;
 pub const LEVEL_1_COORD_SHIFT: u8 = 1;
 
 pub const LEVEL_3_COORD_MASK: u8 = 0b11111000;
 pub const LEVEL_2_COORD_MASK: u8 = 0b11111100;
 pub const LEVEL_1_COORD_MASK: u8 = 0b11111110;
 
-// All of the unsafe gets should be safe, because LocalNodeIndex should never have the top 8 bits
-// set, and our arrays are exactly 2^24 bytes long.
 pub union LinearBitOctree {
     level_3: [Level3Node; SECTIONS_IN_GRAPH / size_of::<Level3Node>() / 8],
     level_2: [Level2Node; SECTIONS_IN_GRAPH / size_of::<Level2Node>() / 8],
     level_1: [Level1Node; SECTIONS_IN_GRAPH / size_of::<Level1Node>() / 8],
 }
 
 impl Default for LinearBitOctree {
     fn default() -> Self {
         Self {
             level_3: [Level3Node::splat(0); SECTIONS_IN_GRAPH / size_of::<Level3Node>() / 8],
         }
     }
 }
 
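+// A layout sanity check added for illustration (it assumes the crate's normal
+// `cargo test` harness and is not part of the original patch). It pins down the
+// invariant the union relies on: every view describes the same 2^24 bits, one
+// bit per section in the graph, and each node type covers exactly the number of
+// sections implied by its index shift.
+#[cfg(test)]
+mod octree_layout_tests {
+    use super::*;
+    use std::mem::size_of;
+
+    #[test]
+    fn all_views_cover_the_same_storage() {
+        // the whole octree is one bit per section
+        assert_eq!(size_of::<LinearBitOctree>(), SECTIONS_IN_GRAPH / 8);
+
+        // one node at each level covers 1 << LEVEL_*_INDEX_SHIFT sections
+        assert_eq!(size_of::<Level3Node>() * 8, 1 << LEVEL_3_INDEX_SHIFT);
+        assert_eq!(size_of::<Level2Node>() * 8, 1 << LEVEL_2_INDEX_SHIFT);
+        assert_eq!(size_of::<Level1Node>() * 8, 1 << LEVEL_1_INDEX_SHIFT);
+    }
+}
+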
+// All of the unsafe gets should be safe, because LocalNodeIndex should never have the top 8 bits
+// set, and our arrays are exactly 2^24 bits long.
 impl LinearBitOctree {
     /// Returns true if all of the bits in the node are true
-    pub fn get<const LEVEL: u8>(&self, section: LocalNodeIndex) -> bool {
-        let array_offset = section.as_array_offset();
+    #[inline(always)]
+    pub fn get<const LEVEL: u8>(&self, index: LocalNodeIndex<LEVEL>) -> bool {
+        let array_offset = index.as_array_offset();
 
         match LEVEL {
             0 => {
-                let level_1_idx = array_offset >> LEVEL_1_IDX_SHIFT;
-                let bit_idx = array_offset & 0b111;
+                let level_1_index = array_offset >> LEVEL_1_INDEX_SHIFT;
+                let bit_index = array_offset & 0b111;
 
-                let level_1_node = unsafe { *self.level_1.get_unchecked(level_1_idx) };
+                let level_1_node = unsafe { *self.level_1.get_unchecked(level_1_index) };
 
-                let bit = (level_1_node >> bit_idx) & 0b1;
+                let bit = (level_1_node >> bit_index) & 0b1;
 
                 bit == 0b1
             }
             1 => {
-                let level_1_idx = array_offset >> LEVEL_1_IDX_SHIFT;
+                let level_1_index = array_offset >> LEVEL_1_INDEX_SHIFT;
 
-                let level_1_node = unsafe { *self.level_1.get_unchecked(level_1_idx) };
+                let level_1_node = unsafe { *self.level_1.get_unchecked(level_1_index) };
 
                 level_1_node == u8::MAX
             }
             2 => {
-                let level_2_idx = array_offset >> LEVEL_2_IDX_SHIFT;
+                let level_2_index = array_offset >> LEVEL_2_INDEX_SHIFT;
 
-                let level_2_node = unsafe { *self.level_2.get_unchecked(level_2_idx) };
+                let level_2_node = unsafe { *self.level_2.get_unchecked(level_2_index) };
 
                 level_2_node == u64::MAX
             }
             3 => {
-                let level_3_idx = array_offset >> LEVEL_3_IDX_SHIFT;
+                let level_3_index = array_offset >> LEVEL_3_INDEX_SHIFT;
 
-                let level_3_node = unsafe { *self.level_3.get_unchecked(level_3_idx) };
+                let level_3_node = unsafe { *self.level_3.get_unchecked(level_3_index) };
 
                 level_3_node == u8x64::splat(u8::MAX)
             }
             _ => unreachable!(),
         }
     }
 
-    /// Sets all of the bits in the node to VAL
-    pub fn set<const LEVEL: u8, const VAL: bool>(&mut self, section: LocalNodeIndex) {
-        let array_offset = section.as_array_offset();
+    #[inline(always)]
+    pub fn copy_from<const LEVEL: u8>(&mut self, src: &Self, index: LocalNodeIndex<LEVEL>) {
+        let array_offset = index.as_array_offset();
 
         match LEVEL {
             0 => {
-                let level_1_idx = array_offset >> LEVEL_1_IDX_SHIFT;
-                let bit_idx = array_offset & 0b111;
+                let level_1_index = array_offset >> LEVEL_1_INDEX_SHIFT;
+                let bit_index = array_offset & 0b111;
 
-                let level_1_node = unsafe { self.level_1.get_unchecked_mut(level_1_idx) };
+                let level_1_node_src = unsafe { *src.level_1.get_unchecked(level_1_index) };
+                let level_1_node_dst = unsafe { self.level_1.get_unchecked_mut(level_1_index) };
 
-                let bit = 0b1 << bit_idx;
-
-                if VAL {
-                    *level_1_node |= bit;
-                } else {
-                    *level_1_node &= !bit;
-                }
+                let bit_mask = 0b1 << bit_index;
+                let src_bit = level_1_node_src & bit_mask;
+                // clear the bit in the destination so the bitwise OR can always act as a copy
+                *level_1_node_dst &= !bit_mask;
+                *level_1_node_dst |= src_bit;
             }
             1 => {
-                let level_1_idx = array_offset >> LEVEL_1_IDX_SHIFT;
+                let level_1_index = array_offset >> LEVEL_1_INDEX_SHIFT;
 
-                let level_1_node = unsafe { self.level_1.get_unchecked_mut(level_1_idx) };
+                let level_1_node_src = unsafe { *src.level_1.get_unchecked(level_1_index) };
+                let level_1_node_dst = unsafe { self.level_1.get_unchecked_mut(level_1_index) };
 
-                *level_1_node = if VAL { u8::MAX } else { 0_u8 };
+                *level_1_node_dst = level_1_node_src;
             }
             2 => {
-                let level_2_idx = array_offset >> LEVEL_2_IDX_SHIFT;
+                let level_2_index = array_offset >> LEVEL_2_INDEX_SHIFT;
 
-                let level_2_node = unsafe { self.level_2.get_unchecked_mut(level_2_idx) };
+                let level_2_node_src = unsafe { *src.level_2.get_unchecked(level_2_index) };
+                let level_2_node_dst = unsafe { self.level_2.get_unchecked_mut(level_2_index) };
 
-                *level_2_node = if VAL { u64::MAX } else { 0_u64 };
+                *level_2_node_dst = level_2_node_src;
             }
             3 => {
-                let level_3_idx = array_offset >> LEVEL_3_IDX_SHIFT;
+                let level_3_index = array_offset >> LEVEL_3_INDEX_SHIFT;
 
-                let level_3_node = unsafe { self.level_3.get_unchecked_mut(level_3_idx) };
+                let level_3_node_src = unsafe { *src.level_3.get_unchecked(level_3_index) };
+                let level_3_node_dst = unsafe { self.level_3.get_unchecked_mut(level_3_index) };
 
-                *level_3_node = if VAL {
-                    u8x64::splat(u8::MAX)
-                } else {
-                    u8x64::splat(0_u8)
-                };
+                *level_3_node_dst = level_3_node_src;
             }
             _ => unreachable!(),
         }
     }
 
+    #[inline(always)]
+    pub fn clear(&mut self) {
+        // The default content is just 0s, so we can create a default and use that.
+        unsafe { self.level_3 = Self::default().level_3 };
+    }
+
+    // inside of individual level 3 nodes, the cache locality is *extremely* good.
+    const INTRINSIC_LOCALITY_LEVEL: i32 = 3;
+
+    pub fn prefetch_top_node_read(&self, index: LocalNodeIndex<3>) {
+        unsafe {
+            let pointer = self
+                .level_1
+                .get_unchecked(index.as_array_offset() >> LEVEL_1_INDEX_SHIFT);
+
+            prefetch_read_data(pointer, Self::INTRINSIC_LOCALITY_LEVEL);
+        }
+    }
+
+    pub fn prefetch_top_node_write(&self, index: LocalNodeIndex<3>) {
+        unsafe {
+            let pointer = self
+                .level_1
+                .get_unchecked(index.as_array_offset() >> LEVEL_1_INDEX_SHIFT);
+
+            prefetch_write_data(pointer, Self::INTRINSIC_LOCALITY_LEVEL);
+        }
+    }
+}
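+
+// Illustrative wiring sketch (added for this write-up, not part of the patch):
+// the traversal loop in `Graph::frustum_and_fog_cull` could issue these
+// prefetches before descending into each level-3 node, mirroring the inline
+// prefetching that this patch removes from mod.rs:
+//
+//     self.section_is_populated_bits.prefetch_top_node_read(level_3_index);
+//     self.section_is_visible_bits.prefetch_top_node_write(level_3_index);
+//     self.check_node(level_3_index, context);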