Skip to content

Commit

Permalink
fix frustum cull and make node test branchless, continue bfs work (st…
Browse files Browse the repository at this point in the history
…ill WIP)
  • Loading branch information
burgerindividual committed Aug 12, 2023
1 parent d97eb63 commit 7763cc0
Show file tree
Hide file tree
Showing 7 changed files with 140 additions and 61 deletions.
2 changes: 1 addition & 1 deletion native/core/src/ffi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ impl<T, const LEN: usize> CInlineVec<T, LEN> {
// _: *mut JEnv,
// _: *mut JClass,
// graph: JPtrMut<Graph>,
// frustum: JPtr<LocalFrustum>,
// frustum: JPtr<[[f32; 6]; 4]>,
// view_distance: Jint,
// out_results: JPtrMut<CVec<RegionDrawBatch>>,
// ) {
Expand Down
42 changes: 31 additions & 11 deletions native/core/src/graph/local.rs
Original file line number Diff line number Diff line change
Expand Up @@ -128,10 +128,28 @@ impl<const LEVEL: u8> LocalNodeIndex<LEVEL> {
LowerNodeIter::new(self)
}

// #[inline(always)]
// pub fn get_all_neighbors(&self) -> [Self; 6] {
// const INC_MASKS: Simd<u32, 6> = Simd::from_array([]);
// }
/// Intended to return this node's six neighbors as an array.
///
/// NOTE(review): not implemented yet — the body ends in `todo!()`, so calling this panics.
/// Neither mask table below is read by any code so far.
#[inline(always)]
pub fn get_all_neighbors(&self) -> [Self; 6] {
// lanes 0-2 hold the X/Y/Z index masks and lanes 3-5 are all ones; presumably lanes 0-2
// are the directions that get decremented — confirm once this is implemented
const DEC_MASKS: Simd<u32, 6> = Simd::from_array([
LOCAL_NODE_INDEX_X_MASK,
LOCAL_NODE_INDEX_Y_MASK,
LOCAL_NODE_INDEX_Z_MASK,
u32::MAX,
u32::MAX,
u32::MAX,
]);

// mirror image of DEC_MASKS: lanes 0-2 are all ones and lanes 3-5 hold the X/Y/Z index
// masks; presumably lanes 3-5 are the directions that get incremented — confirm
const INC_MASKS: Simd<u32, 6> = Simd::from_array([
u32::MAX,
u32::MAX,
u32::MAX,
LOCAL_NODE_INDEX_X_MASK,
LOCAL_NODE_INDEX_Y_MASK,
LOCAL_NODE_INDEX_Z_MASK,
]);

todo!()
}

#[inline(always)]
pub fn unpack(&self) -> u8x3 {
Expand Down Expand Up @@ -253,7 +271,7 @@ impl LocalCoordinateContext {
world_top_section_y: i8,
) -> Self {
assert!(
section_view_distance <= 127,
section_view_distance <= MAX_VIEW_DISTANCE,
"View distances above 127 are not supported"
);

Expand All @@ -279,6 +297,12 @@ impl LocalCoordinateContext {
// todo: should the +1 be here?
let world_height =
((world_top_section_y as i32) - (world_bottom_section_y as i32) + 1) as u8;

assert!(
world_height <= MAX_WORLD_HEIGHT,
"World heights larger than 254 sections are not supported"
);

// the add is done to make sure we round up during truncation
let level_3_node_iters = (u8x3::from_xyz(view_cube_length, world_height, view_cube_length)
+ Simd::splat(LEVEL_3_COORD_LENGTH - 1))
Expand All @@ -303,11 +327,6 @@ impl LocalCoordinateContext {
}
}

// #[no_mangle]
// pub fn test(&self, local_node_pos: u8x3) -> BoundsCheckResult {
// self.check_node::<3>(local_node_pos)
// }

#[inline(always)]
pub fn test_node<const LEVEL: u8>(
&self,
Expand Down Expand Up @@ -487,7 +506,8 @@ impl LocalFrustum {
// if any outside lengths are greater than -w, return OUTSIDE
// if all inside lengths are greater than -w, return INSIDE
// otherwise, return PARTIAL
let none_outside = outside_length_sq.simd_ge(-self.plane_ws).to_bitmask() == 0b000000;
// NOTE: it is impossible for a lane to be both inside and outside at the same time
let none_outside = outside_length_sq.simd_ge(-self.plane_ws).to_bitmask() == 0b111111;
let all_inside = inside_length_sq.simd_ge(-self.plane_ws).to_bitmask() == 0b111111;

BoundsCheckResult::from_int_unchecked(none_outside as u8 + all_inside as u8)
Expand Down
112 changes: 75 additions & 37 deletions native/core/src/graph/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use std::collections::VecDeque;
use std::fmt::Debug;
use std::intrinsics::{prefetch_read_data, prefetch_write_data};
use std::marker::PhantomData;
use std::mem::transmute;
use std::mem::{swap, transmute};
use std::ops::*;
use std::vec::Vec;

Expand All @@ -24,6 +24,12 @@ pub const REGION_COORD_MASK: u8x3 = u8x3::from_array([0b11111000, 0b11111100, 0b
pub const SECTIONS_IN_REGION: usize = 8 * 4 * 8;
pub const SECTIONS_IN_GRAPH: usize = 256 * 256 * 256;

/// Largest view distance accepted; `LocalCoordinateContext` asserts that
/// `section_view_distance` does not exceed it.
pub const MAX_VIEW_DISTANCE: u8 = 127;
/// Largest world height, in sections, accepted; `LocalCoordinateContext` asserts that the
/// computed `world_height` does not exceed it.
pub const MAX_WORLD_HEIGHT: u8 = 254;
/// Queue capacity: the bound from `GraphSearchState::get_bfs_queue_max_size` evaluated at
/// compile time for the maximum view distance and world height, with the frustum halving
/// enabled (`true`).
pub const BFS_QUEUE_SIZE: usize =
GraphSearchState::get_bfs_queue_max_size(MAX_VIEW_DISTANCE, MAX_WORLD_HEIGHT, true) as usize;
/// `ArrayDeque` of level 1 node indices, parameterized by `BFS_QUEUE_SIZE`.
pub type BfsQueue = ArrayDeque<LocalNodeIndex<1>, BFS_QUEUE_SIZE>;

#[derive(Clone, Copy)]
#[repr(transparent)]
pub struct RegionSectionIndex(u8);
Expand Down Expand Up @@ -278,13 +284,57 @@ impl VisibilityData {

/// State used while searching the graph: incoming directions per node index plus the queues
/// of level 1 node indices.
pub struct GraphSearchState {
/// one `GraphDirectionSet` per node index (`SECTIONS_IN_GRAPH` entries); `enqueue` appears
/// to add the incoming direction to the entry for the enqueued index
incoming: [GraphDirectionSet; SECTIONS_IN_GRAPH],
// TODO: figure out a way to calculate a smaller value
// NOTE(review): `queue` and `queues` are both listed here; this may be a rendered diff that
// still shows the old single-queue field — confirm which fields exist in the real file
queue: ArrayDeque<LocalNodeIndex<1>, SECTIONS_IN_GRAPH>,
/// pair of queues; `queues_flipped` selects which one is read and which one is written
queues: [BfsQueue; 2],

// NOTE(review): the only visible use of this flag is `flip` resetting it to `false`;
// what sets it to `true` is not shown here
enqueued: bool,
/// when false, queue 0 is read and queue 1 is write.
/// when true, queue 0 is write and queue 1 is read.
queues_flipped: bool,
}

impl GraphSearchState {
/// Computes an upper bound on the number of entries the BFS queue can hold at once.
///
/// The bound assumes the worst case: the player sits at the center of both the render
/// distance and the world height. Traversal lengths do not include the section the player
/// is in.
///
/// * `section_render_distance` - horizontal traversal length, in sections.
/// * `world_height` - world height, in sections.
/// * `frustum` - when `true`, the bound is halved (rounding up), on the assumption that
///   frustum culling never leaves more than half of the world visible.
///
/// Degenerate inputs (a render distance or world height of 0) are treated as zero-length
/// traversals instead of underflowing, so this never panics and is always usable in const
/// contexts.
pub const fn get_bfs_queue_max_size(
    section_render_distance: u8,
    world_height: u8,
    frustum: bool,
) -> u32 {
    // for the worst case, we will assume the player is in the center of the render distance
    // and world height.
    // for traversal lengths, we don't include the chunk the player is in.
    // (saturating: a world height of 0 has no layers above or below the player.)
    let max_height_traversal = world_height.div_ceil(2).saturating_sub(1) as u32;
    let max_width_traversal = section_render_distance as u32;

    // check if the traversal up and down is restricted by the world height. if so, remove the
    // out-of-bounds layers from the iteration. otherwise start at the first ring and account
    // for the 2 chunks directly above and below the player.
    let (mut count, first_layer): (u32, u32) = if max_height_traversal < max_width_traversal {
        (0, max_width_traversal - max_height_traversal)
    } else {
        (2, 1)
    };

    // add rings that are on both the top and bottom.
    // simplification of:
    // while layer_index < max_width_traversal {
    //     count += (layer_index * 8);
    //     layer_index += 1;
    // }
    // (saturating: with a render distance of 0 there are no rings to add. `first_layer` is
    // always at least 1, so the second factor cannot underflow.)
    let ring_layers = max_width_traversal.saturating_sub(first_layer);
    count += 4 * ring_layers * (max_width_traversal + first_layer - 1);

    // add final, outer-most ring.
    count += max_width_traversal * 4;

    if frustum {
        // divide by 2 because the player should never be able to see more than half of the
        // world at once with frustum culling. This assumes an FOV maximum of 180 degrees.
        count = count.div_ceil(2);
    }

    count
}

pub fn enqueue(&mut self, index: LocalNodeIndex<1>, incoming_direction: GraphDirection) {
// SAFETY: LocalNodeIndex should never have the top 8 bits set, and the array is exactly
// 2^24 elements long.
Expand All @@ -295,25 +345,37 @@ impl GraphSearchState {
node_incoming_directions.add(incoming_direction);

unsafe {
self.queue
self.get_write_queue()
.push_conditionally_unchecked(index, should_enqueue);
}
}

fn reset(&mut self) {
self.queue.reset();
self.incoming.fill(GraphDirectionSet::none());
/// Borrows the queue currently used for reading: queue 0 while `queues_flipped` is `false`,
/// queue 1 once it is `true`.
fn get_read_queue(&self) -> &BfsQueue {
    let read_slot = if self.queues_flipped { 1 } else { 0 };
    &self.queues[read_slot]
}

/// Mutably borrows the queue currently used for writing: queue 1 while `queues_flipped` is
/// `false`, queue 0 once it is `true`.
fn get_write_queue(&mut self) -> &mut BfsQueue {
    let write_slot = if self.queues_flipped { 0 } else { 1 };
    &mut self.queues[write_slot]
}

/// Resets both queues, swaps which queue is read and which is written, clears the
/// `enqueued` flag, and sets every `incoming` entry back to `GraphDirectionSet::none()`.
///
/// NOTE(review): both queues are reset before the roles swap, so nothing pushed to the
/// write queue beforehand is still there afterwards — confirm this is intended.
fn flip(&mut self) {
    for queue in self.queues.iter_mut() {
        queue.reset();
    }

    self.queues_flipped ^= true;

    self.enqueued = false;
    self.incoming.fill(GraphDirectionSet::none());
}
}

impl Default for GraphSearchState {
/// Builds a search state with default-constructed queues, every `incoming` entry set to
/// `GraphDirectionSet::default()`, and both flags `false`.
fn default() -> Self {
Self {
// NOTE(review): `queue` is initialized alongside `queues`; this may be a rendered diff
// that still shows the old field — confirm against the real file
queue: Default::default(),
// NOTE(review): this is a by-value array of `SECTIONS_IN_GRAPH` (256 * 256 * 256)
// entries — confirm callers construct this somewhere that can hold it
incoming: [GraphDirectionSet::default(); SECTIONS_IN_GRAPH],
enqueued: false,
queues: [Default::default(), Default::default()],
queues_flipped: false,
}
}
}
Expand Down Expand Up @@ -344,7 +406,8 @@ impl Graph {
fn frustum_and_fog_cull(&mut self, context: &LocalCoordinateContext) {
let mut level_3_index = context.iter_node_origin_index;

// this could go more linear in memory prolly, but eh
// this could go more linearly in memory, but we probably have good enough locality inside
// level 3 nodes
for _x in 0..context.level_3_node_iters.x() {
for _y in 0..context.level_3_node_iters.y() {
for _z in 0..context.level_3_node_iters.z() {
Expand Down Expand Up @@ -474,31 +537,6 @@ impl Graph {
todo!()
}

// fn enqueue_all_neighbors(
// state: &mut GraphSearchState,
// directions: GraphDirectionSet,
// index: LocalNodeIndex<1>,
// ) {
// for direction in GraphDirection::ORDERED {
// if directions.contains(direction) {
// let (neighbor, wrapped) = match direction {
// GraphDirection::NegX => origin.dec_x(),
// GraphDirection::NegY => origin.dec_y(),
// GraphDirection::NegZ => origin.dec_z(),
// GraphDirection::PosX => origin.inc_x(),
// GraphDirection::PosY => origin.inc_y(),
// GraphDirection::PosZ => origin.inc_z(),
// };
//
// if let Some(neighbor_region) = context.adjacent(direction, wrapped) {
// neighbor_region
// .search_state
// .enqueue(neighbor, GraphDirectionSet::single(direction.opposite()));
// }
// }
// }
// }

// pub fn add_section(&mut self, chunk_coord: LocalSectionCoord) {
// let mut region = self
// .regions
Expand Down
3 changes: 1 addition & 2 deletions native/core/src/graph/octree.rs
Original file line number Diff line number Diff line change
Expand Up @@ -133,8 +133,7 @@ impl LinearBitOctree {

/// Overwrites every entry of `level_3` with an all-zero `Level3Node`.
#[inline(always)]
pub fn clear(&mut self) {
    // Zero the existing storage in place; there is no need to build a whole default value
    // (or use `unsafe`) just to copy its zeroed contents over first.
    self.level_3.fill(Level3Node::splat(0));
}

// inside of individual level 3 nodes, the cache locality is *extremely* good.
Expand Down
2 changes: 2 additions & 0 deletions native/core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
#![feature(portable_simd)]
#![feature(core_intrinsics)]
#![feature(cell_leak)]
// will be stabilized very soon, see https://github.com/rust-lang/rust/issues/88581
#![feature(int_roundings)]

mod collections;
mod ffi;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@
import net.minecraft.client.render.chunk.ChunkOcclusionData;

public class VisibilityEncoding {
/**
* @return a 36-bit representation of the occlusion data
*/
public static long extract(ChunkOcclusionData occlusionData) {
if (occlusionData == null) {
return 0;
Expand Down
37 changes: 27 additions & 10 deletions src/main/java/me/jellysquid/mods/sodium/core/CoreLib.java
Original file line number Diff line number Diff line change
Expand Up @@ -48,16 +48,33 @@ private static void copyFrustumPoints(FloatBuffer buf, FrustumIntersection frust

var planes = (Vector4f[]) field.get(frustum);

for (int i = 0; i < 6; i++) {
buf.put((i * )planes[i].x);
}

planes[0].get(0, buf);
planes[1].get(16, buf);
planes[2].get(32, buf);
planes[3].get(48, buf);
planes[4].get(64, buf);
planes[5].get(80, buf);
buf.put(planes[0].x);
buf.put(planes[1].x);
buf.put(planes[2].x);
buf.put(planes[3].x);
buf.put(planes[4].x);
buf.put(planes[5].x);

buf.put(planes[0].y);
buf.put(planes[1].y);
buf.put(planes[2].y);
buf.put(planes[3].y);
buf.put(planes[4].y);
buf.put(planes[5].y);

buf.put(planes[0].z);
buf.put(planes[1].z);
buf.put(planes[2].z);
buf.put(planes[3].z);
buf.put(planes[4].z);
buf.put(planes[5].z);

buf.put(planes[0].w);
buf.put(planes[1].w);
buf.put(planes[2].w);
buf.put(planes[3].w);
buf.put(planes[4].w);
buf.put(planes[5].w);
} catch (ReflectiveOperationException e) {
throw new RuntimeException("Failed to extract planes from frustum", e);
}
Expand Down

0 comments on commit 7763cc0

Please sign in to comment.