Skip to content

Commit

Permalink
Merge branch 'master' into nnue-evaluator
Browse files (browse the repository at this point in the history)
  • Loading branch information
primenumber committed May 8, 2024
2 parents a2b663c + 15846c7 commit e50a8f6
Show file tree
Hide file tree
Showing 19 changed files with 288 additions and 255 deletions.
92 changes: 46 additions & 46 deletions README.md
Original file line number | Diff line number | Diff line change
Expand Up @@ -30,60 +30,60 @@ $ cargo run --release -- ffobench

## Benchmark result

- Date: 2024/02/21
- Date: 2024/03/29
- Hardware: AMD Ryzen 9 7950X3D, DDR5-4800 64GB
- Environment: Linux 6.5.0-18-generic, Ubuntu 22.04.4, rustc 1.78.0-nightly
- Environment: Linux 6.5.0-26-generic, Ubuntu 22.04.4, rustc 1.78.0-nightly

FFO 40-59

|No.|empties|result|answer|move|nodes|time|NPS|
|---:|---:|---:|---:|---:|---:|:--:|---:|
|40|20|+38|+38|A2|90.4M| 0.054s|1644M/s|
|41|22| +0| +0|H4| 117M| 0.092s|1267M/s|
|42|22| +6| +6|G2| 287M| 0.177s|1617M/s|
|43|23|-12|-12|C7| 159M| 0.121s|1303M/s|
|44|23|-14|-14|D2| 111M| 0.083s|1322M/s|
|45|24| +6| +6|B2|1.57G| 1.010s|1561M/s|
|46|24| -8| -8|B3| 494M| 0.349s|1412M/s|
|47|25| +4| +4|G2| 167M| 0.123s|1350M/s|
|48|25|+28|+28|F6| 901M| 0.680s|1323M/s|
|49|26|+16|+16|E1|3.10G| 1.988s|1559M/s|
|50|26|+10|+10|D8|3.56G| 2.773s|1285M/s|
|51|27| +6| +6|E2|1.47G| 1.240s|1192M/s|
|52|27| +0| +0|A3|1.31G| 1.080s|1215M/s|
|53|28| -2| -2|D8|5.77G| 4.829s|1196M/s|
|54|28| -2| -2|C7|15.6G| 11.420s|1368M/s|
|55|29| +0| +0|G6|29.2G| 28.391s|1030M/s|
|56|29| +2| +2|H5|4.56G| 4.727s|965M/s|
|57|30|-10|-10|A6|19.8G| 18.593s|1066M/s|
|58|30| +4| +4|G1|4.83G| 4.961s|973M/s|
|59|34|+64|+64|G8|1.66k| 0.034s|0M/s|

[Total] elapsed: 82738454us, node count: 93254297417, NPS: 1127097412nodes/sec
|40|20|+38|+38|A2|76.2M| 0.062s|1209M/s|
|41|22| +0| +0|H4|87.2M| 0.080s|1076M/s|
|42|22| +6| +6|G2| 270M| 0.178s|1511M/s|
|43|23|-12|-12|C7| 164M| 0.146s|1122M/s|
|44|23|-14|-14|D2|84.3M| 0.081s|1029M/s|
|45|24| +6| +6|B2|1.51G| 0.978s|1547M/s|
|46|24| -8| -8|B3| 443M| 0.329s|1343M/s|
|47|25| +4| +4|G2| 133M| 0.122s|1084M/s|
|48|25|+28|+28|F6| 874M| 0.676s|1291M/s|
|49|26|+16|+16|E1|3.14G| 2.047s|1536M/s|
|50|26|+10|+10|D8|3.24G| 2.543s|1276M/s|
|51|27| +6| +6|E2|1.41G| 1.188s|1191M/s|
|52|27| +0| +0|A3|1.42G| 1.175s|1208M/s|
|53|28| -2| -2|D8|4.91G| 4.038s|1216M/s|
|54|28| -2| -2|C7|13.6G| 9.923s|1375M/s|
|55|29| +0| +0|G6|27.3G| 25.529s|1070M/s|
|56|29| +2| +2|H5|4.26G| 4.629s|921M/s|
|57|30|-10|-10|A6|19.6G| 18.350s|1071M/s|
|58|30| +4| +4|G1|4.48G| 4.760s|942M/s|
|59|34|+64|+64|G8|1.26k| 0.029s|0M/s|

[Total] elapsed: 76872921us, node count: 87193095283, NPS: 1134249800nodes/sec

FFO 60-79

|No.|empties|result|answer|move|nodes|time|NPS|
|---:|---:|---:|---:|---:|---:|:--:|---:|
|60|24|+20|+20|C2| 216M| 0.164s|1313M/s|
|61|25|-14|-14|G1| 339M| 0.318s|1062M/s|
|62|27|+28|+28|E8|8.51G| 7.369s|1154M/s|
|63|27| -2| -2|F2|2.88G| 2.393s|1203M/s|
|64|27|+20|+20|B4|11.2G| 9.350s|1207M/s|
|65|28|+10|+10|G1|29.2G| 20.527s|1422M/s|
|66|28|+30|+30|H3|21.1G| 16.079s|1314M/s|
|67|28|+22|+22|H3|28.2G| 20.363s|1386M/s|
|68|30|+28|+28|E8| 139G| 107.049s|1301M/s|
|69|30| +0| +0|H3|15.7G| 14.081s|1121M/s|
|70|30|-24|-24|E3|14.4G| 13.097s|1106M/s|
|71|31|+20|+20|D2|20.1G| 19.281s|1044M/s|
|72|31|+24|+24|E1| 258G| 298.006s|867M/s|
|73|31| -4| -4|G4|29.4G| 34.672s|849M/s|
|74|31|-30|-30|F1| 611G| 563.534s|1084M/s|
|75|32|+14|+14|D2| 299G| 225.346s|1327M/s|
|76|32|+32|+32|A3|2.12T|1869.331s|1134M/s|
|77|34|+34|+34|B7|1.13T|1015.754s|1121M/s|
|78|34| +8| +8|F1| 673G| 844.518s|797M/s|
|79|36|+64|+64|D7|56.8G| 43.134s|1319M/s|

[Total] elapsed: 5124378900us, node count: 5480838886501, NPS: 1069561598nodes/sec
|60|24|+20|+20|C2| 213M| 0.175s|1213M/s|
|61|25|-14|-14|G1| 344M| 0.325s|1058M/s|
|62|27|+28|+28|E8|7.61G| 6.587s|1155M/s|
|63|27| -2| -2|F2|2.89G| 2.383s|1215M/s|
|64|27|+20|+20|B4|10.4G| 8.675s|1204M/s|
|65|28|+10|+10|G1|26.6G| 18.953s|1405M/s|
|66|28|+30|+30|H3|19.7G| 15.215s|1299M/s|
|67|28|+22|+22|H3|25.8G| 18.823s|1375M/s|
|68|30|+28|+28|E8| 116G| 90.898s|1287M/s|
|69|30| +0| +0|H3|14.5G| 13.385s|1086M/s|
|70|30|-24|-24|E3|13.0G| 11.413s|1146M/s|
|71|31|+20|+20|D2|21.3G| 21.350s|999M/s|
|72|31|+24|+24|E1| 178G| 197.643s|902M/s|
|73|31| -4| -4|G4|25.8G| 31.323s|824M/s|
|74|31|-30|-30|F1| 599G| 580.678s|1031M/s|
|75|32|+14|+14|D2| 234G| 180.377s|1300M/s|
|76|32|+32|+32|A3|1.56T|1441.196s|1087M/s|
|77|34|+34|+34|B7|1.23T|1122.816s|1100M/s|
|78|34| +8| +8|F1| 571G| 831.075s|687M/s|
|79|36|+64|+64|D7|14.6G| 12.269s|1191M/s|

[Total] elapsed: 4605575684us, node count: 4685891940872, NPS: 1017438918nodes/sec
10 changes: 5 additions & 5 deletions src/book.rs
Original file line number | Diff line number | Diff line change
Expand Up @@ -45,7 +45,7 @@ impl Book {
let reader = BufReader::new(f);
let mut book = Book::new();
for line in reader.lines() {
book.append(Record::parse(&line?)?)?;
book.append(line?.parse::<Record>()?)?;
}
Ok(book)
}
Expand Down Expand Up @@ -162,7 +162,7 @@ impl Book {
}
hands.push(hand);
}
new_records.push(Record::new(rec.get_initial(), &hands, last_score.unwrap()));
new_records.push(Record::new(rec.get_initial(), &hands, last_score));
}
new_records.dedup();
Book::from_records(&new_records)
Expand All @@ -178,7 +178,7 @@ fn search<Eval: Evaluator>(
solve_obj.cache_gen += 1;
if board.empty().count_ones() <= 18 {
let mut solve_obj = solve_obj.clone();
solve_with_move(board, &mut solve_obj, &sub_solver.clone())
solve_with_move(board, &mut solve_obj, &sub_solver.clone(), None)
} else {
let start = Instant::now();
let timer = Timer {
Expand All @@ -192,7 +192,7 @@ fn search<Eval: Evaluator>(
node_count: 0,
cache_gen: solve_obj.cache_gen,
};
let (_score, hand, _depth) = think_parallel(
let (_score, hand, _depth, _node_count) = think_parallel(
&searcher,
board,
solve_obj.evaluator.score_min(),
Expand Down Expand Up @@ -242,7 +242,7 @@ fn play_with_book<Eval: Evaluator>(
hands.push(hand);
board = board.play_hand(hand).unwrap();
}
let record = Record::new(Board::initial_state(), &hands, board.score().into());
let record = Record::new(Board::initial_state(), &hands, Some(board.score().into()));
eprintln!("{}", record);
book.lock().unwrap().append(record).unwrap();
}
Expand Down
4 changes: 2 additions & 2 deletions src/engine/bits.rs
Original file line number | Diff line number | Diff line change
Expand Up @@ -7,12 +7,12 @@ pub trait BitManip {
}

impl BitManip for u64 {
#[cfg(target_feature = "avx2")]
#[cfg(all(target_feature = "bmi2", not(slow_pext)))]
fn pext(&self, mask: u64) -> u64 {
unsafe { _pext_u64(*self, mask) }
}

#[cfg(not(target_feature = "avx2"))]
#[cfg(not(all(target_feature = "bmi2", not(slow_pext))))]
fn pext(&self, mut mask: u64) -> u64 {
let mut x = *self;
x = x & mask;
Expand Down
9 changes: 6 additions & 3 deletions src/engine/board.rs
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,11 @@
#[cfg(test)]
mod test;
#[cfg(target_feature = "neon")]
use std::arch::aarch64::*;
use crate::engine::bits::*;
use crate::engine::hand::*;
use anyhow::Result;
use clap::ArgMatches;
#[cfg(target_feature = "neon")]
use std::arch::aarch64::*;
use std::cmp::min;
use std::fmt;
use std::io::{BufWriter, Write};
Expand Down Expand Up @@ -55,7 +55,10 @@ fn smart_upper_bit(x: u64x4) -> u64x4 {
}
}

#[cfg(not(any(target_feature = "neon", all(target_feature = "avx512cd", target_feature = "avx512vl"))))]
#[cfg(not(any(
target_feature = "neon",
all(target_feature = "avx512cd", target_feature = "avx512vl")
)))]
fn smart_upper_bit(mut x: u64x4) -> u64x4 {
x |= x >> u64x4::from_array([8, 1, 7, 9]);
x |= x >> u64x4::from_array([16, 2, 14, 18]);
Expand Down
26 changes: 25 additions & 1 deletion src/engine/endgame.rs
Original file line number | Diff line number | Diff line change
Expand Up @@ -7,7 +7,9 @@ use crate::engine::hand::*;
use crate::engine::search::*;
use crate::engine::table::*;
use arrayvec::ArrayVec;
use crc64::Crc64;
use std::cmp::max;
use std::io::Write;

fn near_leaf<Eval: Evaluator>(solve_obj: &mut SolveObj<Eval>, board: Board) -> (i8, SolveStat) {
let (score, node_count) = solve_obj.last_cache.solve_last(board);
Expand Down Expand Up @@ -211,8 +213,30 @@ pub fn solve_inner<Eval: Evaluator>(
CutType::LessThanAlpha(v) => return (v, SolveStat::one_stcut()),
}
}
if rem < solve_obj.params.res_cache_limit {
if rem < solve_obj.params.local_res_cache_limit {
fastest_first(solve_obj, board, (alpha, beta), passed)
} else if rem < solve_obj.params.res_cache_limit {
let mut crc64 = Crc64::new();
crc64.write(&board.player.to_le_bytes()).unwrap();
crc64.write(&board.opponent.to_le_bytes()).unwrap();
let hash = crc64.get();
let res_cache = solve_obj.local_res_cache.get(board, hash);
let lookup_result = make_lookup_result(res_cache, (&mut alpha, &mut beta));
let (lower, upper) = match lookup_result {
CacheLookupResult::Cut(v) => return (v, SolveStat::zero()),
CacheLookupResult::NoCut(l, u, _) => (l, u),
};
let (res, stat) = fastest_first(solve_obj, board, (alpha, beta), passed);
let record = make_record(
solve_obj.local_cache_gen,
board,
res,
None,
(alpha, beta),
(lower, upper),
);
solve_obj.local_res_cache.update(&record, hash);
(res, stat)
} else if rem < solve_obj.params.eval_ordering_limit {
let (lower, upper) = match lookup_table(solve_obj, board, (&mut alpha, &mut beta)) {
CacheLookupResult::Cut(v) => return (v, SolveStat::zero()),
Expand Down
2 changes: 0 additions & 2 deletions src/engine/eval.rs
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,3 @@
#[cfg(test)]
mod test;
use crate::engine::board::*;

pub fn pow3(x: i8) -> usize {
Expand Down
12 changes: 6 additions & 6 deletions src/engine/last_cache.rs
Original file line number | Diff line number | Diff line change
Expand Up @@ -63,23 +63,23 @@ impl LastCache {
}
}

#[cfg(target_feature = "bmi2")]
#[cfg(all(target_feature = "bmi2", not(slow_pext)))]
fn get_col_bits(bits: u64, mask: u64, _col: usize) -> u64 {
bits.pext(mask)
}

#[cfg(not(target_feature = "bmi2"))]
#[cfg(not(all(target_feature = "bmi2", not(slow_pext))))]
fn get_col_bits(mut bits: u64, mask: u64, col: usize) -> u64 {
bits &= mask;
((bits >> col).wrapping_mul(0x0002_0408_1020_4081) >> 49) & 0xff
}

#[cfg(target_feature = "bmi2")]
#[cfg(all(target_feature = "bmi2", not(slow_pext)))]
fn get_diag1_bits(bits: u64, mask: u64, _row: usize, _col: usize) -> u64 {
bits.pext(mask)
}

#[cfg(not(target_feature = "bmi2"))]
#[cfg(not(all(target_feature = "bmi2", not(slow_pext))))]
fn get_diag1_bits(mut bits: u64, mask: u64, row: usize, col: usize) -> u64 {
bits &= mask;
let width = if row >= col {
Expand All @@ -92,12 +92,12 @@ impl LastCache {
(bits.wrapping_mul(0x0101_0101_0101_0101) >> 56) & ((1 << width) - 1)
}

#[cfg(target_feature = "bmi2")]
#[cfg(all(target_feature = "bmi2", not(slow_pext)))]
fn get_diag2_bits(bits: u64, mask: u64, _row: usize, _col: usize) -> u64 {
bits.pext(mask)
}

#[cfg(not(target_feature = "bmi2"))]
#[cfg(not(all(target_feature = "bmi2", not(slow_pext))))]
fn get_diag2_bits(mut bits: u64, mask: u64, row: usize, col: usize) -> u64 {
bits &= mask;
let width = if row + col >= 7 {
Expand Down
5 changes: 4 additions & 1 deletion src/engine/midgame.rs
Original file line number | Diff line number | Diff line change
Expand Up @@ -100,6 +100,7 @@ fn simplified_abdada_intro<Eval: Evaluator>(
if depth >= ctx.solve_obj.params.parallel_depth_limit || rem < ctx.solve_obj.params.parallel_empties_limit {
let (res, stat) = solve_inner(&mut ctx.solve_obj, board, (alpha, beta), passed);
ctx.stats.merge(stat);
ctx.solve_obj.local_cache_gen += 1;
return Some((res, None));
}
ctx.stats.merge(SolveStat::one());
Expand Down Expand Up @@ -148,12 +149,14 @@ pub fn simplified_abdada<Eval: Evaluator>(
(alpha, beta): (i8, i8),
passed: bool,
depth: i8,
num_threads: Option<usize>,
) -> (i8, Option<Hand>, SolveStat) {
thread::scope(|s| {
let mut handles = Vec::new();
let cs_hash = Arc::new(DashSet::new());
let finished = Arc::new(AtomicBool::new(false));
for _ in 0..num_cpus::get() {
let num_threads = num_threads.unwrap_or(num_cpus::get());
for _ in 0..num_threads {
let solve_obj = solve_obj.clone();
let cs_hash = cs_hash.clone();
let finished = finished.clone();
Expand Down
6 changes: 3 additions & 3 deletions src/engine/eval/test.rs → src/engine/pattern_eval/test.rs
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
extern crate test;
use super::*;
use crate::setup::*;
use std::io::{BufRead, BufReader};
use std::sync::Arc;
use test::Bencher;

fn load_stress_test_set() -> Vec<(Board, i8)> {
Expand All @@ -26,13 +26,13 @@ fn load_stress_test_set() -> Vec<(Board, i8)> {

#[bench]
fn bench_eval(b: &mut Bencher) {
let solve_obj = setup_default();
let evaluator = Arc::new(PatternLinearEvaluator::load(Path::new("table-220710")).unwrap());
let dataset = load_stress_test_set();

b.iter(|| {
dataset
.iter()
.map(|(board, _)| solve_obj.evaluator.eval(*board) as i32)
.map(|(board, _)| evaluator.eval(*board) as i32)
.sum::<i32>()
});
}
Loading

0 comments on commit e50a8f6

Please sign in to comment.