Skip to content

Commit

Permalink
[prakriya] Increase rule coverage to roughly 2100
Browse files Browse the repository at this point in the history
Rule and test coverage:

- Increase number of implemented rules from ~1900 to ~2074. These rules
  mainly add stronger support for samasas, svaras, and nama-dhatus.

- Increase number of passing tests from 2169 to 2421.

Design:

- Add tentative support for recursive derivations by refactoring our
  args to use a more Paninian data model. This new argument structure
  supports a simple declarative API for deriving words. Our underlying
  implementation is largely the same, but this API allows us to move
  away from distinct derivational phases, which has implications for
  quality and performance.

- As a first consequence of this API refactor, defer running the
  `tripadi` rules to the end of the derivation, as opposed to running
  them twice (once for the dhatu and once for the final output). Doing
  so fixes several major bugs (DIpsati, Ambibat) and makes significant
  progress toward fixing many more.

Performance:

- Improve performance by around 13%. This improvement comes mainly from
  upgrading `compact_str` to version 0.7.1, which uses branchless
  instructions. We also observed a smaller improvement from deferring
  `tripadi` rules and believe more improvements are possible if we
  optimize the `angasya` section.

Approximate rule coverage by pada, where "Written" counts rules that are implemented but not yet tested and "Tested" includes ignored tests:

```
+---------+------------+------------+------------+------------+
| Pada    |    Written |     Tested |    Missing |      Total |
+---------+------------+------------+------------+------------+
| 1.1     |          4 |         51 |         20 |         75 |
| 1.2     |          0 |         30 |         43 |         73 |
| 1.3     |          1 |         76 |         16 |         93 |
| 1.4     |         14 |         33 |         63 |        110 |
| 2.1     |         13 |         34 |         25 |         72 |
| 2.2     |          4 |          4 |         30 |         38 |
| 2.3     |          0 |          6 |         67 |         73 |
| 2.4     |          5 |         29 |         51 |         85 |
| 3.1     |          3 |        112 |         35 |        150 |
| 3.2     |         12 |        130 |         46 |        188 |
| 3.3     |          6 |         77 |         93 |        176 |
| 3.4     |          2 |         40 |         75 |        117 |
| 4.1     |          7 |         80 |         91 |        178 |
| 4.2     |          7 |         65 |         73 |        145 |
| 4.3     |         12 |         75 |         81 |        168 |
| 4.4     |          3 |        106 |         35 |        144 |
| 5.1     |         12 |         31 |         93 |        136 |
| 5.2     |          5 |         60 |         75 |        140 |
| 5.3     |         13 |         59 |         47 |        119 |
| 5.4     |         12 |         51 |         97 |        160 |
| 6.1     |         12 |        103 |        108 |        223 |
| 6.2     |          2 |          6 |        191 |        199 |
| 6.3     |          7 |         29 |        103 |        139 |
| 6.4     |         14 |        124 |         37 |        175 |
| 7.1     |          1 |         79 |         23 |        103 |
| 7.2     |          4 |         95 |         19 |        118 |
| 7.3     |          1 |         80 |         39 |        120 |
| 7.4     |          4 |         77 |         16 |         97 |
| 8.1     |          0 |          0 |         74 |         74 |
| 8.2     |          7 |         55 |         46 |        108 |
| 8.3     |          9 |         48 |         62 |        119 |
| 8.4     |          2 |         31 |         35 |         68 |
+---------+------------+------------+------------+------------+
| All     |        198 |       1876 |       1909 |       3983 |
+---------+------------+------------+------------+------------+
```
  • Loading branch information
akprasad committed Nov 25, 2023
1 parent e54d782 commit 75728aa
Show file tree
Hide file tree
Showing 130 changed files with 9,005 additions and 4,593 deletions.
6 changes: 4 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion vidyut-cheda/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ multimap = "0.8.3"
modular-bitfield = "0.11.2"
priority-queue = "1.2.3"
regex = "1.6.0"
compact_str = "0.6.1"
compact_str = "0.7.1"
rustc-hash = "1.1.0"

[dev-dependencies]
Expand Down
2 changes: 1 addition & 1 deletion vidyut-prakriya/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ edition = "2021"

[dependencies]
clap = { version = "4.0.12", features = ["derive"] }
compact_str = { version = "0.6.1", features = ["serde"] }
compact_str = { version = "0.7.1", features = ["serde"] }
csv = "1.1.6"
enumset = { version = "1.1.3", features = ["serde"] }
lazy_static = "1.4.0"
Expand Down
14 changes: 5 additions & 9 deletions vidyut-prakriya/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,6 @@ create_test_files:
../target/release/create_krdantas --krt ktvA > test-files/krdantas-ktvA.csv
../target/release/create_krdantas --krt kta > test-files/krdantas-kta.csv

create_subantas:
# Work-in-progress tests for subantas.
cargo run --bin create_subantas > test-files/subantas.csv

# Runs a full evaluation over all forms generated by vidyut-prakriya. `hash` is
# the SHA-256 hash of the test file. We use `hash` to verify test file
# integrity and ensure that our test cases are stable.
Expand All @@ -53,19 +49,19 @@ test_tinantas:
cargo build --release
../target/release/test_tinantas \
--test-cases test-files/tinantas-basic-kartari.csv \
--hash "f8934f99631e811c333c41ddd4925229d2faab0dd875bc549bb38350319706db"
--hash "13ca3874fc1624c7ba74b3586e4040da7d47fbdd1bbbe12b4987992d266d1e3c"
../target/release/test_tinantas \
--test-cases test-files/tinantas-nic-kartari.csv \
--hash "2e3d0f56c4e6d375b7064df034a7ee04a7cc91f10838ceee32cbeb37ad2870c5"
--hash "1122c23a5dc53b74b1c140b6d7f0256e83926010783ae9fd25de8eb9e342697b"
../target/release/test_tinantas \
--test-cases test-files/tinantas-san-kartari.csv \
--hash "0dfec6333abf094ed8199694e2e55436991f837cacf223de3fd1223b576712e3"
--hash "21b1dc3c5f7c6598be919022f8a09f865bd39827f99455fa24cb0e07244dc4e3"
../target/release/test_tinantas \
--test-cases test-files/tinantas-yan-kartari.csv \
--hash "08c5b0f9b6b2fa857018653583b63571eac3074804025d90c12e0c30a0db0616"
--hash "640447e069abd4bdbd0522e886152c4fdf251f61e54f6d8af1e2d0bd4b7f5313"
../target/release/test_tinantas \
--test-cases test-files/tinantas-basic-karmani.csv \
--hash "da0e4771bec284661bfd0f537734d44eb6e019e41a387e80dfaa80cf7dc27b03"
--hash "6dec033579ce574d4f64e7d224cc5c1c573f457d6a74900ad64d9706afa545ff"

test_krdantas:
cargo build --release
Expand Down
48 changes: 38 additions & 10 deletions vidyut-prakriya/scripts/check_rule_coverage.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#!/usr/bin/env python3
from pathlib import Path
from collections import Counter
import re
import glob

Expand Down Expand Up @@ -41,23 +42,50 @@ def print_legend():
for match in re.findall(r"(\d+_\d+_\d+)", line):
tested_rules.add(match.replace('_', '.'))

print_legend()
num_ok = 0
num_untested = 0
num_missing = 0
for rule in all_rules:
status = None
if rule in tested_rules:
status = RULE_OK
num_ok += 1
elif rule in implemented_rules:
status = RULE_UNTESTED
num_untested += 1
else:
status = RULE_MISSING
num_missing += 1
print(f"{status}\t\t{rule}")

print_legend()
print(f"Num tested : {num_ok}")
print(f"Num untested : {num_untested}")
print(f"Num missing : {num_missing}")

pada_total = Counter()
pada_written = Counter()
pada_tested = Counter()
pada_missing = Counter()
for rule in all_rules:
ap, _, sutra = rule.rpartition('.')
pada_total[ap] += 1
if rule in tested_rules:
pada_tested[ap] += 1
elif rule in implemented_rules:
pada_written[ap] += 1
else:
pada_missing[ap] += 1

print("Coverage by pada:")
print()
print(f"+---------+------------+------------+------------+------------+")
print(f"| Pada | Written | Tested | Missing | Total |")
print(f"+---------+------------+------------+------------+------------+")
for key, total in pada_total.items():
written = pada_written[key]
tested = pada_tested[key]
missing = pada_missing[key]
print(f"| {key} | {written:>10} | {tested:>10} | {missing:>10} | {total:>10} |")
written = pada_written.total()
total = pada_total.total()
tested = pada_tested.total()
missing = pada_missing.total()
print(f"+---------+------------+------------+------------+------------+")
print(f"| All | {written:>10} | {tested:>10} | {missing:>10} | {total:>10} |")
print(f"+---------+------------+------------+------------+------------+")

print()
num_ok = total - pada_missing.total()
print("Num tested or implemented: {}".format(num_ok))
111 changes: 44 additions & 67 deletions vidyut-prakriya/src/ac_sandhi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
//! =========
//! (6.1.66 - 6.1.101)
use crate::core::char_view::{get_at, xy, CharPrakriya};
use crate::core::char_view::{get_at, get_term_index_at, xy, CharPrakriya};
use crate::core::iterators::xy_rule;
use crate::core::operators as op;
use crate::core::Prakriya;
Expand Down Expand Up @@ -34,7 +34,7 @@ pub fn try_lopo_vyor_vali(p: &mut Prakriya) {
None => return false,
};
let vyor_vali = (x == 'v' || x == 'y') && VAL.contains(y);
let t = get_at(p, i).expect("should be present");
let t_x = get_at(p, i).expect("should be present");
// Ignore if it starts an upadesha, otherwise roots like "vraj" would be vyartha.
// - Likewise for roots ending with 'v'.
// - Likewise for pratipadikas.
Expand All @@ -43,9 +43,9 @@ pub fn try_lopo_vyor_vali(p: &mut Prakriya) {
// - Exclude pratyayas (yAyA[y]vara -> yAyAvara).
//
// For now, just check if the term is a dhatu.
let is_mula_dhatu = t.is_dhatu() && !t.is_pratyaya();
let is_upadesha_adi = is_mula_dhatu && (t.has_adi('v') || t.has_adi('y'));
vyor_vali && !is_upadesha_adi && !t.is_pratipadika()
let is_mula_dhatu = t_x.is_dhatu() && !t_x.is_pratyaya();
let is_upadesha_adi = is_mula_dhatu && (t_x.has_adi('v') || t_x.has_adi('y'));
vyor_vali && !is_upadesha_adi && !(t_x.is_pratipadika() && !t_x.is_pratyaya())
},
|p, _, i| {
p.run("6.1.66", |p| p.set_char_at(i, ""));
Expand All @@ -55,10 +55,11 @@ pub fn try_lopo_vyor_vali(p: &mut Prakriya) {
}

fn try_ver_aprktasya(p: &mut Prakriya) -> Option<()> {
let i = p.find_last(T::Pratyaya)?;
let last = p.get(i)?;
if last.has_text("v") {
p.run_at("6.1.67", i, op::lopa);
for i in 0..p.terms().len() {
let t = p.get(i)?;
if t.is_pratyaya() && t.has_text("v") {
p.run_at("6.1.67", i, op::lopa);
}
}

Some(())
Expand Down Expand Up @@ -91,11 +92,16 @@ pub fn apply_general_ac_sandhi(p: &mut Prakriya) {
}

cp.for_chars(xy(|x, y| AC.contains(x) && AC.contains(y)), |p, text, i| {
p.dump();
let x = text.as_bytes()[i] as char;
let y = text.as_bytes()[i + 1] as char;

let t_x = get_at(p, i).expect("ok");
let i_x = get_term_index_at(p, i).expect("ok");
let i_y = get_term_index_at(p, i + 1).expect("ok");
let t_x = p.get(i_x).expect("ok");
let t_y = p.get(i_y).expect("ok");

let eti_edhati = || t_y.has_adi(&*EN) && t_y.has_u_in(&["i\\R", "eDa~\\"]);
let is_uth = || t_y.has_adi('U') && t_y.has_tag(T::FlagUth);

if t_x.has_tag(T::Pragrhya) {
// agnI iti, ...
Expand Down Expand Up @@ -131,68 +137,37 @@ pub fn apply_general_ac_sandhi(p: &mut Prakriya) {
};
p.run("6.1.77", |p| p.set_char_at(i, res));
true
} else {
false
}
});

// upa + fcCati -> upArcCati
cp.for_terms(
|x, y| x.is_upasarga() && x.has_antya(&*A) && y.is_dhatu() && y.has_adi('f'),
|p, i, j| {
p.set(i, |t| t.set_antya(""));
p.set(j, |t| t.set_adi("Ar"));
p.step("6.1.91");
},
);

// upa + eti -> upEti
cp.for_terms(
|x, y| {
let eti_edhati = y.has_adi(&*EN) && y.has_u_in(&["i\\R", "eDa~\\"]);
let is_uth = y.has_adi('U') && y.has_tag(T::FlagUth);
!x.is_agama() && x.has_antya(&*A) && (eti_edhati || is_uth)
},
|p, _i, j| {
let y = p.get(j).expect("ok");
let adi = y.adi().expect("ok");
} else if t_x.is_upasarga() && t_x.has_antya(&*A) && t_y.is_dhatu() && t_y.has_adi('f') {
// upa + fcCati -> upArcCati
p.run("6.1.91", |p| {
p.set(i_x, |t| t.set_antya(""));
p.set(i_y, |t| t.set_adi("Ar"));
});
true
} else if !t_x.is_agama() && t_x.has_antya(&*A) && (eti_edhati() || is_uth()) {
// upa + eti -> upEti
let adi = t_y.adi().expect("ok");
let sub = al::to_vrddhi(adi).expect("ok");
p.run_at("6.1.89", j, |t| t.set_adi(sub));
},
);

// HACK for KOnAti, DOta, and a few others
cp.for_terms(
|x, _| x.has_suffix_in(&["aU", "AU"]),
|p, i, _| {
p.run_at("6.1.89", i, |t| {
p.run_at("6.1.89", i_y, |t| t.set_adi(sub));
true
} else if t_x.has_suffix_in(&["aU", "AU"]) {
// HACK for KOnAti, DOta, and a few others
p.run_at("6.1.89", i_x, |t| {
t.set_antya("");
t.set_antya("O")
});
},
);

// upa + elayati -> upelayati
cp.for_terms(
|x, y| x.is_upasarga() && x.has_antya(&*A) && y.is_dhatu() && y.has_adi(&*EN),
|p, i, _j| {
p.set(i, |t| t.set_antya(""));
p.step("6.1.94");
},
);

// General guna/vrddhi rules.
cp.for_chars(
// [dummy comment for cargo fmt]
xy(|x, y| A.contains(x) && AC.contains(y)),
|p, text, i| {
true
} else if t_x.is_upasarga() && t_x.has_antya(&*A) && t_y.is_dhatu() && t_y.has_adi(&*EN) {
// upa + elayati -> upelayati
p.run_at("6.1.94", i_x, |t| t.set_antya(""));
true
} else if A.contains(x) && AC.contains(y) {
// General guna/vrddhi rules.
if is_upasarga_sanadi_dhatu(p, i) {
return false;
}

let j = i + 1;
let y = text.as_bytes()[i + 1] as char;

if EC.contains(y) {
p.run("6.1.88", |p| {
p.set_char_at(j, al::to_vrddhi(y).expect("should have vrddhi"));
Expand All @@ -205,8 +180,10 @@ pub fn apply_general_ac_sandhi(p: &mut Prakriya) {
});
}
true
},
);
} else {
false
}
});
}

pub fn try_sup_sandhi_before_angasya(p: &mut Prakriya) -> Option<()> {
Expand Down Expand Up @@ -279,7 +256,7 @@ pub fn try_sup_sandhi_after_angasya(p: &mut Prakriya) -> Option<()> {

/// Runs vowel sandhi rules that apply between terms (as opposed to between sounds).
fn apply_ac_sandhi_at_term_boundary(p: &mut Prakriya, i: usize) -> Option<()> {
let j = p.find_next_where(i, |t| !t.text.is_empty())?;
let j = p.find_next_where(i, |t| !t.is_empty())?;

let x = p.get(i)?;
let y = p.get(j)?;
Expand Down
Loading

0 comments on commit 75728aa

Please sign in to comment.