From 49ccd0cd1fe30affedc2952eed7a7a903d3d0610 Mon Sep 17 00:00:00 2001
From: Ryo Yamashita <qryxip@gmail.com>
Date: Thu, 8 Feb 2024 02:41:33 +0900
Subject: [PATCH 1/2] =?UTF-8?q?`load{,=5Fwith=5Fuserdic}`=E3=81=AE?=
 =?UTF-8?q?=E5=BC=95=E6=95=B0=E3=82=92`Utf8Path`=E3=81=AB=E3=81=97?=
 =?UTF-8?q?=E3=80=81`\0`=E5=85=A5=E3=82=8A=E3=82=92=E3=82=A8=E3=83=A9?=
 =?UTF-8?q?=E3=83=BC=E3=81=AB?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 crates/open_jtalk/Cargo.toml       |  1 +
 crates/open_jtalk/src/mecab/mod.rs | 91 +++++++++++++++++++-----------
 crates/open_jtalk/src/njd.rs       | 14 ++---
 3 files changed, 65 insertions(+), 41 deletions(-)

diff --git a/crates/open_jtalk/Cargo.toml b/crates/open_jtalk/Cargo.toml
index 25342e0..d85b8d3 100644
--- a/crates/open_jtalk/Cargo.toml
+++ b/crates/open_jtalk/Cargo.toml
@@ -4,6 +4,7 @@ version = "0.1.25"
 edition = "2021"
 
 [dependencies]
+camino = "1.1.6"
 open_jtalk-sys = { path = "../open_jtalk-sys", version = "0.16.111" }
 thiserror = "1.0.31"
 
diff --git a/crates/open_jtalk/src/mecab/mod.rs b/crates/open_jtalk/src/mecab/mod.rs
index 8847d91..2a4e1cc 100644
--- a/crates/open_jtalk/src/mecab/mod.rs
+++ b/crates/open_jtalk/src/mecab/mod.rs
@@ -3,7 +3,16 @@ mod mecab_dict_index;
 pub use mecab_dict_index::*;
 
 use super::*;
-use std::{ffi::CString, mem::MaybeUninit, path::Path};
+use camino::{Utf8Path, Utf8PathBuf};
+use std::{ffi::CString, mem::MaybeUninit};
+
+#[derive(thiserror::Error, Clone, PartialEq, Eq, PartialOrd, Ord, Debug, Hash)]
+pub enum MecabLoadError {
+    #[error("`{function}` failed")]
+    Unsuccessful { function: &'static str },
+    #[error("file name contained an NUL byte: {filename:?}")]
+    Nul { filename: Utf8PathBuf },
+}
 
 #[derive(Default)]
 pub struct Mecab(Option<open_jtalk_sys::Mecab>);
@@ -38,35 +47,42 @@ impl Mecab {
         self.0.as_ref().unwrap() as *const open_jtalk_sys::Mecab as *mut open_jtalk_sys::Mecab
     }
 
-    pub fn load(&mut self, dic_dir: impl AsRef<Path>) -> bool {
-        let dic_dir = CString::new(dic_dir.as_ref().to_str().unwrap()).unwrap();
-        unsafe {
-            bool_number_to_bool(open_jtalk_sys::Mecab_load(
-                self.as_raw_ptr(),
-                dic_dir.as_ptr(),
-            ))
+    pub fn load(&mut self, dic_dir: impl AsRef<Utf8Path>) -> Result<(), MecabLoadError> {
+        let dic_dir = c_filename(dic_dir.as_ref())?;
+        let success = bool_number_to_bool(unsafe {
+            open_jtalk_sys::Mecab_load(self.as_raw_ptr(), dic_dir.as_ptr())
+        });
+        if !success {
+            return Err(MecabLoadError::Unsuccessful {
+                function: "Mecab_load",
+            });
         }
-    }
-
-    /// # Panics
-    ///
-    /// 次の場合にパニックする。
-    ///
-    /// - `dic_dir`または`userdic`が`\0`を含む。
-    /// - `dic_dir`または`userdic`がUTF-8の文字列ではない。
-    pub fn load_with_userdic(&mut self, dic_dir: &Path, userdic: Option<&Path>) -> bool {
-        let dic_dir = CString::new(dic_dir.to_str().unwrap()).unwrap();
-        let userdic = &userdic.map(|userdic| CString::new(userdic.to_str().unwrap()).unwrap());
-        unsafe {
-            bool_number_to_bool(open_jtalk_sys::Mecab_load_with_userdic(
+        Ok(())
+    }
+
+    pub fn load_with_userdic(
+        &mut self,
+        dic_dir: &Utf8Path,
+        userdic: Option<&Utf8Path>,
+    ) -> Result<(), MecabLoadError> {
+        let dic_dir = c_filename(dic_dir)?;
+        let userdic = &userdic.map(c_filename).transpose()?;
+        let success = bool_number_to_bool(unsafe {
+            open_jtalk_sys::Mecab_load_with_userdic(
                 self.as_raw_ptr(),
                 dic_dir.as_ptr(),
                 match userdic {
                     Some(userdic) => userdic.as_ptr(),
                     None => std::ptr::null(),
                 },
-            ))
+            )
+        });
+        if !success {
+            return Err(MecabLoadError::Unsuccessful {
+                function: "Mecab_load_with_userdic",
+            });
         }
+        Ok(())
     }
     pub fn get_feature(&self) -> Option<&MecabFeature> {
         unsafe {
@@ -113,11 +129,16 @@ impl Mecab {
     }
 }
 
+fn c_filename(path: &Utf8Path) -> Result<CString, MecabLoadError> {
+    CString::new(path.as_str()).map_err(|_| MecabLoadError::Nul {
+        filename: path.to_owned(),
+    })
+}
+
 #[cfg(test)]
 mod tests {
-    use std::{path::PathBuf, str::FromStr};
-
     use super::*;
+    use camino::Utf8Path;
     use pretty_assertions::{assert_eq, assert_ne};
     use resources::Resource as _;
 
@@ -139,11 +160,12 @@ mod tests {
     #[rstest]
     fn mecab_load_works() {
         let mut mecab = ManagedResource::<Mecab>::initialize();
-        assert!(mecab.load(
-            PathBuf::from_str(std::env!("CARGO_MANIFEST_DIR"))
-                .unwrap()
-                .join("src/mecab/testdata/mecab_load"),
-        ));
+        mecab
+            .load(
+                Utf8Path::new(std::env!("CARGO_MANIFEST_DIR"))
+                    .join("src/mecab/testdata/mecab_load"),
+            )
+            .unwrap();
     }
 
     #[rstest]
@@ -156,11 +178,12 @@ mod tests {
     #[case("h^o-d+e=s/A:2+3+2/B:22-xx_xx/C:10_7+2/D:xx+xx_xx/E:5_5!0_xx-0/F:4_1#0_xx@1_1|1_4/G:xx_xx%xx_xx_xx/H:1_5/I:1-4@2+1&2-1|6+4/J:xx_xx/K:2+2-9",true)]
     fn mecab_analysis_works(#[case] input: &str, #[case] expected: bool) {
         let mut mecab = ManagedResource::<Mecab>::initialize();
-        assert!(mecab.load(
-            PathBuf::from_str(std::env!("CARGO_MANIFEST_DIR"))
-                .unwrap()
-                .join("src/mecab/testdata/mecab_load"),
-        ));
+        mecab
+            .load(
+                Utf8Path::new(std::env!("CARGO_MANIFEST_DIR"))
+                    .join("src/mecab/testdata/mecab_load"),
+            )
+            .unwrap();
         let s = text2mecab(input).unwrap();
         assert_eq!(expected, mecab.analysis(s));
         assert_ne!(0, mecab.get_size());
diff --git a/crates/open_jtalk/src/njd.rs b/crates/open_jtalk/src/njd.rs
index 970f2f4..8928eb1 100644
--- a/crates/open_jtalk/src/njd.rs
+++ b/crates/open_jtalk/src/njd.rs
@@ -74,9 +74,8 @@ impl Njd {
 #[cfg(test)]
 mod tests {
     use super::*;
+    use camino::Utf8Path;
     use resources::Resource as _;
-    use std::path::PathBuf;
-    use std::str::FromStr;
     #[rstest]
     fn njd_initialize_and_clear_works() {
         let mut njd = Njd::default();
@@ -131,11 +130,12 @@ mod tests {
         let mut njd = ManagedResource::<Njd>::initialize();
         let mut mecab = ManagedResource::<Mecab>::initialize();
 
-        assert!(mecab.load(
-            PathBuf::from_str(std::env!("CARGO_MANIFEST_DIR"))
-                .unwrap()
-                .join("src/mecab/testdata/mecab_load"),
-        ));
+        mecab
+            .load(
+                Utf8Path::new(std::env!("CARGO_MANIFEST_DIR"))
+                    .join("src/mecab/testdata/mecab_load"),
+            )
+            .unwrap();
         let s = text2mecab("h^o-d+e=s/A:2+3+2/B:22-xx_xx/C:10_7+2/D:xx+xx_xx/E:5_5!0_xx-0/F:4_1#0_xx@1_1|1_4/G:xx_xx%xx_xx_xx/H:1_5/I:1-4@2+1&2-1|6+4/J:xx_xx/K:2+2-9").unwrap();
         assert!(mecab.analysis(s));
         njd.mecab2njd(mecab.get_feature().unwrap(), mecab.get_size());

From 3eb133922a26027b2aebb5df8f1aff7d03c8ae55 Mon Sep 17 00:00:00 2001
From: Ryo Yamashita <qryxip@gmail.com>
Date: Thu, 8 Feb 2024 14:17:34 +0900
Subject: [PATCH 2/2] =?UTF-8?q?"an"=20=E2=86=92=20"a"?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 crates/open_jtalk/src/mecab/mod.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/crates/open_jtalk/src/mecab/mod.rs b/crates/open_jtalk/src/mecab/mod.rs
index 2a4e1cc..42fb31c 100644
--- a/crates/open_jtalk/src/mecab/mod.rs
+++ b/crates/open_jtalk/src/mecab/mod.rs
@@ -10,7 +10,7 @@ use std::{ffi::CString, mem::MaybeUninit};
 pub enum MecabLoadError {
     #[error("`{function}` failed")]
     Unsuccessful { function: &'static str },
-    #[error("file name contained an NUL byte: {filename:?}")]
+    #[error("file name contained a NUL byte: {filename:?}")]
     Nul { filename: Utf8PathBuf },
 }