From cd2df8d13c0369ad6c8ccfe73662a6a306fcacaa Mon Sep 17 00:00:00 2001
From: pandalee99 <1162953505@qq.com>
Date: Wed, 25 Dec 2024 23:46:31 +0800
Subject: [PATCH] clang-format

---
 cpp/fury/util/string_util.cc      | 484 +++++++++---------
 cpp/fury/util/string_util.h       |   6 +-
 cpp/fury/util/string_util_test.cc | 796 +++++++++++++++---------------
 3 files changed, 643 insertions(+), 643 deletions(-)
diff --git a/cpp/fury/util/string_util.cc b/cpp/fury/util/string_util.cc
index 866de3b4bf..d6ae9475d9 100644
--- a/cpp/fury/util/string_util.cc
+++ b/cpp/fury/util/string_util.cc
@@ -33,272 +33,272 @@
 namespace fury {
 
 // Swap bytes to convert from big endian to little endian
-    inline uint16_t swapBytes(uint16_t value) {
-        return (value >> 8) | (value << 8);
-    }
+inline uint16_t swapBytes(uint16_t value) {
+  return (value >> 8) | (value << 8);
+}
 
-    inline void utf16ToUtf8(uint16_t code_unit, char *&output) {
-        if (code_unit < 0x80) {
-            *output++ = static_cast<char>(code_unit);
-        } else if (code_unit < 0x800) {
-            *output++ = static_cast<char>(0xC0 | (code_unit >> 6));
-            *output++ = static_cast<char>(0x80 | (code_unit & 0x3F));
-        } else {
-            *output++ = static_cast<char>(0xE0 | (code_unit >> 12));
-            *output++ = static_cast<char>(0x80 | ((code_unit >> 6) & 0x3F));
-            *output++ = static_cast<char>(0x80 | (code_unit & 0x3F));
+inline void utf16ToUtf8(uint16_t code_unit, char *&output) {
+  if (code_unit < 0x80) {
+    *output++ = static_cast<char>(code_unit);
+  } else if (code_unit < 0x800) {
+    *output++ = static_cast<char>(0xC0 | (code_unit >> 6));
+    *output++ = static_cast<char>(0x80 | (code_unit & 0x3F));
+  } else {
+    *output++ = static_cast<char>(0xE0 | (code_unit >> 12));
+    *output++ = static_cast<char>(0x80 | ((code_unit >> 6) & 0x3F));
+    *output++ = static_cast<char>(0x80 | (code_unit & 0x3F));
+  }
+}
+
+inline void utf16SurrogatePairToUtf8(uint16_t high, uint16_t low, char *&utf8) {
+  uint32_t code_point = 0x10000 + ((high - 0xD800) << 10) + (low - 0xDC00);
+  *utf8++ = static_cast<char>((code_point >> 18) | 0xF0);
+  *utf8++ = static_cast<char>(((code_point >> 12) & 0x3F) | 0x80);
+  *utf8++ = static_cast<char>(((code_point >> 6) & 0x3F) | 0x80);
+  *utf8++ = static_cast<char>((code_point & 0x3F) | 0x80);
+}
+
+std::u16string utf8ToUtf16SIMD(const std::string &utf8, bool is_little_endian) {
+  std::u16string utf16;
+  utf16.reserve(utf8.size()); // Reserve space to avoid frequent reallocations
+
+  char buffer[64]; // Buffer to hold temporary UTF-16 results
+  char16_t *output =
+      reinterpret_cast<char16_t *>(buffer); // Use char16_t for output
+
+  size_t i = 0;
+  size_t n = utf8.size();
+
+  while (i + 32 <= n) {
+
+    // Decode this 32-byte chunk one byte at a time (scalar, no SIMD load)
+    for (int j = 0; j < 32; ++j) {
+      uint8_t byte = utf8[i + j];
+
+      if (byte < 0x80) {
+        // 1-byte character (ASCII)
+        *output++ = static_cast<char16_t>(byte);
+      } else if (byte < 0xE0) {
+        // 2-byte character
+        uint16_t utf16_char = ((byte & 0x1F) << 6) | (utf8[i + j + 1] & 0x3F);
+        if (!is_little_endian) {
+          utf16_char = (utf16_char >> 8) |
+                       (utf16_char << 8); // Swap bytes for big-endian
+        }
+        *output++ = utf16_char;
+        ++j;
+      } else if (byte < 0xF0) {
+        // 3-byte character
+        uint16_t utf16_char = ((byte & 0x0F) << 12) |
+                              ((utf8[i + j + 1] & 0x3F) << 6) |
+                              (utf8[i + j + 2] & 0x3F);
+        if (!is_little_endian) {
+          utf16_char = (utf16_char >> 8) |
+                       (utf16_char << 8); // Swap bytes for big-endian
+        }
+        *output++ = utf16_char;
+        j += 2;
+      } else {
+        // 4-byte character (surrogate pair handling required)
+        uint32_t code_point =
+            ((byte & 0x07) << 18) | ((utf8[i + j + 1] & 0x3F) << 12) |
+            ((utf8[i + j + 2] & 0x3F) << 6) | (utf8[i + j + 3] & 0x3F);
+
+        // Convert the code point to a surrogate pair
+        uint16_t high_surrogate = 0xD800 + ((code_point - 0x10000) >> 10);
+        uint16_t low_surrogate = 0xDC00 + (code_point & 0x3FF);
+
+        if (!is_little_endian) {
+          high_surrogate = (high_surrogate >> 8) |
+                           (high_surrogate << 8); // Swap bytes for big-endian
+          low_surrogate = (low_surrogate >> 8) |
+                          (low_surrogate << 8); // Swap bytes for big-endian
         }
-    }
 
-    inline void utf16SurrogatePairToUtf8(uint16_t high, uint16_t low, char *&utf8) {
-        uint32_t code_point = 0x10000 + ((high - 0xD800) << 10) + (low - 0xDC00);
-        *utf8++ = static_cast<char>((code_point >> 18) | 0xF0);
-        *utf8++ = static_cast<char>(((code_point >> 12) & 0x3F) | 0x80);
-        *utf8++ = static_cast<char>(((code_point >> 6) & 0x3F) | 0x80);
-        *utf8++ = static_cast<char>((code_point & 0x3F) | 0x80);
+        *output++ = high_surrogate;
+        *output++ = low_surrogate;
+
+        j += 3;
+      }
     }
 
-    std::u16string utf8ToUtf16SIMD(const std::string &utf8, bool is_little_endian) {
-        std::u16string utf16;
-        utf16.reserve(utf8.size()); // Reserve space to avoid frequent reallocations
-
-        char buffer[64]; // Buffer to hold temporary UTF-16 results
-        char16_t *output =
-                reinterpret_cast<char16_t *>(buffer); // Use char16_t for output
-
-        size_t i = 0;
-        size_t n = utf8.size();
-
-        while (i + 32 <= n) {
-
-            // Now process the characters in 'in' SIMD register
-            for (int j = 0; j < 32; ++j) {
-                uint8_t byte = utf8[i + j];
-
-                if (byte < 0x80) {
-                    // 1-byte character (ASCII)
-                    *output++ = static_cast<char16_t>(byte);
-                } else if (byte < 0xE0) {
-                    // 2-byte character
-                    uint16_t utf16_char = ((byte & 0x1F) << 6) | (utf8[i + j + 1] & 0x3F);
-                    if (!is_little_endian) {
-                        utf16_char = (utf16_char >> 8) |
-                                     (utf16_char << 8); // Swap bytes for big-endian
-                    }
-                    *output++ = utf16_char;
-                    ++j;
-                } else if (byte < 0xF0) {
-                    // 3-byte character
-                    uint16_t utf16_char = ((byte & 0x0F) << 12) |
-                                          ((utf8[i + j + 1] & 0x3F) << 6) |
-                                          (utf8[i + j + 2] & 0x3F);
-                    if (!is_little_endian) {
-                        utf16_char = (utf16_char >> 8) |
-                                     (utf16_char << 8); // Swap bytes for big-endian
-                    }
-                    *output++ = utf16_char;
-                    j += 2;
-                } else {
-                    // 4-byte character (surrogate pair handling required)
-                    uint32_t code_point =
-                            ((byte & 0x07) << 18) | ((utf8[i + j + 1] & 0x3F) << 12) |
-                            ((utf8[i + j + 2] & 0x3F) << 6) | (utf8[i + j + 3] & 0x3F);
-
-                    // Convert the code point to a surrogate pair
-                    uint16_t high_surrogate = 0xD800 + ((code_point - 0x10000) >> 10);
-                    uint16_t low_surrogate = 0xDC00 + (code_point & 0x3FF);
-
-                    if (!is_little_endian) {
-                        high_surrogate = (high_surrogate >> 8) |
-                                         (high_surrogate << 8); // Swap bytes for big-endian
-                        low_surrogate = (low_surrogate >> 8) |
-                                        (low_surrogate << 8); // Swap bytes for big-endian
-                    }
-
-                    *output++ = high_surrogate;
-                    *output++ = low_surrogate;
-
-                    j += 3;
-                }
-            }
-
-            // Append the processed buffer to the final utf16 string
-            utf16.append(reinterpret_cast<char16_t *>(buffer),
-                         output - reinterpret_cast<char16_t *>(buffer));
-            output =
-                    reinterpret_cast<char16_t *>(buffer); // Reset output buffer pointer
-            i += 32;
-        }
+    // Append the processed buffer to the final utf16 string
+    utf16.append(reinterpret_cast<char16_t *>(buffer),
+                 output - reinterpret_cast<char16_t *>(buffer));
+    output =
+        reinterpret_cast<char16_t *>(buffer); // Reset output buffer pointer
+    i += 32;
+  }
 
-        // Handle remaining characters
-        while (i < n) {
-            uint8_t byte = utf8[i];
-
-            if (byte < 0x80) {
-                *output++ = static_cast<char16_t>(byte);
-            } else if (byte < 0xE0) {
-                uint16_t utf16_char = ((byte & 0x1F) << 6) | (utf8[i + 1] & 0x3F);
-                if (!is_little_endian) {
-                    utf16_char =
-                            (utf16_char >> 8) | (utf16_char << 8); // Swap bytes for big-endian
-                }
-                *output++ = utf16_char;
-                ++i;
-            } else if (byte < 0xF0) {
-                uint16_t utf16_char = ((byte & 0x0F) << 12) |
-                                      ((utf8[i + 1] & 0x3F) << 6) | (utf8[i + 2] & 0x3F);
-                if (!is_little_endian) {
-                    utf16_char =
-                            (utf16_char >> 8) | (utf16_char << 8); // Swap bytes for big-endian
-                }
-                *output++ = utf16_char;
-                i += 2;
-            } else {
-                uint32_t code_point = ((byte & 0x07) << 18) |
-                                      ((utf8[i + 1] & 0x3F) << 12) |
-                                      ((utf8[i + 2] & 0x3F) << 6) | (utf8[i + 3] & 0x3F);
-
-                uint16_t high_surrogate = 0xD800 + ((code_point - 0x10000) >> 10);
-                uint16_t low_surrogate = 0xDC00 + (code_point & 0x3FF);
-
-                if (!is_little_endian) {
-                    high_surrogate = (high_surrogate >> 8) | (high_surrogate << 8);
-                    low_surrogate = (low_surrogate >> 8) | (low_surrogate << 8);
-                }
-
-                *output++ = high_surrogate;
-                *output++ = low_surrogate;
-
-                i += 3;
-            }
-
-            ++i;
-        }
+  // Handle remaining characters
+  while (i < n) {
+    uint8_t byte = utf8[i];
 
-        // Append the last part of the buffer to the utf16 string
-        utf16.append(reinterpret_cast<char16_t *>(buffer),
-                     output - reinterpret_cast<char16_t *>(buffer));
+    if (byte < 0x80) {
+      *output++ = static_cast<char16_t>(byte);
+    } else if (byte < 0xE0) {
+      uint16_t utf16_char = ((byte & 0x1F) << 6) | (utf8[i + 1] & 0x3F);
+      if (!is_little_endian) {
+        utf16_char =
+            (utf16_char >> 8) | (utf16_char << 8); // Swap bytes for big-endian
+      }
+      *output++ = utf16_char;
+      ++i;
+    } else if (byte < 0xF0) {
+      uint16_t utf16_char = ((byte & 0x0F) << 12) |
+                            ((utf8[i + 1] & 0x3F) << 6) | (utf8[i + 2] & 0x3F);
+      if (!is_little_endian) {
+        utf16_char =
+            (utf16_char >> 8) | (utf16_char << 8); // Swap bytes for big-endian
+      }
+      *output++ = utf16_char;
+      i += 2;
+    } else {
+      uint32_t code_point = ((byte & 0x07) << 18) |
+                            ((utf8[i + 1] & 0x3F) << 12) |
+                            ((utf8[i + 2] & 0x3F) << 6) | (utf8[i + 3] & 0x3F);
 
-        return utf16;
+      uint16_t high_surrogate = 0xD800 + ((code_point - 0x10000) >> 10);
+      uint16_t low_surrogate = 0xDC00 + (code_point & 0x3FF);
+
+      if (!is_little_endian) {
+        high_surrogate = (high_surrogate >> 8) | (high_surrogate << 8);
+        low_surrogate = (low_surrogate >> 8) | (low_surrogate << 8);
+      }
+
+      *output++ = high_surrogate;
+      *output++ = low_surrogate;
+
+      i += 3;
     }
 
+    ++i;
+  }
+
+  // Append the last part of the buffer to the utf16 string
+  utf16.append(reinterpret_cast<char16_t *>(buffer),
+               output - reinterpret_cast<char16_t *>(buffer));
+
+  return utf16;
+}
+
 #if defined(__x86_64__) || defined(_M_X64)
 
-    bool isLatin(const std::string &str) {
-        const char *data = str.data();
-        size_t len = str.size();
-
-        size_t i = 0;
-        __m256i latin_mask = _mm256_set1_epi8(0x80);
-        for (; i + 32 <= len; i += 32) {
-            __m256i chars =
-                    _mm256_loadu_si256(reinterpret_cast<const __m256i *>(data + i));
-            __m256i result = _mm256_and_si256(chars, latin_mask);
-            if (!_mm256_testz_si256(result, result)) {
-                return false;
-            }
-        }
+bool isLatin(const std::string &str) {
+  const char *data = str.data();
+  size_t len = str.size();
 
-        for (; i < len; ++i) {
-            if (static_cast<unsigned char>(data[i]) >= 128) {
-                return false;
-            }
-        }
+  size_t i = 0;
+  __m256i latin_mask = _mm256_set1_epi8(0x80);
+  for (; i + 32 <= len; i += 32) {
+    __m256i chars =
+        _mm256_loadu_si256(reinterpret_cast<const __m256i *>(data + i));
+    __m256i result = _mm256_and_si256(chars, latin_mask);
+    if (!_mm256_testz_si256(result, result)) {
+      return false;
+    }
+  }
+
+  for (; i < len; ++i) {
+    if (static_cast<unsigned char>(data[i]) >= 128) {
+      return false;
+    }
+  }
+
+  return true;
+}
 
-        return true;
+std::string utf16ToUtf8(const std::u16string &utf16, bool is_little_endian) {
+  std::string utf8;
+  utf8.reserve(utf16.size() *
+               3); // Reserve enough space to avoid frequent reallocations
+
+  const __m256i limit1 = _mm256_set1_epi16(0x80);
+  const __m256i limit2 = _mm256_set1_epi16(0x800);
+  const __m256i surrogate_high_start = _mm256_set1_epi16(0xD800);
+  const __m256i surrogate_high_end = _mm256_set1_epi16(0xDBFF);
+  const __m256i surrogate_low_start = _mm256_set1_epi16(0xDC00);
+  const __m256i surrogate_low_end = _mm256_set1_epi16(0xDFFF);
+
+  char buffer[64]; // Buffer to hold temporary UTF-8 bytes
+  char *output = buffer;
+
+  size_t i = 0;
+  size_t n = utf16.size();
+
+  while (i + 16 <= n) {
+    __m256i in =
+        _mm256_loadu_si256(reinterpret_cast<const __m256i *>(utf16.data() + i));
+
+    if (!is_little_endian) {
+      in = _mm256_or_si256(
+          _mm256_slli_epi16(in, 8),
+          _mm256_srli_epi16(in, 8)); // Swap bytes for big-endian
     }
 
-    std::string utf16ToUtf8(const std::u16string &utf16, bool is_little_endian) {
-        std::string utf8;
-        utf8.reserve(utf16.size() *
-                     3); // Reserve enough space to avoid frequent reallocations
-
-        const __m256i limit1 = _mm256_set1_epi16(0x80);
-        const __m256i limit2 = _mm256_set1_epi16(0x800);
-        const __m256i surrogate_high_start = _mm256_set1_epi16(0xD800);
-        const __m256i surrogate_high_end = _mm256_set1_epi16(0xDBFF);
-        const __m256i surrogate_low_start = _mm256_set1_epi16(0xDC00);
-        const __m256i surrogate_low_end = _mm256_set1_epi16(0xDFFF);
-
-        char buffer[64]; // Buffer to hold temporary UTF-8 bytes
-        char *output = buffer;
-
-        size_t i = 0;
-        size_t n = utf16.size();
-
-        while (i + 16 <= n) {
-            __m256i in =
-                    _mm256_loadu_si256(reinterpret_cast<const __m256i *>(utf16.data() + i));
-
-            if (!is_little_endian) {
-                in = _mm256_or_si256(
-                        _mm256_slli_epi16(in, 8),
-                        _mm256_srli_epi16(in, 8)); // Swap bytes for big-endian
-            }
-
-            __m256i mask1 = _mm256_cmpgt_epi16(in, limit1);
-            __m256i mask2 = _mm256_cmpgt_epi16(in, limit2);
-            __m256i high_surrogate_mask =
-                    _mm256_and_si256(_mm256_cmpgt_epi16(in, surrogate_high_start),
-                                     _mm256_cmpgt_epi16(in, surrogate_high_end));
-            __m256i low_surrogate_mask =
-                    _mm256_and_si256(_mm256_cmpgt_epi16(in, surrogate_low_start),
-                                     _mm256_cmpgt_epi16(in, surrogate_low_end));
-
-            if (_mm256_testz_si256(mask1, mask1)) {
-                // All values < 0x80, 1 byte per character
-                for (int j = 0; j < 16; ++j) {
-                    *output++ = static_cast<char>(utf16[i + j]);
-                }
-            } else if (_mm256_testz_si256(mask2, mask2)) {
-                // All values < 0x800, 2 bytes per character
-                for (int j = 0; j < 16; ++j) {
-                    utf16ToUtf8(utf16[i + j], output);
-                }
-            } else {
-                // Mix of 1, 2, and 3 byte characters
-                for (int j = 0; j < 16; ++j) {
-                    if (_mm256_testz_si256(high_surrogate_mask, high_surrogate_mask) &&
-                        j + 1 < 16 &&
-                        !_mm256_testz_si256(low_surrogate_mask, low_surrogate_mask)) {
-                        // Surrogate pair
-                        utf16SurrogatePairToUtf8(utf16[i + j], utf16[i + j + 1], output);
-                        ++j;
-                    } else {
-                        utf16ToUtf8(utf16[i + j], output);
-                    }
-                }
-            }
-
-            utf8.append(buffer, output - buffer);
-            output = buffer; // Reset output buffer pointer
-            i += 16;
+    __m256i mask1 = _mm256_cmpgt_epi16(in, limit1);
+    __m256i mask2 = _mm256_cmpgt_epi16(in, limit2);
+    __m256i high_surrogate_mask =
+        _mm256_and_si256(_mm256_cmpgt_epi16(in, surrogate_high_start),
+                         _mm256_cmpgt_epi16(in, surrogate_high_end));
+    __m256i low_surrogate_mask =
+        _mm256_and_si256(_mm256_cmpgt_epi16(in, surrogate_low_start),
+                         _mm256_cmpgt_epi16(in, surrogate_low_end));
+
+    if (_mm256_testz_si256(mask1, mask1)) {
+      // All values < 0x80, 1 byte per character
+      for (int j = 0; j < 16; ++j) {
+        *output++ = static_cast<char>(utf16[i + j]);
+      }
+    } else if (_mm256_testz_si256(mask2, mask2)) {
+      // All values < 0x800, 2 bytes per character
+      for (int j = 0; j < 16; ++j) {
+        utf16ToUtf8(utf16[i + j], output);
+      }
+    } else {
+      // Mixed widths: 1-3 byte characters, possibly surrogate pairs
+      for (int j = 0; j < 16; ++j) {
+        if (_mm256_testz_si256(high_surrogate_mask, high_surrogate_mask) &&
+            j + 1 < 16 &&
+            !_mm256_testz_si256(low_surrogate_mask, low_surrogate_mask)) {
+          // Surrogate pair
+          utf16SurrogatePairToUtf8(utf16[i + j], utf16[i + j + 1], output);
+          ++j;
+        } else {
+          utf16ToUtf8(utf16[i + j], output);
         }
+      }
+    }
 
-        // Handle remaining characters
-        while (i < n) {
-            if (i + 1 < n && utf16[i] >= 0xD800 && utf16[i] <= 0xDBFF &&
-                utf16[i + 1] >= 0xDC00 && utf16[i + 1] <= 0xDFFF) {
-                // Surrogate pair
-                utf16SurrogatePairToUtf8(utf16[i], utf16[i + 1], output);
-                ++i;
-            } else {
-                utf16ToUtf8(utf16[i], output);
-            }
-            ++i;
-        }
-        utf8.append(buffer, output - buffer);
+    utf8.append(buffer, output - buffer);
+    output = buffer; // Reset output buffer pointer
+    i += 16;
+  }
 
-        return utf8;
+  // Handle remaining characters
+  while (i < n) {
+    if (i + 1 < n && utf16[i] >= 0xD800 && utf16[i] <= 0xDBFF &&
+        utf16[i + 1] >= 0xDC00 && utf16[i + 1] <= 0xDFFF) {
+      // Surrogate pair
+      utf16SurrogatePairToUtf8(utf16[i], utf16[i + 1], output);
+      ++i;
+    } else {
+      utf16ToUtf8(utf16[i], output);
     }
+    ++i;
+  }
+  utf8.append(buffer, output - buffer);
 
-    std::u16string utf8ToUtf16(const std::string &utf8, bool is_little_endian) {
-        return utf8ToUtf16SIMD(utf8, is_little_endian);
-    }
+  return utf8;
+}
+
+std::u16string utf8ToUtf16(const std::string &utf8, bool is_little_endian) {
+  return utf8ToUtf16SIMD(utf8, is_little_endian);
+}
 
 #elif defined(__ARM_NEON) || defined(__ARM_NEON__)
 
-    bool isLatin(const std::string &str) {
+bool isLatin(const std::string &str) {
   const char *data = str.data();
   size_t len = str.size();
 
diff --git a/cpp/fury/util/string_util.h b/cpp/fury/util/string_util.h
index 8a200afe38..d5bdde543a 100644
--- a/cpp/fury/util/string_util.h
+++ b/cpp/fury/util/string_util.h
@@ -23,10 +23,10 @@
 
 namespace fury {
 
-    bool isLatin(const std::string &str);
+bool isLatin(const std::string &str);
 
-    std::string utf16ToUtf8(const std::u16string &utf16, bool is_little_endian);
+std::string utf16ToUtf8(const std::u16string &utf16, bool is_little_endian);
 
-    std::u16string utf8ToUtf16(const std::string &utf8, bool is_little_endian);
+std::u16string utf8ToUtf16(const std::string &utf8, bool is_little_endian);
 
 } // namespace fury
\ No newline at end of file
diff --git a/cpp/fury/util/string_util_test.cc b/cpp/fury/util/string_util_test.cc
index 69a6c5c457..60c72c80b3 100644
--- a/cpp/fury/util/string_util_test.cc
+++ b/cpp/fury/util/string_util_test.cc
@@ -30,516 +30,516 @@
 namespace fury {
 
 // Function to generate a random string
-    std::string generateRandomString(size_t length) {
-        const char charset[] =
-                "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
-        std::default_random_engine rng(std::random_device{}());
-        std::uniform_int_distribution<> dist(0, sizeof(charset) - 2);
-
-        std::string result;
-        result.reserve(length);
-        for (size_t i = 0; i < length; ++i) {
-            result += charset[dist(rng)];
-        }
-
-        return result;
-    }
+std::string generateRandomString(size_t length) {
+  const char charset[] =
+      "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
+  std::default_random_engine rng(std::random_device{}());
+  std::uniform_int_distribution<> dist(0, sizeof(charset) - 2);
+
+  std::string result;
+  result.reserve(length);
+  for (size_t i = 0; i < length; ++i) {
+    result += charset[dist(rng)];
+  }
 
-    bool isLatin_BaseLine(const std::string &str) {
-        for (char c : str) {
-            if (static_cast<unsigned char>(c) >= 128) {
-                return false;
-            }
-        }
-        return true;
+  return result;
+}
+
+bool isLatin_BaseLine(const std::string &str) {
+  for (char c : str) {
+    if (static_cast<unsigned char>(c) >= 128) {
+      return false;
     }
+  }
+  return true;
+}
 
-    TEST(StringUtilTest, TestIsLatinFunctions) {
-    std::string testStr = generateRandomString(100000);
-    auto start_time = std::chrono::high_resolution_clock::now();
-    bool result = isLatin_BaseLine(testStr);
-    auto end_time = std::chrono::high_resolution_clock::now();
-    auto duration = std::chrono::duration_cast<std::chrono::nanoseconds>(
-            end_time - start_time)
-            .count();
-    FURY_LOG(INFO) << "BaseLine Running Time: " << duration << " ns.";
-
-    start_time = std::chrono::high_resolution_clock::now();
-    result = isLatin(testStr);
-    end_time = std::chrono::high_resolution_clock::now();
-    duration = std::chrono::duration_cast<std::chrono::nanoseconds>(end_time -
-                                                                    start_time)
-            .count();
-    FURY_LOG(INFO) << "Optimized Running Time: " << duration << " ns.";
-
-    EXPECT_TRUE(result);
+TEST(StringUtilTest, TestIsLatinFunctions) {
+  std::string testStr = generateRandomString(100000);
+  auto start_time = std::chrono::high_resolution_clock::now();
+  bool result = isLatin_BaseLine(testStr);
+  auto end_time = std::chrono::high_resolution_clock::now();
+  auto duration = std::chrono::duration_cast<std::chrono::nanoseconds>(
+                      end_time - start_time)
+                      .count();
+  FURY_LOG(INFO) << "BaseLine Running Time: " << duration << " ns.";
+
+  start_time = std::chrono::high_resolution_clock::now();
+  result = isLatin(testStr);
+  end_time = std::chrono::high_resolution_clock::now();
+  duration = std::chrono::duration_cast<std::chrono::nanoseconds>(end_time -
+                                                                  start_time)
+                 .count();
+  FURY_LOG(INFO) << "Optimized Running Time: " << duration << " ns.";
+
+  EXPECT_TRUE(result);
 }
 
 TEST(StringUtilTest, TestIsLatinLogic) {
-// Test strings with only Latin characters
-EXPECT_TRUE(isLatin("Fury"));
-EXPECT_TRUE(isLatin(generateRandomString(80)));
-
-// Test unaligned strings with only Latin characters
-EXPECT_TRUE(isLatin(generateRandomString(80) + "1"));
-EXPECT_TRUE(isLatin(generateRandomString(80) + "12"));
-EXPECT_TRUE(isLatin(generateRandomString(80) + "123"));
-
-// Test strings with non-Latin characters
-EXPECT_FALSE(isLatin("你好, Fury"));
-EXPECT_FALSE(isLatin(generateRandomString(80) + "你好"));
-EXPECT_FALSE(isLatin(generateRandomString(80) + "1你好"));
-EXPECT_FALSE(isLatin(generateRandomString(11) + "你"));
-EXPECT_FALSE(isLatin(generateRandomString(10) + "你好"));
-EXPECT_FALSE(isLatin(generateRandomString(9) + "性能好"));
-EXPECT_FALSE(isLatin("\u1234"));
-EXPECT_FALSE(isLatin("a\u1234"));
-EXPECT_FALSE(isLatin("ab\u1234"));
-EXPECT_FALSE(isLatin("abc\u1234"));
-EXPECT_FALSE(isLatin("abcd\u1234"));
-EXPECT_FALSE(isLatin("Javaone Keynote\u1234"));
+  // Test strings with only Latin characters
+  EXPECT_TRUE(isLatin("Fury"));
+  EXPECT_TRUE(isLatin(generateRandomString(80)));
+
+  // Test unaligned strings with only Latin characters
+  EXPECT_TRUE(isLatin(generateRandomString(80) + "1"));
+  EXPECT_TRUE(isLatin(generateRandomString(80) + "12"));
+  EXPECT_TRUE(isLatin(generateRandomString(80) + "123"));
+
+  // Test strings with non-Latin characters
+  EXPECT_FALSE(isLatin("你好, Fury"));
+  EXPECT_FALSE(isLatin(generateRandomString(80) + "你好"));
+  EXPECT_FALSE(isLatin(generateRandomString(80) + "1你好"));
+  EXPECT_FALSE(isLatin(generateRandomString(11) + "你"));
+  EXPECT_FALSE(isLatin(generateRandomString(10) + "你好"));
+  EXPECT_FALSE(isLatin(generateRandomString(9) + "性能好"));
+  EXPECT_FALSE(isLatin("\u1234"));
+  EXPECT_FALSE(isLatin("a\u1234"));
+  EXPECT_FALSE(isLatin("ab\u1234"));
+  EXPECT_FALSE(isLatin("abc\u1234"));
+  EXPECT_FALSE(isLatin("abcd\u1234"));
+  EXPECT_FALSE(isLatin("Javaone Keynote\u1234"));
 }
 
 // Generate random UTF-16 string ensuring valid surrogate pairs
 std::u16string generateRandomUTF16String(size_t length) {
-    std::u16string str;
-    std::mt19937 generator(std::random_device{}());
-    std::uniform_int_distribution<uint32_t> distribution(0, 0x10FFFF);
-
-    while (str.size() < length) {
-        uint32_t code_point = distribution(generator);
-
-        if (code_point <= 0xD7FF ||
-            (code_point >= 0xE000 && code_point <= 0xFFFF)) {
-            str.push_back(static_cast<char16_t>(code_point));
-        } else if (code_point >= 0x10000 && code_point <= 0x10FFFF) {
-            code_point -= 0x10000;
-            str.push_back(static_cast<char16_t>((code_point >> 10) + 0xD800));
-            str.push_back(static_cast<char16_t>((code_point & 0x3FF) + 0xDC00));
-        }
+  std::u16string str;
+  std::mt19937 generator(std::random_device{}());
+  std::uniform_int_distribution<uint32_t> distribution(0, 0x10FFFF);
+
+  while (str.size() < length) {
+    uint32_t code_point = distribution(generator);
+
+    if (code_point <= 0xD7FF ||
+        (code_point >= 0xE000 && code_point <= 0xFFFF)) {
+      str.push_back(static_cast<char16_t>(code_point));
+    } else if (code_point >= 0x10000 && code_point <= 0x10FFFF) {
+      code_point -= 0x10000;
+      str.push_back(static_cast<char16_t>((code_point >> 10) + 0xD800));
+      str.push_back(static_cast<char16_t>((code_point & 0x3FF) + 0xDC00));
     }
+  }
 
-    return str;
+  return str;
 }
 
 // Basic implementation
 
 // Swap bytes to convert from big endian to little endian
 inline uint16_t swapBytes(uint16_t value) {
-    return (value >> 8) | (value << 8);
+  return (value >> 8) | (value << 8);
 }
 
 inline void utf16ToUtf8(uint16_t code_unit, char *&output) {
-    if (code_unit < 0x80) {
-        *output++ = static_cast<char>(code_unit);
-    } else if (code_unit < 0x800) {
-        *output++ = static_cast<char>(0xC0 | (code_unit >> 6));
-        *output++ = static_cast<char>(0x80 | (code_unit & 0x3F));
-    } else {
-        *output++ = static_cast<char>(0xE0 | (code_unit >> 12));
-        *output++ = static_cast<char>(0x80 | ((code_unit >> 6) & 0x3F));
-        *output++ = static_cast<char>(0x80 | (code_unit & 0x3F));
-    }
+  if (code_unit < 0x80) {
+    *output++ = static_cast<char>(code_unit);
+  } else if (code_unit < 0x800) {
+    *output++ = static_cast<char>(0xC0 | (code_unit >> 6));
+    *output++ = static_cast<char>(0x80 | (code_unit & 0x3F));
+  } else {
+    *output++ = static_cast<char>(0xE0 | (code_unit >> 12));
+    *output++ = static_cast<char>(0x80 | ((code_unit >> 6) & 0x3F));
+    *output++ = static_cast<char>(0x80 | (code_unit & 0x3F));
+  }
 }
 
 inline void utf16SurrogatePairToUtf8(uint16_t high, uint16_t low, char *&utf8) {
-    uint32_t code_point = 0x10000 + ((high - 0xD800) << 10) + (low - 0xDC00);
-    *utf8++ = static_cast<char>((code_point >> 18) | 0xF0);
-    *utf8++ = static_cast<char>(((code_point >> 12) & 0x3F) | 0x80);
-    *utf8++ = static_cast<char>(((code_point >> 6) & 0x3F) | 0x80);
-    *utf8++ = static_cast<char>((code_point & 0x3F) | 0x80);
+  uint32_t code_point = 0x10000 + ((high - 0xD800) << 10) + (low - 0xDC00);
+  *utf8++ = static_cast<char>((code_point >> 18) | 0xF0);
+  *utf8++ = static_cast<char>(((code_point >> 12) & 0x3F) | 0x80);
+  *utf8++ = static_cast<char>(((code_point >> 6) & 0x3F) | 0x80);
+  *utf8++ = static_cast<char>((code_point & 0x3F) | 0x80);
 }
 
 std::string utf16ToUtf8BaseLine(const std::u16string &utf16,
                                 bool is_little_endian) {
-    std::string utf8;
-    utf8.reserve(utf16.size() *
-                 3); // Reserve enough space to avoid frequent reallocations
-
-    size_t i = 0;
-    size_t n = utf16.size();
-    char buffer[4]; // Buffer to hold temporary UTF-8 bytes
-    char *output = buffer;
-
-    while (i < n) {
-        uint16_t code_unit = utf16[i];
-        if (!is_little_endian) {
-            code_unit = swapBytes(code_unit);
-        }
-        if (i + 1 < n && code_unit >= 0xD800 && code_unit <= 0xDBFF &&
-            utf16[i + 1] >= 0xDC00 && utf16[i + 1] <= 0xDFFF) {
-            // Surrogate pair
-            uint16_t high = code_unit;
-            uint16_t low = utf16[i + 1];
-            if (!is_little_endian) {
-                low = swapBytes(low);
-            }
-            utf16SurrogatePairToUtf8(high, low, output);
-            utf8.append(buffer, output - buffer);
-            output = buffer;
-            ++i;
-        } else {
-            utf16ToUtf8(code_unit, output);
-            utf8.append(buffer, output - buffer);
-            output = buffer;
-        }
-        ++i;
+  std::string utf8;
+  utf8.reserve(utf16.size() *
+               3); // Reserve enough space to avoid frequent reallocations
+
+  size_t i = 0;
+  size_t n = utf16.size();
+  char buffer[4]; // Buffer to hold temporary UTF-8 bytes
+  char *output = buffer;
+
+  while (i < n) {
+    uint16_t code_unit = utf16[i];
+    if (!is_little_endian) {
+      code_unit = swapBytes(code_unit);
+    }
+    if (i + 1 < n && code_unit >= 0xD800 && code_unit <= 0xDBFF &&
+        utf16[i + 1] >= 0xDC00 && utf16[i + 1] <= 0xDFFF) {
+      // Surrogate pair
+      uint16_t high = code_unit;
+      uint16_t low = utf16[i + 1];
+      if (!is_little_endian) {
+        low = swapBytes(low);
+      }
+      utf16SurrogatePairToUtf8(high, low, output);
+      utf8.append(buffer, output - buffer);
+      output = buffer;
+      ++i;
+    } else {
+      utf16ToUtf8(code_unit, output);
+      utf8.append(buffer, output - buffer);
+      output = buffer;
     }
-    return utf8;
+    ++i;
+  }
+  return utf8;
 }
 
 // Testing Basic Logic
 TEST(UTF16ToUTF8Test, BasicConversion) {
-std::u16string utf16 = u"Hello, 世界!";
-std::string utf8 = fury::utf16ToUtf8(utf16, true);
-ASSERT_EQ(utf8, u8"Hello, 世界!");
+  std::u16string utf16 = u"Hello, 世界!";
+  std::string utf8 = fury::utf16ToUtf8(utf16, true);
+  ASSERT_EQ(utf8, u8"Hello, 世界!");
 }
 
 // Testing Empty String
 TEST(UTF16ToUTF8Test, EmptyString) {
-std::u16string utf16 = u"";
-std::string utf8 = fury::utf16ToUtf8(utf16, true);
-ASSERT_EQ(utf8, "");
+  std::u16string utf16 = u"";
+  std::string utf8 = fury::utf16ToUtf8(utf16, true);
+  ASSERT_EQ(utf8, "");
 }
 
 // Testing emoji
 TEST(UTF16ToUTF8Test, SurrogatePairs) {
-std::u16string utf16 = {0xD83D, 0xDE00}; // 😀 emoji
-std::string utf8 = fury::utf16ToUtf8(utf16, true);
-ASSERT_EQ(utf8, "\xF0\x9F\x98\x80");
+  std::u16string utf16 = {0xD83D, 0xDE00}; // 😀 emoji
+  std::string utf8 = fury::utf16ToUtf8(utf16, true);
+  ASSERT_EQ(utf8, "\xF0\x9F\x98\x80");
 }
 
 // Testing Boundary
 TEST(UTF16ToUTF8Test, BoundaryValues) {
-std::u16string utf16 = {0x0000, 0xFFFF};
-std::string utf8 = fury::utf16ToUtf8(utf16, true);
-std::string expected_utf8 = std::string("\x00", 1) + "\xEF\xBF\xBF";
-ASSERT_EQ(utf8, expected_utf8);
+  std::u16string utf16 = {0x0000, 0xFFFF};
+  std::string utf8 = fury::utf16ToUtf8(utf16, true);
+  std::string expected_utf8 = std::string("\x00", 1) + "\xEF\xBF\xBF";
+  ASSERT_EQ(utf8, expected_utf8);
 }
 
 // Testing Special Characters
 TEST(UTF16ToUTF8Test, SpecialCharacters) {
-std::u16string utf16 = u" \n\t";
-std::string utf8 = fury::utf16ToUtf8(utf16, true);
-ASSERT_EQ(utf8, " \n\t");
+  std::u16string utf16 = u" \n\t";
+  std::string utf8 = fury::utf16ToUtf8(utf16, true);
+  ASSERT_EQ(utf8, " \n\t");
 }
 
 // Testing LittleEndian
 TEST(UTF16ToUTF8Test, LittleEndian) {
-std::u16string utf16 = {0x61, 0x62}; // "ab"
-std::string utf8 = fury::utf16ToUtf8(utf16, true);
-ASSERT_EQ(utf8, "ab");
+  std::u16string utf16 = {0x61, 0x62}; // "ab"
+  std::string utf8 = fury::utf16ToUtf8(utf16, true);
+  ASSERT_EQ(utf8, "ab");
 }
 
 // Testing BigEndian
 TEST(UTF16ToUTF8Test, BigEndian) {
-std::u16string utf16 = {0xFFFE, 0xFFFE};
-std::string utf8 = fury::utf16ToUtf8(utf16, false);
-ASSERT_EQ(utf8, "\xEF\xBF\xBE\xEF\xBF\xBE");
+  std::u16string utf16 = {0xFFFE, 0xFFFE};
+  std::string utf8 = fury::utf16ToUtf8(utf16, false);
+  ASSERT_EQ(utf8, "\xEF\xBF\xBE\xEF\xBF\xBE");
 }
 
 // Testing Performance
 TEST(UTF16ToUTF8Test, PerformanceTest) {
-const size_t num_tests = 1000;
-const size_t string_length = 1000;
-// Default little_endian
-bool is_little_endian = true;
-
-// Random UTF-16
-std::vector<std::u16string> test_strings;
-for (size_t i = 0; i < num_tests; ++i) {
-test_strings.push_back(generateRandomUTF16String(string_length));
-}
-
-// Lib
-try {
-auto start_time = std::chrono::high_resolution_clock::now();
-for (const auto &str : test_strings) {
-std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t> convert;
-std::string utf8 = convert.to_bytes(str);
-}
-auto end_time = std::chrono::high_resolution_clock::now();
-auto duration = std::chrono::duration_cast<std::chrono::nanoseconds>(
-        end_time - start_time)
-        .count();
-FURY_LOG(INFO) << "Standard library Running Time: " << duration << " ns";
-} catch (const std::exception &e) {
-FURY_LOG(FATAL) << "Caught exception: " << e.what();
-}
-
-// BaseLine
-try {
-auto start_time = std::chrono::high_resolution_clock::now();
-for (const auto &str : test_strings) {
-std::string utf8 = utf16ToUtf8BaseLine(str, is_little_endian);
-}
-auto end_time = std::chrono::high_resolution_clock::now();
-auto duration = std::chrono::duration_cast<std::chrono::nanoseconds>(
-        end_time - start_time)
-        .count();
-FURY_LOG(INFO) << "Baseline Running Time: " << duration << " ns";
-} catch (const std::exception &e) {
-FURY_LOG(FATAL) << "Caught exception: " << e.what();
-}
-
-// SIMD
-try {
-auto start_time = std::chrono::high_resolution_clock::now();
-for (const auto &str : test_strings) {
-std::string utf8 = fury::utf16ToUtf8(str, is_little_endian);
-}
-auto end_time = std::chrono::high_resolution_clock::now();
-auto duration = std::chrono::duration_cast<std::chrono::nanoseconds>(
-        end_time - start_time)
-        .count();
-FURY_LOG(INFO) << "SIMD Running Time: " << duration << " ns";
-} catch (const std::exception &e) {
-FURY_LOG(FATAL) << "Caught exception: " << e.what();
-}
+  const size_t num_tests = 1000;
+  const size_t string_length = 1000;
+  // Default little_endian
+  bool is_little_endian = true;
+
+  // Random UTF-16
+  std::vector<std::u16string> test_strings;
+  for (size_t i = 0; i < num_tests; ++i) {
+    test_strings.push_back(generateRandomUTF16String(string_length));
+  }
+
+  // Lib
+  try {
+    auto start_time = std::chrono::high_resolution_clock::now();
+    for (const auto &str : test_strings) {
+      std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t> convert;
+      std::string utf8 = convert.to_bytes(str);
+    }
+    auto end_time = std::chrono::high_resolution_clock::now();
+    auto duration = std::chrono::duration_cast<std::chrono::nanoseconds>(
+                        end_time - start_time)
+                        .count();
+    FURY_LOG(INFO) << "Standard library Running Time: " << duration << " ns";
+  } catch (const std::exception &e) {
+    FURY_LOG(FATAL) << "Caught exception: " << e.what();
+  }
+
+  // BaseLine
+  try {
+    auto start_time = std::chrono::high_resolution_clock::now();
+    for (const auto &str : test_strings) {
+      std::string utf8 = utf16ToUtf8BaseLine(str, is_little_endian);
+    }
+    auto end_time = std::chrono::high_resolution_clock::now();
+    auto duration = std::chrono::duration_cast<std::chrono::nanoseconds>(
+                        end_time - start_time)
+                        .count();
+    FURY_LOG(INFO) << "Baseline Running Time: " << duration << " ns";
+  } catch (const std::exception &e) {
+    FURY_LOG(FATAL) << "Caught exception: " << e.what();
+  }
+
+  // SIMD
+  try {
+    auto start_time = std::chrono::high_resolution_clock::now();
+    for (const auto &str : test_strings) {
+      std::string utf8 = fury::utf16ToUtf8(str, is_little_endian);
+    }
+    auto end_time = std::chrono::high_resolution_clock::now();
+    auto duration = std::chrono::duration_cast<std::chrono::nanoseconds>(
+                        end_time - start_time)
+                        .count();
+    FURY_LOG(INFO) << "SIMD Running Time: " << duration << " ns";
+  } catch (const std::exception &e) {
+    FURY_LOG(FATAL) << "Caught exception: " << e.what();
+  }
 }
 
 // Generate random UTF-8 string
 std::string generateRandomUTF8String(size_t length) {
-    std::string str;
-    std::mt19937 generator(std::random_device{}());
-    std::uniform_int_distribution<uint32_t> distribution(0, 0x10FFFF);
-
-    while (str.size() < length) {
-        uint32_t code_point = distribution(generator);
-
-        // Skip surrogate pairs (0xD800 to 0xDFFF) and other invalid Unicode code
-        // points
-        if ((code_point >= 0xD800 && code_point <= 0xDFFF) ||
-            code_point > 0x10FFFF) {
-            continue;
-        }
-
-        if (code_point <= 0x7F) {
-            str.push_back(static_cast<char>(code_point));
-        } else if (code_point <= 0x7FF) {
-            str.push_back(0xC0 | (code_point >> 6));
-            str.push_back(0x80 | (code_point & 0x3F));
-        } else if (code_point <= 0xFFFF) {
-            str.push_back(0xE0 | (code_point >> 12));
-            str.push_back(0x80 | ((code_point >> 6) & 0x3F));
-            str.push_back(0x80 | (code_point & 0x3F));
-        } else if (code_point <= 0x10FFFF) {
-            str.push_back(0xF0 | (code_point >> 18));
-            str.push_back(0x80 | ((code_point >> 12) & 0x3F));
-            str.push_back(0x80 | ((code_point >> 6) & 0x3F));
-            str.push_back(0x80 | (code_point & 0x3F));
-        }
+  std::string str;
+  std::mt19937 generator(std::random_device{}());
+  std::uniform_int_distribution<uint32_t> distribution(0, 0x10FFFF);
+
+  while (str.size() < length) {
+    uint32_t code_point = distribution(generator);
+
+    // Skip surrogate pairs (0xD800 to 0xDFFF) and other invalid Unicode code
+    // points
+    if ((code_point >= 0xD800 && code_point <= 0xDFFF) ||
+        code_point > 0x10FFFF) {
+      continue;
     }
 
-    return str;
+    if (code_point <= 0x7F) {
+      str.push_back(static_cast<char>(code_point));
+    } else if (code_point <= 0x7FF) {
+      str.push_back(0xC0 | (code_point >> 6));
+      str.push_back(0x80 | (code_point & 0x3F));
+    } else if (code_point <= 0xFFFF) {
+      str.push_back(0xE0 | (code_point >> 12));
+      str.push_back(0x80 | ((code_point >> 6) & 0x3F));
+      str.push_back(0x80 | (code_point & 0x3F));
+    } else if (code_point <= 0x10FFFF) {
+      str.push_back(0xF0 | (code_point >> 18));
+      str.push_back(0x80 | ((code_point >> 12) & 0x3F));
+      str.push_back(0x80 | ((code_point >> 6) & 0x3F));
+      str.push_back(0x80 | (code_point & 0x3F));
+    }
+  }
+
+  return str;
 }
 
 std::u16string utf8ToUtf16BaseLine(const std::string &utf8,
                                    bool is_little_endian) {
-    std::u16string utf16;   // Resulting UTF-16 string
-    size_t i = 0;           // Index for traversing the UTF-8 string
-    size_t n = utf8.size(); // Total length of the UTF-8 string
-
-    // Loop through each byte of the UTF-8 string
-    while (i < n) {
-        uint32_t code_point = 0;   // The Unicode code point
-        unsigned char c = utf8[i]; // Current byte of the UTF-8 string
-
-        // Determine the number of bytes for this character based on its first byte
-        if ((c & 0x80) == 0) {
-            // 1-byte character (ASCII)
-            code_point = c;
-            ++i;
-        } else if ((c & 0xE0) == 0xC0) {
-            // 2-byte character
-            code_point = c & 0x1F;
-            code_point = (code_point << 6) | (utf8[i + 1] & 0x3F);
-            i += 2;
-        } else if ((c & 0xF0) == 0xE0) {
-            // 3-byte character
-            code_point = c & 0x0F;
-            code_point = (code_point << 6) | (utf8[i + 1] & 0x3F);
-            code_point = (code_point << 6) | (utf8[i + 2] & 0x3F);
-            i += 3;
-        } else if ((c & 0xF8) == 0xF0) {
-            // 4-byte character
-            code_point = c & 0x07;
-            code_point = (code_point << 6) | (utf8[i + 1] & 0x3F);
-            code_point = (code_point << 6) | (utf8[i + 2] & 0x3F);
-            code_point = (code_point << 6) | (utf8[i + 3] & 0x3F);
-            i += 4;
-        } else {
-            // Invalid UTF-8 byte sequence
-            throw std::invalid_argument("Invalid UTF-8 encoding.");
-        }
-
-        // If the code point is beyond the BMP range, use surrogate pairs
-        if (code_point >= 0x10000) {
-            code_point -= 0x10000; // Subtract 0x10000 to get the surrogate pair
-            uint16_t high_surrogate = 0xD800 + (code_point >> 10);  // High surrogate
-            uint16_t low_surrogate = 0xDC00 + (code_point & 0x3FF); // Low surrogate
-
-            // If not little-endian, swap bytes of the surrogates
-            if (!is_little_endian) {
-                high_surrogate = (high_surrogate >> 8) | (high_surrogate << 8);
-                low_surrogate = (low_surrogate >> 8) | (low_surrogate << 8);
-            }
-
-            // Add both high and low surrogates to the UTF-16 string
-            utf16.push_back(high_surrogate);
-            utf16.push_back(low_surrogate);
-        } else {
-            // For code points within the BMP range, directly store as a 16-bit value
-            uint16_t utf16_char = static_cast<uint16_t>(code_point);
-
-            // If not little-endian, swap the bytes of the 16-bit character
-            if (!is_little_endian) {
-                utf16_char = (utf16_char >> 8) | (utf16_char << 8);
-            }
-
-            // Add the UTF-16 character to the string
-            utf16.push_back(utf16_char);
-        }
+  std::u16string utf16;   // Resulting UTF-16 string
+  size_t i = 0;           // Index for traversing the UTF-8 string
+  size_t n = utf8.size(); // Total length of the UTF-8 string
+
+  // Loop through each byte of the UTF-8 string
+  while (i < n) {
+    uint32_t code_point = 0;   // The Unicode code point
+    unsigned char c = utf8[i]; // Current byte of the UTF-8 string
+
+    // Determine the number of bytes for this character based on its first byte
+    if ((c & 0x80) == 0) {
+      // 1-byte character (ASCII)
+      code_point = c;
+      ++i;
+    } else if ((c & 0xE0) == 0xC0) {
+      // 2-byte character
+      code_point = c & 0x1F;
+      code_point = (code_point << 6) | (utf8[i + 1] & 0x3F);
+      i += 2;
+    } else if ((c & 0xF0) == 0xE0) {
+      // 3-byte character
+      code_point = c & 0x0F;
+      code_point = (code_point << 6) | (utf8[i + 1] & 0x3F);
+      code_point = (code_point << 6) | (utf8[i + 2] & 0x3F);
+      i += 3;
+    } else if ((c & 0xF8) == 0xF0) {
+      // 4-byte character
+      code_point = c & 0x07;
+      code_point = (code_point << 6) | (utf8[i + 1] & 0x3F);
+      code_point = (code_point << 6) | (utf8[i + 2] & 0x3F);
+      code_point = (code_point << 6) | (utf8[i + 3] & 0x3F);
+      i += 4;
+    } else {
+      // Invalid UTF-8 byte sequence
+      throw std::invalid_argument("Invalid UTF-8 encoding.");
     }
 
-    // Return the resulting UTF-16 string
-    return utf16;
+    // If the code point is beyond the BMP range, use surrogate pairs
+    if (code_point >= 0x10000) {
+      code_point -= 0x10000; // Subtract 0x10000 to get the surrogate pair
+      uint16_t high_surrogate = 0xD800 + (code_point >> 10);  // High surrogate
+      uint16_t low_surrogate = 0xDC00 + (code_point & 0x3FF); // Low surrogate
+
+      // If not little-endian, swap bytes of the surrogates
+      if (!is_little_endian) {
+        high_surrogate = (high_surrogate >> 8) | (high_surrogate << 8);
+        low_surrogate = (low_surrogate >> 8) | (low_surrogate << 8);
+      }
+
+      // Add both high and low surrogates to the UTF-16 string
+      utf16.push_back(high_surrogate);
+      utf16.push_back(low_surrogate);
+    } else {
+      // For code points within the BMP range, directly store as a 16-bit value
+      uint16_t utf16_char = static_cast<uint16_t>(code_point);
+
+      // If not little-endian, swap the bytes of the 16-bit character
+      if (!is_little_endian) {
+        utf16_char = (utf16_char >> 8) | (utf16_char << 8);
+      }
+
+      // Add the UTF-16 character to the string
+      utf16.push_back(utf16_char);
+    }
+  }
+
+  // Return the resulting UTF-16 string
+  return utf16;
 }
 
 // Testing Basic Logic
 TEST(UTF8ToUTF16Test, BasicConversion) {
-std::string utf8 = u8"Hello, 世界!";
-std::u16string utf16 = fury::utf8ToUtf16(utf8, true);
-ASSERT_EQ(utf16, u"Hello, 世界!");
+  std::string utf8 = u8"Hello, 世界!";
+  std::u16string utf16 = fury::utf8ToUtf16(utf8, true);
+  ASSERT_EQ(utf16, u"Hello, 世界!");
 }
 
 // Testing Empty String
 TEST(UTF8ToUTF16Test, EmptyString) {
-std::string utf8 = "";
-std::u16string utf16 = fury::utf8ToUtf16(utf8, true);
-ASSERT_EQ(utf16, u"");
+  std::string utf8 = "";
+  std::u16string utf16 = fury::utf8ToUtf16(utf8, true);
+  ASSERT_EQ(utf16, u"");
 }
 
 // Testing emoji
 TEST(UTF8ToUTF16Test, SurrogatePairs) {
-std::string utf8 = "\xF0\x9F\x98\x80"; // 😀 emoji
-std::u16string utf16 = fury::utf8ToUtf16(utf8, true);
-std::u16string expected_utf16 = {0xD83D, 0xDE00}; // Surrogate pair for emoji
-ASSERT_EQ(utf16, expected_utf16);
+  std::string utf8 = "\xF0\x9F\x98\x80"; // 😀 emoji
+  std::u16string utf16 = fury::utf8ToUtf16(utf8, true);
+  std::u16string expected_utf16 = {0xD83D, 0xDE00}; // Surrogate pair for emoji
+  ASSERT_EQ(utf16, expected_utf16);
 }
 
 // Correct Boundary testing for U+FFFD (replacement character)
 TEST(UTF8ToUTF16Test, BoundaryValues) {
-// "\xEF\xBF\xBD" is the UTF-8 encoding for U+FFFD (replacement character)
-std::string utf8 = "\xEF\xBF\xBD"; // U+FFFD in UTF-8
-std::u16string utf16 = fury::utf8ToUtf16(utf8, true);
-std::u16string expected_utf16 = {
-        0xFFFD}; // Expected UTF-16 representation of U+FFFD
-ASSERT_EQ(utf16, expected_utf16);
+  // "\xEF\xBF\xBD" is the UTF-8 encoding for U+FFFD (replacement character)
+  std::string utf8 = "\xEF\xBF\xBD"; // U+FFFD in UTF-8
+  std::u16string utf16 = fury::utf8ToUtf16(utf8, true);
+  std::u16string expected_utf16 = {
+      0xFFFD}; // Expected UTF-16 representation of U+FFFD
+  ASSERT_EQ(utf16, expected_utf16);
 }
 
 // Testing Special Characters
 TEST(UTF8ToUTF16Test, SpecialCharacters) {
-std::string utf8 = " \n\t";
-std::u16string utf16 = fury::utf8ToUtf16(utf8, true);
-ASSERT_EQ(utf16, u" \n\t");
+  std::string utf8 = " \n\t";
+  std::u16string utf16 = fury::utf8ToUtf16(utf8, true);
+  ASSERT_EQ(utf16, u" \n\t");
 }
 
 // Testing LittleEndian
 TEST(UTF8ToUTF16Test, LittleEndian) {
-std::string utf8 = "ab";
-std::u16string utf16 = fury::utf8ToUtf16(utf8, true);
-std::u16string expected_utf16 = {
-        0x61, 0x62}; // Little-endian UTF-16 representation of "ab"
-ASSERT_EQ(utf16, expected_utf16);
+  std::string utf8 = "ab";
+  std::u16string utf16 = fury::utf8ToUtf16(utf8, true);
+  std::u16string expected_utf16 = {
+      0x61, 0x62}; // Little-endian UTF-16 representation of "ab"
+  ASSERT_EQ(utf16, expected_utf16);
 }
 
 // Correct BigEndian testing for BOM (Byte Order Mark)
 TEST(UTF8ToUTF16Test, BigEndian) {
-std::string utf8 = "\xEF\xBB\xBF"; // BOM in UTF-8 (0xFEFF)
-std::u16string utf16 = fury::utf8ToUtf16(utf8, false); // Big-endian
-std::u16string expected_utf16 = {0xFFFE}; // Expected BOM in UTF-16
-ASSERT_EQ(utf16, expected_utf16);
+  std::string utf8 = "\xEF\xBB\xBF"; // BOM in UTF-8 (0xFEFF)
+  std::u16string utf16 = fury::utf8ToUtf16(utf8, false); // Big-endian
+  std::u16string expected_utf16 = {0xFFFE}; // Expected BOM in UTF-16
+  ASSERT_EQ(utf16, expected_utf16);
 }
 
 // Testing round-trip conversion (UTF-8 -> UTF-16 -> UTF-8)
 TEST(UTF8ToUTF16Test, RoundTripConversion) {
-std::string original_utf8 = u8"Hello, 世界!";
-std::u16string utf16 = fury::utf8ToUtf16(original_utf8, true);
-std::string utf8_converted_back = fury::utf16ToUtf8(utf16, true);
-ASSERT_EQ(original_utf8, utf8_converted_back);
+  std::string original_utf8 = u8"Hello, 世界!";
+  std::u16string utf16 = fury::utf8ToUtf16(original_utf8, true);
+  std::string utf8_converted_back = fury::utf16ToUtf8(utf16, true);
+  ASSERT_EQ(original_utf8, utf8_converted_back);
 }
 
 // Testing Performance
 TEST(UTF8ToUTF16Test, PerformanceTest) {
-const size_t num_tests = 1000;
-const size_t string_length = 1000;
-// Default little_endian
-bool is_little_endian = true;
-
-// Random UTF-8
-std::vector<std::string> test_strings;
-for (size_t i = 0; i < num_tests; ++i) {
-test_strings.push_back(generateRandomUTF8String(string_length));
-}
-
-// Standard Library
-try {
-auto start_time = std::chrono::high_resolution_clock::now();
-std::wstring_convert<std::codecvt_utf8<wchar_t>, wchar_t> convert;
-// Loop through test strings and convert each UTF-8 string to UTF-16
-for (const auto &str : test_strings) {
-std::wstring wide_str = convert.from_bytes(str);
-std::u16string utf16;
-for (wchar_t wc : wide_str) {
-utf16.push_back(static_cast<char16_t>(wc));
-}
-}
-auto end_time = std::chrono::high_resolution_clock::now();
-auto duration = std::chrono::duration_cast<std::chrono::nanoseconds>(
-        end_time - start_time)
-        .count();
-FURY_LOG(INFO) << "Standard Library Running Time: " << duration << " ns";
-} catch (const std::exception &e) {
-FURY_LOG(FATAL) << "Caught exception in standard library conversion: "
-<< e.what();
-}
-
-// BaseLine
-try {
-auto start_time = std::chrono::high_resolution_clock::now();
-for (const auto &str : test_strings) {
-std::u16string utf16 = utf8ToUtf16BaseLine(str, is_little_endian);
-}
-auto end_time = std::chrono::high_resolution_clock::now();
-auto duration = std::chrono::duration_cast<std::chrono::nanoseconds>(
-        end_time - start_time)
-        .count();
-FURY_LOG(INFO) << "BaseLine Running Time: " << duration << " ns";
-} catch (const std::exception &e) {
-FURY_LOG(FATAL) << "Caught exception in baseline conversion: " << e.what();
-}
-
-// Optimized (SIMD)
-try {
-auto start_time = std::chrono::high_resolution_clock::now();
-for (const auto &str : test_strings) {
-std::u16string utf16 = fury::utf8ToUtf16(str, is_little_endian);
-}
-auto end_time = std::chrono::high_resolution_clock::now();
-auto duration = std::chrono::duration_cast<std::chrono::nanoseconds>(
-        end_time - start_time)
-        .count();
-FURY_LOG(INFO) << "SIMD Optimized Running Time: " << duration << " ns";
-} catch (const std::exception &e) {
-FURY_LOG(FATAL) << "Caught exception in SIMD optimized conversion: "
-<< e.what();
-}
+  const size_t num_tests = 1000;
+  const size_t string_length = 1000;
+  // Default little_endian
+  bool is_little_endian = true;
+
+  // Random UTF-8
+  std::vector<std::string> test_strings;
+  for (size_t i = 0; i < num_tests; ++i) {
+    test_strings.push_back(generateRandomUTF8String(string_length));
+  }
+
+  // Standard Library
+  try {
+    auto start_time = std::chrono::high_resolution_clock::now();
+    std::wstring_convert<std::codecvt_utf8<wchar_t>, wchar_t> convert;
+    // Loop through test strings and convert each UTF-8 string to UTF-16
+    for (const auto &str : test_strings) {
+      std::wstring wide_str = convert.from_bytes(str);
+      std::u16string utf16;
+      for (wchar_t wc : wide_str) {
+        utf16.push_back(static_cast<char16_t>(wc));
+      }
+    }
+    auto end_time = std::chrono::high_resolution_clock::now();
+    auto duration = std::chrono::duration_cast<std::chrono::nanoseconds>(
+                        end_time - start_time)
+                        .count();
+    FURY_LOG(INFO) << "Standard Library Running Time: " << duration << " ns";
+  } catch (const std::exception &e) {
+    FURY_LOG(FATAL) << "Caught exception in standard library conversion: "
+                    << e.what();
+  }
+
+  // BaseLine
+  try {
+    auto start_time = std::chrono::high_resolution_clock::now();
+    for (const auto &str : test_strings) {
+      std::u16string utf16 = utf8ToUtf16BaseLine(str, is_little_endian);
+    }
+    auto end_time = std::chrono::high_resolution_clock::now();
+    auto duration = std::chrono::duration_cast<std::chrono::nanoseconds>(
+                        end_time - start_time)
+                        .count();
+    FURY_LOG(INFO) << "BaseLine Running Time: " << duration << " ns";
+  } catch (const std::exception &e) {
+    FURY_LOG(FATAL) << "Caught exception in baseline conversion: " << e.what();
+  }
+
+  // Optimized (SIMD)
+  try {
+    auto start_time = std::chrono::high_resolution_clock::now();
+    for (const auto &str : test_strings) {
+      std::u16string utf16 = fury::utf8ToUtf16(str, is_little_endian);
+    }
+    auto end_time = std::chrono::high_resolution_clock::now();
+    auto duration = std::chrono::duration_cast<std::chrono::nanoseconds>(
+                        end_time - start_time)
+                        .count();
+    FURY_LOG(INFO) << "SIMD Optimized Running Time: " << duration << " ns";
+  } catch (const std::exception &e) {
+    FURY_LOG(FATAL) << "Caught exception in SIMD optimized conversion: "
+                    << e.what();
+  }
 }
 
 } // namespace fury
 
 int main(int argc, char **argv) {
-    ::testing::InitGoogleTest(&argc, argv);
-    return RUN_ALL_TESTS();
+  ::testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
 }
\ No newline at end of file