apacheGH-38958: [C++][Parquet] Fix spelling (apache#38959)

### Rationale for this change

### What changes are included in this PR?

Spelling fixes to *parquet*

### Are these changes tested?

### Are there any user-facing changes?

* Closes: apache#38958

Authored-by: Josh Soref <[email protected]>
Signed-off-by: mwish <[email protected]>
dgreiss · Feb 17, 2024 · 1f605dd · 1f605dd
1 parent 061c15c
commit 1f605dd
Show file tree

Hide file tree

Showing 21 changed files with 44 additions and 44 deletions.
diff --git a/cpp/src/parquet/arrow/arrow_reader_writer_test.cc b/cpp/src/parquet/arrow/arrow_reader_writer_test.cc
@@ -2410,7 +2410,7 @@ TEST(TestArrowReadWrite, WaitCoalescedReads) {
   ASSERT_EQ(actual_batch->num_rows(), num_rows);
 }
 
-// Use coalesced reads and non-coaleasced reads for different column chunks.
+// Use coalesced reads and non-coalesced reads for different column chunks.
 TEST(TestArrowReadWrite, CoalescedReadsAndNonCoalescedReads) {
   constexpr int num_columns = 5;
   constexpr int num_rows = 128;
@@ -2918,7 +2918,7 @@ TEST(ArrowReadWrite, DecimalStats) {
   auto table = ::arrow::Table::Make(::arrow::schema({field("root", type)}), {array});
 
   std::shared_ptr<Buffer> buffer;
-  ASSERT_NO_FATAL_FAILURE(WriteTableToBuffer(table, /*row_grop_size=*/100,
+  ASSERT_NO_FATAL_FAILURE(WriteTableToBuffer(table, /*row_group_size=*/100,
                                              default_arrow_writer_properties(), &buffer));
 
   std::unique_ptr<FileReader> reader;

diff --git a/cpp/src/parquet/arrow/path_internal.cc b/cpp/src/parquet/arrow/path_internal.cc
@@ -66,7 +66,7 @@
 // 3.  In order to keep repetition/definition level populated the algorithm is lazy
 //     in assigning repetition levels. The algorithm tracks whether it is currently
 //     in the middle of a list by comparing the lengths of repetition/definition levels.
-//     If it is currently in the middle of a list the the number of repetition levels
+//     If it is currently in the middle of a list the number of repetition levels
 //     populated will be greater than definition levels (the start of a List requires
 //     adding the first element). If there are equal numbers of definition and repetition
 //     levels populated this indicates a list is waiting to be started and the next list
@@ -141,7 +141,7 @@ int64_t LazyNullCount(const Array& array) { return array.data()->null_count.load
 bool LazyNoNulls(const Array& array) {
   int64_t null_count = LazyNullCount(array);
   return null_count == 0 ||
-         // kUnkownNullCount comparison is needed to account
+         // kUnknownNullCount comparison is needed to account
          // for null arrays.
          (null_count == ::arrow::kUnknownNullCount &&
           array.null_bitmap_data() == nullptr);
@@ -312,7 +312,7 @@ struct NullableTerminalNode {
 // at least one other node).
 //
 // Type parameters:
-//    |RangeSelector| - A strategy for determine the the range of the child node to
+//    |RangeSelector| - A strategy for determining the range of the child node to
 //    process.
 //       this varies depending on the type of list (int32_t* offsets, int64_t* offsets of
 //       fixed.

diff --git a/cpp/src/parquet/arrow/path_internal.h b/cpp/src/parquet/arrow/path_internal.h
@@ -116,7 +116,7 @@ class PARQUET_EXPORT MultipathLevelBuilder {
   ///
   /// \param[in] array The array to process.
   /// \param[in] array_field_nullable Whether the algorithm should consider
-  ///   the the array column as nullable (as determined by its type's parent
+  ///   the array column as nullable (as determined by its type's parent
   ///   field).
   /// \param[in, out] context for use when allocating memory, etc.
   /// \param[out] write_leaf_callback Callback to receive results.
@@ -129,7 +129,7 @@ class PARQUET_EXPORT MultipathLevelBuilder {
   ///
   /// \param[in] array The array to process.
   /// \param[in] array_field_nullable Whether the algorithm should consider
-  ///   the the array column as nullable (as determined by its type's parent
+  ///   the array column as nullable (as determined by its type's parent
   ///   field).
   static ::arrow::Result<std::unique_ptr<MultipathLevelBuilder>> Make(
       const ::arrow::Array& array, bool array_field_nullable);

diff --git a/cpp/src/parquet/arrow/path_internal_test.cc b/cpp/src/parquet/arrow/path_internal_test.cc
@@ -381,7 +381,7 @@ TEST_F(MultipathLevelBuilderTest, NestedListsWithSomeNulls) {
                      /*rep_levels=*/std::vector<int16_t>{0, 0, 2, 2, 1, 1, 0, 2});
 }
 
-TEST_F(MultipathLevelBuilderTest, NestedListsWithSomeNullsSomeEmptys) {
+TEST_F(MultipathLevelBuilderTest, NestedListsWithSomeNullsSomeEmpties) {
   auto entries = field("Entries", ::arrow::int64(), /*nullable=*/true);
   auto list_field = field("list", list(entries), /*nullable=*/true);
   auto nested_list_type = list(list_field);
@@ -442,7 +442,7 @@ TEST_F(MultipathLevelBuilderTest, TripleNestedListsAllPresent) {
                      });
 }
 
-TEST_F(MultipathLevelBuilderTest, TripleNestedListsWithSomeNullsSomeEmptys) {
+TEST_F(MultipathLevelBuilderTest, TripleNestedListsWithSomeNullsSomeEmpties) {
   auto entries = field("Entries", ::arrow::int64(), /*nullable=*/true);
   auto list_field = field("list", list(entries), /*nullable=*/true);
   auto nested_list_type = list(list_field);

diff --git a/cpp/src/parquet/arrow/schema.cc b/cpp/src/parquet/arrow/schema.cc
@@ -229,7 +229,7 @@ static Status GetTimestampMetadata(const ::arrow::TimestampType& type,
   }
 
   // The user implicitly wants timestamp data to retain its original time units,
-  // however the Arrow seconds time unit can not be represented (annotated) in
+  // however the Arrow seconds time unit cannot be represented (annotated) in
   // any version of Parquet and so must be coerced to milliseconds.
   if (type.unit() == ::arrow::TimeUnit::SECOND) {
     *logical_type =

diff --git a/cpp/src/parquet/arrow/schema_internal.cc b/cpp/src/parquet/arrow/schema_internal.cc
@@ -48,7 +48,7 @@ Result<std::shared_ptr<ArrowType>> MakeArrowInt(const LogicalType& logical_type)
       return integer.is_signed() ? ::arrow::int32() : ::arrow::uint32();
     default:
       return Status::TypeError(logical_type.ToString(),
-                               " can not annotate physical type Int32");
+                               " cannot annotate physical type Int32");
   }
 }
 
@@ -59,7 +59,7 @@ Result<std::shared_ptr<ArrowType>> MakeArrowInt64(const LogicalType& logical_typ
       return integer.is_signed() ? ::arrow::int64() : ::arrow::uint64();
     default:
       return Status::TypeError(logical_type.ToString(),
-                               " can not annotate physical type Int64");
+                               " cannot annotate physical type Int64");
   }
 }
 
@@ -70,7 +70,7 @@ Result<std::shared_ptr<ArrowType>> MakeArrowTime32(const LogicalType& logical_ty
       return ::arrow::time32(::arrow::TimeUnit::MILLI);
     default:
       return Status::TypeError(logical_type.ToString(),
-                               " can not annotate physical type Time32");
+                               " cannot annotate physical type Time32");
   }
 }
 
@@ -83,7 +83,7 @@ Result<std::shared_ptr<ArrowType>> MakeArrowTime64(const LogicalType& logical_ty
       return ::arrow::time64(::arrow::TimeUnit::NANO);
     default:
       return Status::TypeError(logical_type.ToString(),
-                               " can not annotate physical type Time64");
+                               " cannot annotate physical type Time64");
   }
 }
 

diff --git a/cpp/src/parquet/arrow/writer.cc b/cpp/src/parquet/arrow/writer.cc
@@ -294,7 +294,7 @@ class FileWriterImpl : public FileWriter {
       for (int i = 0; i < schema_->num_fields(); ++i) {
         // Explicitly create each ArrowWriteContext object to avoid unintentional
         // call of the copy constructor. Otherwise, the buffers in the type of
-        // sharad_ptr will be shared among all contexts.
+        // shared_ptr will be shared among all contexts.
         parallel_column_write_contexts_.emplace_back(pool, arrow_properties_.get());
       }
     }

diff --git a/cpp/src/parquet/column_reader_test.cc b/cpp/src/parquet/column_reader_test.cc
@@ -269,7 +269,7 @@ TEST_F(TestPrimitiveReader, TestInt32FlatRepeated) {
 }
 
 // Tests skipping around page boundaries.
-TEST_F(TestPrimitiveReader, TestSkipAroundPageBoundries) {
+TEST_F(TestPrimitiveReader, TestSkipAroundPageBoundaries) {
   int levels_per_page = 100;
   int num_pages = 7;
   max_def_level_ = 0;
@@ -372,7 +372,7 @@ TEST_F(TestPrimitiveReader, TestSkipRepeatedField) {
   InitReader(&descr);
   Int32Reader* reader = static_cast<Int32Reader*>(reader_.get());
 
-  // Vecotrs to hold read values, definition levels, and repetition levels.
+  // Vectors to hold read values, definition levels, and repetition levels.
   std::vector<int32_t> read_vals(4, -1);
   std::vector<int16_t> read_defs(4, -1);
   std::vector<int16_t> read_reps(4, -1);
@@ -902,7 +902,7 @@ TEST_P(RecordReaderPrimitiveTypeTest, ReadRequiredRepeated) {
 }
 
 // Tests reading a nullable repeated field. Tests reading null values at
-// differnet levels and reading an empty list.
+// different levels and reading an empty list.
 TEST_P(RecordReaderPrimitiveTypeTest, ReadNullableRepeated) {
   NodePtr column = GroupNode::Make(
       "p", Repetition::OPTIONAL,
@@ -1240,7 +1240,7 @@ TEST_P(RecordReaderPrimitiveTypeTest, SkipRepeatedConsumeBufferFirst) {
     ASSERT_EQ(records_skipped, 12);
     CheckState(/*values_written=*/0, /*null_count=*/0, /*levels_written=*/12,
                /*levels_position=*/0);
-    // Everthing is empty because we reset the reader before this skip.
+    // Everything is empty because we reset the reader before this skip.
     CheckReadValues(/*expected_values=*/{}, /*expected_def_levels=*/{},
                     /*expected_rep_levels=*/{});
   }
@@ -1395,7 +1395,7 @@ TEST_P(RecordReaderPrimitiveTypeTest, SkipPartialRecord) {
   }
 }
 
-INSTANTIATE_TEST_SUITE_P(RecordReaderPrimitveTypeTests, RecordReaderPrimitiveTypeTest,
+INSTANTIATE_TEST_SUITE_P(RecordReaderPrimitiveTypeTests, RecordReaderPrimitiveTypeTest,
                          ::testing::Values(/*read_dense_for_nullable=*/true, false),
                          testing::PrintToStringParamName());
 
@@ -1608,8 +1608,8 @@ TEST_P(ByteArrayRecordReaderTest, ReadAndSkipOptional) {
 }
 
 // Tests reading and skipping an optional FLBA field.
-// The binary readers only differ in DeocdeDense and DecodeSpaced functions, so
-// testing optional is sufficient in excercising those code paths.
+// The binary readers only differ in DecodeDense and DecodeSpaced functions, so
+// testing optional is sufficient in exercising those code paths.
 TEST_P(FLBARecordReaderTest, ReadAndSkipOptional) {
   MakeRecordReader(/*levels_per_page=*/90, /*num_pages=*/1, /*FLBA_type_length=*/4);
 
@@ -1710,7 +1710,7 @@ TEST_P(RecordReaderStressTest, StressTest) {
 
   // The levels_index and values_index are over the original vectors that have
   // all the rep/def values for all the records. In the following loop, we will
-  // read/skip a numebr of records and Reset the reader after each iteration.
+  // read/skip a number of records and Reset the reader after each iteration.
   // This is on-par with how the record reader is used.
   size_t levels_index = 0;
   size_t values_index = 0;

diff --git a/cpp/src/parquet/column_writer.cc b/cpp/src/parquet/column_writer.cc
@@ -75,7 +75,7 @@ namespace parquet {
 
 namespace {
 
-// Visitor that exracts the value buffer from a FlatArray at a given offset.
+// Visitor that extracts the value buffer from a FlatArray at a given offset.
 struct ValueBufferSlicer {
   template <typename T>
   ::arrow::enable_if_base_binary<typename T::TypeClass, Status> Visit(

diff --git a/cpp/src/parquet/encoding.cc b/cpp/src/parquet/encoding.cc
@@ -745,7 +745,7 @@ void DictEncoderImpl<ByteArrayType>::Put(const ::arrow::Array& values) {
 template <typename DType>
 void AssertCanPutDictionary(DictEncoderImpl<DType>* encoder, const ::arrow::Array& dict) {
   if (dict.null_count() > 0) {
-    throw ParquetException("Inserted dictionary cannot cannot contain nulls");
+    throw ParquetException("Inserted dictionary cannot contain nulls");
   }
 
   if (encoder->num_entries() > 0) {

diff --git a/cpp/src/parquet/encoding.h b/cpp/src/parquet/encoding.h
@@ -233,7 +233,7 @@ class DictEncoder : virtual public TypedEncoder<DType> {
 
   /// \brief EXPERIMENTAL: Append dictionary indices into the encoder. It is
   /// assumed (without any boundschecking) that the indices reference
-  /// pre-existing dictionary values
+  /// preexisting dictionary values
   /// \param[in] indices the dictionary index values. Only Int32Array currently
   /// supported
   virtual void PutIndices(const ::arrow::Array& indices) = 0;

diff --git a/cpp/src/parquet/encoding_test.cc b/cpp/src/parquet/encoding_test.cc
@@ -1634,7 +1634,7 @@ TYPED_TEST(TestDeltaBitPackEncoding, NonZeroPaddedMiniblockBitWidth) {
   }
 }
 
-// Test that the DELTA_BINARY_PACKED encoding works properply in the presence of values
+// Test that the DELTA_BINARY_PACKED encoding works properly in the presence of values
 // that will cause integer overflow (see GH-37939).
 TYPED_TEST(TestDeltaBitPackEncoding, DeltaBitPackedWrapping) {
   using T = typename TypeParam::c_type;

diff --git a/cpp/src/parquet/encryption/test_encryption_util.h b/cpp/src/parquet/encryption/test_encryption_util.h
@@ -88,7 +88,7 @@ std::unordered_map<std::string, std::string> BuildKeyMap(const char* const* colu
                                                          const char* footer_key);
 
 // The result of this function will be used to set into EncryptionConfiguration
-// as colum keys.
+// as column keys.
 std::string BuildColumnKeyMapping();
 
 // FileEncryptor and FileDecryptor are helper classes to write/read an encrypted parquet

diff --git a/cpp/src/parquet/level_conversion.h b/cpp/src/parquet/level_conversion.h
@@ -100,7 +100,7 @@ struct PARQUET_EXPORT LevelInfo {
     }
   }
 
-  /// Incremetns level for a optional node.
+  /// Increments level for an optional node.
   void IncrementOptional() { def_level++; }
 
   /// Increments levels for the repeated node.  Returns
@@ -112,7 +112,7 @@ struct PARQUET_EXPORT LevelInfo {
     // to distinguish between an empty list and a list with an item in it.
     ++rep_level;
     ++def_level;
-    // For levels >= repeated_ancenstor_def_level it indicates the list was
+    // For levels >= repeated_ancestor_def_level it indicates the list was
     // non-null and had at least one element.  This is important
     // for later decoding because we need to add a slot for these
     // values.  for levels < current_def_level no slots are added

diff --git a/cpp/src/parquet/level_conversion_test.cc b/cpp/src/parquet/level_conversion_test.cc
@@ -127,7 +127,7 @@ TEST(DefLevelsToBitmap, WithRepetitionLevelFiltersOutEmptyListValues) {
   level_info.repeated_ancestor_def_level = 1;
   level_info.def_level = 2;
   level_info.rep_level = 1;
-  // All zeros should be ignored, ones should be unset in the bitmp and 2 should be set.
+  // All zeros should be ignored, ones should be unset in the bitmap and 2 should be set.
   std::vector<int16_t> def_levels = {0, 0, 0, 2, 2, 1, 0, 2};
   DefLevelsToBitmap(def_levels.data(), def_levels.size(), level_info, &io);
 

diff --git a/cpp/src/parquet/page_index.cc b/cpp/src/parquet/page_index.cc
@@ -428,7 +428,7 @@ class PageIndexReaderImpl : public PageIndexReader {
   /// Reader properties used to deserialize thrift object.
   const ReaderProperties& properties_;
 
-  /// File-level decrypter.
+  /// File-level decryptor.
   InternalFileDecryptor* file_decryptor_;
 
   /// Coalesced read ranges of page index of row groups that have been suggested by

diff --git a/cpp/src/parquet/page_index.h b/cpp/src/parquet/page_index.h
@@ -231,13 +231,13 @@ class PARQUET_EXPORT PageIndexReader {
                         const std::vector<int32_t>& column_indices,
                         const PageIndexSelection& selection) = 0;
 
-  /// \brief Advise the reader page index of these row groups will not be read any more.
+  /// \brief Advise the reader page index of these row groups will not be read anymore.
   ///
   /// The PageIndexReader implementation has the opportunity to cancel any prefetch or
   /// release resource that are related to these row groups.
   ///
   /// \param[in] row_group_indices list of row group ordinal that whose page index will
-  /// not be accessed any more.
+  /// not be accessed anymore.
   virtual void WillNotNeed(const std::vector<int32_t>& row_group_indices) = 0;
 
   /// \brief Determine the column index and offset index ranges for the given row group.
@@ -263,7 +263,7 @@ class PARQUET_EXPORT ColumnIndexBuilder {
   /// \brief Add statistics of a data page.
   ///
   /// If the ColumnIndexBuilder has seen any corrupted statistics, it will
-  /// not update statistics any more.
+  /// not update statistics anymore.
   ///
   /// \param stats Page statistics in the encoded form.
   virtual void AddPage(const EncodedStatistics& stats) = 0;

diff --git a/cpp/src/parquet/reader_test.cc b/cpp/src/parquet/reader_test.cc
@@ -88,7 +88,7 @@ std::string lz4_raw_compressed_larger() {
   return data_file("lz4_raw_compressed_larger.parquet");
 }
 
-std::string overflow_i16_page_oridinal() {
+std::string overflow_i16_page_ordinal() {
   return data_file("overflow_i16_page_cnt.parquet");
 }
 
@@ -429,7 +429,7 @@ TEST_F(TestAllTypesPlain, TestBatchRead) {
   ASSERT_FALSE(col->HasNext());
 }
 
-TEST_F(TestAllTypesPlain, RowGroupColumnBoundchecking) {
+TEST_F(TestAllTypesPlain, RowGroupColumnBoundsChecking) {
   // Part of PARQUET-1857
   ASSERT_THROW(reader_->RowGroup(reader_->metadata()->num_row_groups()),
                ParquetException);
@@ -1311,7 +1311,7 @@ INSTANTIATE_TEST_SUITE_P(Lz4CodecTests, TestCodec, ::testing::ValuesIn(test_code
 // INT16_MAX pages. (GH-15074).
 TEST(TestFileReader, TestOverflowInt16PageOrdinal) {
   ReaderProperties reader_props;
-  auto file_reader = ParquetFileReader::OpenFile(overflow_i16_page_oridinal(),
+  auto file_reader = ParquetFileReader::OpenFile(overflow_i16_page_ordinal(),
                                                  /*memory_map=*/false, reader_props);
   auto metadata_ptr = file_reader->metadata();
   EXPECT_EQ(1, metadata_ptr->num_row_groups());

diff --git a/cpp/src/parquet/schema.cc b/cpp/src/parquet/schema.cc
@@ -255,14 +255,14 @@ PrimitiveNode::PrimitiveNode(const std::string& name, Repetition::type repetitio
         converted_type_ = logical_type_->ToConvertedType(&decimal_metadata_);
       } else {
         error << logical_type_->ToString();
-        error << " can not be applied to primitive type ";
+        error << " cannot be applied to primitive type ";
         error << TypeToString(physical_type);
         throw ParquetException(error.str());
       }
     } else {
       error << "Nested logical type ";
       error << logical_type_->ToString();
-      error << " can not be applied to non-group node";
+      error << " cannot be applied to non-group node";
       throw ParquetException(error.str());
     }
   } else {
@@ -344,7 +344,7 @@ GroupNode::GroupNode(const std::string& name, Repetition::type repetition,
       std::stringstream error;
       error << "Logical type ";
       error << logical_type_->ToString();
-      error << " can not be applied to group node";
+      error << " cannot be applied to group node";
       throw ParquetException(error.str());
     }
   } else {

diff --git a/cpp/src/parquet/schema_test.cc b/cpp/src/parquet/schema_test.cc
@@ -908,7 +908,7 @@ static void ConfirmFactoryEquivalence(
 TEST(TestLogicalTypeConstruction, FactoryEquivalence) {
   // For each legacy converted type, ensure that the equivalent logical type object
   // can be obtained from either the base class's FromConvertedType() factory method or
-  // the logical type type class's Make() method (accessed via convenience methods on the
+  // the logical type class's Make() method (accessed via convenience methods on the
   // base class) and that these logical type objects are equivalent
 
   struct ConfirmFactoryEquivalenceArguments {
@@ -1870,7 +1870,7 @@ class TestSchemaElementConstruction : public ::testing::Test {
     if (expect_logicalType_) {
       ASSERT_TRUE(element_->__isset.logicalType)
           << node_->logical_type()->ToString()
-          << " logical type unexpectedly failed to genverate a logicalType in the Thrift "
+          << " logical type unexpectedly failed to generate a logicalType in the Thrift "
              "intermediate object";
       ASSERT_TRUE(check_logicalType_())
           << node_->logical_type()->ToString()

diff --git a/cpp/src/parquet/statistics.h b/cpp/src/parquet/statistics.h
@@ -205,7 +205,7 @@ class PARQUET_EXPORT Statistics {
       ::arrow::MemoryPool* pool = ::arrow::default_memory_pool());
 
   /// \brief Create a new statistics instance given a column schema
-  /// definition and pre-existing state
+  /// definition and preexisting state
   /// \param[in] descr the column schema
   /// \param[in] encoded_min the encoded minimum value
   /// \param[in] encoded_max the encoded maximum value