From 85c414697342ab07003d6dca8a9177926b2e4f39 Mon Sep 17 00:00:00 2001 From: Karthikeyan Natarajan Date: Mon, 15 Nov 2021 11:33:36 +0530 Subject: [PATCH 1/3] spell check fix cpp/ --- cpp/src/binaryop/compiled/binary_ops.cuh | 4 ++-- cpp/src/groupby/sort/aggregate.cpp | 4 ++-- cpp/src/io/orc/aggregate_orc_metadata.cpp | 2 +- cpp/src/io/orc/aggregate_orc_metadata.hpp | 2 +- cpp/src/io/orc/stripe_enc.cu | 4 ++-- cpp/src/io/orc/writer_impl.cu | 2 +- cpp/src/io/parquet/parquet_gpu.hpp | 4 ++-- cpp/src/io/text/multibyte_split.cu | 2 +- cpp/src/lists/drop_list_duplicates.cu | 4 ++-- cpp/src/rolling/rolling_detail.cuh | 2 +- cpp/tests/column/column_view_shallow_test.cpp | 2 +- cpp/tests/datetime/datetime_ops_test.cpp | 4 ++-- cpp/tests/transform/row_bit_count_test.cu | 6 +++--- 13 files changed, 21 insertions(+), 21 deletions(-) diff --git a/cpp/src/binaryop/compiled/binary_ops.cuh b/cpp/src/binaryop/compiled/binary_ops.cuh index 84147fc9220..10e9b2532af 100644 --- a/cpp/src/binaryop/compiled/binary_ops.cuh +++ b/cpp/src/binaryop/compiled/binary_ops.cuh @@ -117,7 +117,7 @@ struct ops_wrapper { } else { return BinaryOperator{}.template operator()(x, y); } - // To supress nvcc warning + // To suppress nvcc warning return std::invoke_result_t{}; }(); if constexpr (is_bool_result()) @@ -164,7 +164,7 @@ struct ops2_wrapper { } else { return BinaryOperator{}.template operator()(x, y); } - // To supress nvcc warning + // To suppress nvcc warning return std::invoke_result_t{}; }(); if constexpr (is_bool_result()) diff --git a/cpp/src/groupby/sort/aggregate.cpp b/cpp/src/groupby/sort/aggregate.cpp index 234bb447761..d68b701d75f 100644 --- a/cpp/src/groupby/sort/aggregate.cpp +++ b/cpp/src/groupby/sort/aggregate.cpp @@ -559,7 +559,7 @@ auto column_view_with_common_nulls(column_view const& column_0, column_view cons } /** - * @brief Perform covariance betweeen two child columns of non-nullable struct column. + * @brief Perform covariance between two child columns of non-nullable struct column. * */ template <> @@ -602,7 +602,7 @@ void aggregate_result_functor::operator()(aggregation c }; /** - * @brief Perform correlation betweeen two child columns of non-nullable struct column. + * @brief Perform correlation between two child columns of non-nullable struct column. * */ template <> diff --git a/cpp/src/io/orc/aggregate_orc_metadata.cpp b/cpp/src/io/orc/aggregate_orc_metadata.cpp index 45d60605936..82161233a92 100644 --- a/cpp/src/io/orc/aggregate_orc_metadata.cpp +++ b/cpp/src/io/orc/aggregate_orc_metadata.cpp @@ -79,7 +79,7 @@ void add_nested_columns(std::map>& selected_co * @brief Adds the column with the given id to the mapping * * All nested columns and direct ancestors of column `id` are included. - * Columns that are not on the direct path are excluded, which may result in prunning. + * Columns that are not on the direct path are excluded, which may result in pruning. */ void add_column_to_mapping(std::map>& selected_columns, metadata const& metadata, diff --git a/cpp/src/io/orc/aggregate_orc_metadata.hpp b/cpp/src/io/orc/aggregate_orc_metadata.hpp index 356d20843e8..7fbb8b0ceab 100644 --- a/cpp/src/io/orc/aggregate_orc_metadata.hpp +++ b/cpp/src/io/orc/aggregate_orc_metadata.hpp @@ -119,7 +119,7 @@ class aggregate_orc_metadata { * @brief Filters ORC file to a selection of columns, based on their paths in the file. * * Paths are in format "grandparent_col.parent_col.child_col", where the root ORC column is - * ommited to match the cuDF table hierarchy. + * omitted to match the cuDF table hierarchy. * * @param column_paths List of full column names (i.e. paths) to select from the ORC file * @return Columns hierarchy - lists of children columns and sorted columns in each nesting level diff --git a/cpp/src/io/orc/stripe_enc.cu b/cpp/src/io/orc/stripe_enc.cu index ff7b642be0e..2c231cf9381 100644 --- a/cpp/src/io/orc/stripe_enc.cu +++ b/cpp/src/io/orc/stripe_enc.cu @@ -654,7 +654,7 @@ static __device__ void encode_null_mask(orcenc_state_s* s, auto const mask_byte = get_mask_byte(column.null_mask(), column.offset()); auto dst_offset = offset + s->nnz; auto vbuf_bit_idx = [](int row) { - // valid_buf is a circular buffer with validitiy of 8 rows in each element + // valid_buf is a circular buffer with validity of 8 rows in each element return row % (encode_block_size * 8); }; if (dst_offset % 8 == 0 and pd_set_cnt == 8) { @@ -690,7 +690,7 @@ static __device__ void encode_null_mask(orcenc_state_s* s, ByteRLE(s, s->valid_buf, s->present_out / 8, nbytes_out, flush, t) * 8; if (!t) { - // Number of rows enocoded so far + // Number of rows encoded so far s->present_out += nrows_encoded; s->numvals -= min(s->numvals, nrows_encoded); } diff --git a/cpp/src/io/orc/writer_impl.cu b/cpp/src/io/orc/writer_impl.cu index 1e580e360ca..0486f3ad405 100644 --- a/cpp/src/io/orc/writer_impl.cu +++ b/cpp/src/io/orc/writer_impl.cu @@ -1421,7 +1421,7 @@ pushdown_null_masks init_pushdown_null_masks(orc_table_view& orc_table, } } if (col.orc_kind() == LIST or col.orc_kind() == MAP) { - // Need a new pushdown mask unless both the parent and current colmn are not nullable + // Need a new pushdown mask unless both the parent and current column are not nullable auto const child_col = orc_table.column(col.child_begin()[0]); // pushdown mask applies to child column(s); use the child column size pd_masks.emplace_back(num_bitmask_words(child_col.size()), stream); diff --git a/cpp/src/io/parquet/parquet_gpu.hpp b/cpp/src/io/parquet/parquet_gpu.hpp index ac2e6ba5cfb..1bd4cb3c6f4 100644 --- a/cpp/src/io/parquet/parquet_gpu.hpp +++ b/cpp/src/io/parquet/parquet_gpu.hpp @@ -307,7 +307,7 @@ struct EncColumnChunk { statistics_chunk const* stats; //!< Fragment statistics uint32_t bfr_size; //!< Uncompressed buffer size uint32_t compressed_size; //!< Compressed buffer size - uint32_t max_page_data_size; //!< Max data size (excuding header) of any page in this chunk + uint32_t max_page_data_size; //!< Max data size (excluding header) of any page in this chunk uint32_t page_headers_size; //!< Sum of size of all page headers uint32_t start_row; //!< First row of chunk uint32_t num_rows; //!< Number of rows in chunk @@ -489,7 +489,7 @@ void InitFragmentStatistics(cudf::detail::device_2dspan groups /** * @brief Initialize per-chunk hash maps used for dictionary with sentinel values * - * @param chunks Flat span of chunks to intialize hash maps for + * @param chunks Flat span of chunks to initialize hash maps for * @param stream CUDA stream to use */ void initialize_chunk_hash_maps(device_span chunks, rmm::cuda_stream_view stream); diff --git a/cpp/src/io/text/multibyte_split.cu b/cpp/src/io/text/multibyte_split.cu index a427809c81a..d287b9f2419 100644 --- a/cpp/src/io/text/multibyte_split.cu +++ b/cpp/src/io/text/multibyte_split.cu @@ -260,7 +260,7 @@ cudf::size_type multibyte_split_scan_full_source(cudf::io::text::data_chunk_sour // Seeding the tile state with an identity value allows the 0th tile to follow the same logic as // the Nth tile, assuming it can look up an inclusive prefix. Without this seed, the 0th block - // would have to follow seperate logic. + // would have to follow separate logic. multibyte_split_seed_kernel<<<1, 1, 0, stream.value()>>>( // tile_multistates, tile_offsets, diff --git a/cpp/src/lists/drop_list_duplicates.cu b/cpp/src/lists/drop_list_duplicates.cu index 0663bc18ab3..527e834c76c 100644 --- a/cpp/src/lists/drop_list_duplicates.cu +++ b/cpp/src/lists/drop_list_duplicates.cu @@ -67,7 +67,7 @@ struct has_negative_nans_fn { * @brief A structure to be used along with type_dispatcher to check if a column has any * negative NaN value. * - * This functor is neccessary because when calling to segmented sort on the list entries, the + * This functor is necessary because when calling to segmented sort on the list entries, the * negative NaN and positive NaN values (if both exist) are separated to the two ends of the output * lists. We want to move all NaN values close together in order to call unique_copy later on. */ @@ -563,7 +563,7 @@ std::pair, std::unique_ptr> drop_list_duplicates values ? cudf::empty_like(values.value().parent()) : nullptr}; } - // The child column conotaining list entries. + // The child column containing list entries. auto const keys_child = keys.get_sliced_child(stream); // Generate a mapping from list entries to their 1-based list indices for the keys column. diff --git a/cpp/src/rolling/rolling_detail.cuh b/cpp/src/rolling/rolling_detail.cuh index d9b67ff9da4..e65ead47a8c 100644 --- a/cpp/src/rolling/rolling_detail.cuh +++ b/cpp/src/rolling/rolling_detail.cuh @@ -722,7 +722,7 @@ class rolling_aggregation_preprocessor final : public cudf::detail::simple_aggre } // STD aggregations depends on VARIANCE aggregation. Each element is applied - // with sqaured-root in the finalize() step. + // with square-root in the finalize() step. std::vector> visit(data_type, cudf::detail::std_aggregation const& agg) override { diff --git a/cpp/tests/column/column_view_shallow_test.cpp b/cpp/tests/column/column_view_shallow_test.cpp index ab324ea8505..4afa96f08d7 100644 --- a/cpp/tests/column/column_view_shallow_test.cpp +++ b/cpp/tests/column/column_view_shallow_test.cpp @@ -84,7 +84,7 @@ TYPED_TEST_SUITE(ColumnViewShallowTests, AllTypes); // Test for fixed_width, dict, string, list, struct // column_view, column_view = same hash. // column_view, make a copy = same hash. -// new column_view from colmn = same hash +// new column_view from column = same hash // column_view, copy column = diff hash // column_view, diff column = diff hash. // diff --git a/cpp/tests/datetime/datetime_ops_test.cpp b/cpp/tests/datetime/datetime_ops_test.cpp index c0d2d1cc447..6634b5ea435 100644 --- a/cpp/tests/datetime/datetime_ops_test.cpp +++ b/cpp/tests/datetime/datetime_ops_test.cpp @@ -742,7 +742,7 @@ TEST_F(BasicDatetimeOpsTest, TestIsLeapYear) 707904541L, // 1992-06-07 08:09:01 GMT - leap year -2181005247L, // 1900-11-20 09:12:33 GMT - non leap year 0L, // UNIX EPOCH 1970-01-01 00:00:00 GMT - non leap year - -12212553600L, // First full year of Gregorian Calandar 1583-01-01 00:00:00 - non-leap-year + -12212553600L, // First full year of Gregorian Calendar 1583-01-01 00:00:00 - non-leap-year 0L, // null 13591632822L, // 2400-09-13 13:33:42 GMT - leap year 4539564243L, // 2113-11-08 06:04:03 GMT - non leap year @@ -811,7 +811,7 @@ TEST_F(BasicDatetimeOpsTest, TestQuarter) 707904541L, // 1992-06-07 08:09:01 GMT -2181005247L, // 1900-11-20 09:12:33 GMT 0L, // UNIX EPOCH 1970-01-01 00:00:00 GMT - -12212553600L, // First full year of Gregorian Calandar 1583-01-01 00:00:00 + -12212553600L, // First full year of Gregorian Calendar 1583-01-01 00:00:00 0L, // null 13591632822L, // 2400-09-13 13:33:42 GMT 4539564243L, // 2113-11-08 06:04:03 GMT diff --git a/cpp/tests/transform/row_bit_count_test.cu b/cpp/tests/transform/row_bit_count_test.cu index f718fbfc57b..7041d0507b0 100644 --- a/cpp/tests/transform/row_bit_count_test.cu +++ b/cpp/tests/transform/row_bit_count_test.cu @@ -229,7 +229,7 @@ TEST_F(RowBitCount, StructsWithLists_RowsExceedingASingleBlock) // Tests that `row_bit_count()` can handle struct> with more // than max_block_size (256) rows. // With a large number of rows, computation spills to multiple thread-blocks, - // thus exercising the branch-stack comptutation. + // thus exercising the branch-stack computation. // The contents of the input column aren't as pertinent to this test as the // column size. For what it's worth, it looks as follows: // [ struct({0,1}), struct({2,3}), struct({4,5}), ... ] @@ -363,7 +363,7 @@ std::pair, std::unique_ptr> build_nested_and_exp // Inner list column // clang-format off cudf::test::lists_column_wrapper list{ - {1, 2, 3, 4, 5}, + {1, 2, 3, 4, 5}, {6, 7, 8}, {33, 34, 35, 36, 37, 38, 39}, {-1, -2}, @@ -409,7 +409,7 @@ std::unique_ptr build_nested_column(std::vector const& struct_vali // Inner list column // clang-format off - cudf::test::lists_column_wrapper list{ + cudf::test::lists_column_wrapper list{ {{1, 2, 3, 4, 5}, {2, 3}}, {{6, 7, 8}, {8, 9}}, {{1, 2}, {3, 4, 5}, {33, 34, 35, 36, 37, 38, 39}}}; From 4a70dedf513249beeea28e5d3d584886edbf576c Mon Sep 17 00:00:00 2001 From: Karthikeyan Natarajan Date: Mon, 15 Nov 2021 11:33:52 +0530 Subject: [PATCH 2/3] spell check fix python/ --- python/cudf/cudf/core/column/column.py | 4 ++-- python/cudf/cudf/core/column/datetime.py | 2 +- python/cudf/cudf/core/column/decimal.py | 2 +- python/cudf/cudf/core/dataframe.py | 2 +- python/cudf/cudf/core/groupby/groupby.py | 2 +- python/cudf/cudf/core/index.py | 2 +- python/cudf/cudf/core/multiindex.py | 2 +- python/cudf/cudf/core/series.py | 8 ++++---- python/cudf/cudf/core/udf/pipeline.py | 2 +- python/cudf/cudf/core/udf/typing.py | 4 ++-- python/cudf/cudf/testing/testing.py | 2 +- python/cudf/cudf/tests/test_binops.py | 2 +- python/cudf/cudf/tests/test_custom_accessor.py | 2 +- python/cudf/cudf/tests/test_datetime.py | 2 +- python/cudf/cudf/tests/test_multiindex.py | 10 +++++----- python/cudf/cudf/tests/test_orc.py | 4 ++-- python/cudf/cudf/utils/gpu_utils.py | 2 +- python/cudf/cudf/utils/ioutils.py | 4 ++-- python/cudf/cudf/utils/utils.py | 4 ++-- python/dask_cudf/dask_cudf/_version.py | 2 +- python/dask_cudf/dask_cudf/backends.py | 2 +- python/dask_cudf/dask_cudf/io/parquet.py | 4 ++-- python/dask_cudf/dask_cudf/io/tests/test_parquet.py | 2 +- 23 files changed, 36 insertions(+), 36 deletions(-) diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index 5f9104263b1..cfff2d3e267 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -337,7 +337,7 @@ def to_gpu_array(self, fillna=None) -> "cuda.devicearray.DeviceNDArray": else: return self.dropna(drop_nan=False).data_array_view - # TODO: This method is decpreated and can be removed when the associated + # TODO: This method is deprecated and can be removed when the associated # Frame methods are removed. def to_array(self, fillna=None) -> np.ndarray: """Get a dense numpy array for the data. @@ -1851,7 +1851,7 @@ def as_column( arbitrary = np.asarray(arbitrary) - # Handle case that `arbitary` elements are cupy arrays + # Handle case that `arbitrary` elements are cupy arrays if ( shape and shape[0] diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py index 68379002e6b..d1b4266b80b 100644 --- a/python/cudf/cudf/core/column/datetime.py +++ b/python/cudf/cudf/core/column/datetime.py @@ -540,7 +540,7 @@ def infer_format(element: str, **kwargs) -> str: if len(second_parts) > 1: # "Z" indicates Zulu time(widely used in aviation) - Which is # UTC timezone that currently cudf only supports. Having any other - # unsuppported timezone will let the code fail below + # unsupported timezone will let the code fail below # with a ValueError. second_parts.remove("Z") second_part = "".join(second_parts[1:]) diff --git a/python/cudf/cudf/core/column/decimal.py b/python/cudf/cudf/core/column/decimal.py index 6409a9f9196..7037b8e6f36 100644 --- a/python/cudf/cudf/core/column/decimal.py +++ b/python/cudf/cudf/core/column/decimal.py @@ -160,7 +160,7 @@ def binary_operator(self, op, other, reflect=False): if reflect: self, other = other, self - # Binary Arithmatics between decimal columns. `Scale` and `precision` + # Binary Arithmetics between decimal columns. `Scale` and `precision` # are computed outside of libcudf if op in ("add", "sub", "mul", "div"): scale = _binop_scale(self.dtype, other.dtype, op) diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index b2e6588edb2..6734566b731 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -6368,7 +6368,7 @@ def wrapper(self, other, axis="columns", level=None, fill_value=None): # __wrapped__ attributes to `wrapped_func`. Cpython looks up the signature # string of a function by recursively delving into __wrapped__ until # it hits the first function that has __signature__ attribute set. To make - # the signature stirng of `wrapper` matches with its actual parameter list, + # the signature string of `wrapper` matches with its actual parameter list, # we directly set the __signature__ attribute of `wrapper` below. new_sig = inspect.signature( diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py index ba69e42674a..13e7d0897f4 100644 --- a/python/cudf/cudf/core/groupby/groupby.py +++ b/python/cudf/cudf/core/groupby/groupby.py @@ -18,7 +18,7 @@ # The three functions below return the quantiles [25%, 50%, 75%] -# respectively, which are called in the describe() method to ouput +# respectively, which are called in the describe() method to output # the summary stats of a GroupBy object def _quantile_25(x): return x.quantile(0.25) diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index 35b80715cca..d187c0dc2d0 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -623,7 +623,7 @@ def _union(self, other, sort=None): else: return result - # If all the above optimizations don't cater to the inpputs, + # If all the above optimizations don't cater to the inputs, # we materialize RangeIndex's into `Int64Index` and # then perform `union`. return Int64Index(self._values)._union(other, sort=sort) diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py index 7c132e3fb71..d82d5dd6e26 100644 --- a/python/cudf/cudf/core/multiindex.py +++ b/python/cudf/cudf/core/multiindex.py @@ -970,7 +970,7 @@ def _concat(cls, objs): source_data = [o.to_frame(index=False) for o in objs] - # TODO: Verify if this is really necesary or if we can rely on + # TODO: Verify if this is really necessary or if we can rely on # DataFrame._concat. if len(source_data) > 1: colnames = source_data[0].columns diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index 00a8ebabe34..5c271bdc4bb 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -2916,7 +2916,7 @@ def unique(self): def nunique(self, method="sort", dropna=True): """Returns the number of unique values of the Series: approximate version, - and exact version to be moved to libgdf + and exact version to be moved to libcudf Excludes NA values by default. @@ -2985,7 +2985,7 @@ def value_counts( Returns ------- - result : Series contanining counts of unique values. + result : Series containing counts of unique values. See also -------- @@ -3802,7 +3802,7 @@ def wrapper(self, other, level=None, fill_value=None, axis=0): # __wrapped__ attributes to `wrapped_func`. Cpython looks up the signature # string of a function by recursively delving into __wrapped__ until # it hits the first function that has __signature__ attribute set. To make - # the signature stirng of `wrapper` matches with its actual parameter list, + # the signature string of `wrapper` matches with its actual parameter list, # we directly set the __signature__ attribute of `wrapper` below. new_sig = inspect.signature( @@ -4989,7 +4989,7 @@ def _align_indices(series_list, how="outer", allow_non_unique=False): def isclose(a, b, rtol=1e-05, atol=1e-08, equal_nan=False): """Returns a boolean array where two arrays are equal within a tolerance. - Two values in ``a`` and ``b`` are considiered equal when the following + Two values in ``a`` and ``b`` are considered equal when the following equation is satisfied. .. math:: diff --git a/python/cudf/cudf/core/udf/pipeline.py b/python/cudf/cudf/core/udf/pipeline.py index deb4546e8b8..2464906be04 100644 --- a/python/cudf/cudf/core/udf/pipeline.py +++ b/python/cudf/cudf/core/udf/pipeline.py @@ -316,7 +316,7 @@ def compile_or_get(frame, func, args): Return a compiled kernel in terms of MaskedTypes that launches a kernel equivalent of `f` for the dtypes of `df`. The kernel uses a thread for each row and calls `f` using that rows data / mask - to produce an output value and output valdity for each row. + to produce an output value and output validity for each row. If the UDF has already been compiled for this requested dtypes, a cached version will be returned instead of running compilation. diff --git a/python/cudf/cudf/core/udf/typing.py b/python/cudf/cudf/core/udf/typing.py index 4b0f0bf1283..da7ff4c0e32 100644 --- a/python/cudf/cudf/core/udf/typing.py +++ b/python/cudf/cudf/core/udf/typing.py @@ -67,7 +67,7 @@ def unify(self, context, other): """ Often within a UDF an instance arises where a variable could be a `MaskedType`, an `NAType`, or a literal based off - the data at runtime, for examplem the variable `ret` here: + the data at runtime, for example the variable `ret` here: def f(x): if x == 1: @@ -185,7 +185,7 @@ class NAType(types.Type): """ A type for handling ops against nulls Exists so we can: - 1. Teach numba that all occurances of `cudf.NA` are + 1. Teach numba that all occurrences of `cudf.NA` are to be read as instances of this type instead 2. Define ops like `if x is cudf.NA` where `x` is of type `Masked` to mean `if x.valid is False` diff --git a/python/cudf/cudf/testing/testing.py b/python/cudf/cudf/testing/testing.py index 9562fca7399..59c291eea0b 100644 --- a/python/cudf/cudf/testing/testing.py +++ b/python/cudf/cudf/testing/testing.py @@ -410,7 +410,7 @@ def assert_series_equal( Whether to check the Index class, dtype and inferred_type are identical. check_series_type : bool, default True - Whether to check the seires class, dtype and + Whether to check the series class, dtype and inferred_type are identical. Currently it is idle, and similar to pandas. check_less_precise : bool or int, default False diff --git a/python/cudf/cudf/tests/test_binops.py b/python/cudf/cudf/tests/test_binops.py index 542dcd9301c..ba2a6dce369 100644 --- a/python/cudf/cudf/tests/test_binops.py +++ b/python/cudf/cudf/tests/test_binops.py @@ -1173,7 +1173,7 @@ def make_scalar_product_data(): ) ) - # we can muliply any timedelta by any int, or bool + # we can multiply any timedelta by any int, or bool valid |= set(product(TIMEDELTA_TYPES, INTEGER_TYPES | BOOL_TYPES)) # we can multiply a float by any int, float, or bool diff --git a/python/cudf/cudf/tests/test_custom_accessor.py b/python/cudf/cudf/tests/test_custom_accessor.py index 16e5b345ce2..bfd2ccbccef 100644 --- a/python/cudf/cudf/tests/test_custom_accessor.py +++ b/python/cudf/cudf/tests/test_custom_accessor.py @@ -44,7 +44,7 @@ def test_dataframe_accessor(gdf): "gdf2", [gd.datasets.randomdata(nrows=1, dtypes={"x": int, "y": int})] ) def test_dataframe_accessor_idendity(gdf1, gdf2): - """Test for accessor idendities + """Test for accessor identities - An object should hold persistent reference to the same accessor - Different objects should hold difference instances of the accessor """ diff --git a/python/cudf/cudf/tests/test_datetime.py b/python/cudf/cudf/tests/test_datetime.py index d666dfc0ec1..1768947ab8d 100644 --- a/python/cudf/cudf/tests/test_datetime.py +++ b/python/cudf/cudf/tests/test_datetime.py @@ -171,7 +171,7 @@ def test_dt_ops(data): assert_eq(pd_data > pd_data, gdf_data > gdf_data) -# libgdf doesn't respect timezones +# licudf doesn't respect timezones @pytest.mark.parametrize("data", [data1()]) @pytest.mark.parametrize("field", fields) def test_dt_series(data, field): diff --git a/python/cudf/cudf/tests/test_multiindex.py b/python/cudf/cudf/tests/test_multiindex.py index d409a099806..07407b8d359 100644 --- a/python/cudf/cudf/tests/test_multiindex.py +++ b/python/cudf/cudf/tests/test_multiindex.py @@ -738,9 +738,9 @@ def test_multiindex_copy_sem(data, levels, codes, names): ) @pytest.mark.parametrize("deep", [True, False]) def test_multiindex_copy_deep(data, deep): - """Test memory idendity for deep copy + """Test memory identity for deep copy Case1: Constructed from GroupBy, StringColumns - Case2: Constrcuted from MultiIndex, NumericColumns + Case2: Constructed from MultiIndex, NumericColumns """ same_ref = not deep @@ -768,19 +768,19 @@ def test_multiindex_copy_deep(data, deep): mi1 = data mi2 = mi1.copy(deep=deep) - # Assert ._levels idendity + # Assert ._levels identity lptrs = [lv._data._data[None].base_data.ptr for lv in mi1._levels] rptrs = [lv._data._data[None].base_data.ptr for lv in mi2._levels] assert all([(x == y) is same_ref for x, y in zip(lptrs, rptrs)]) - # Assert ._codes idendity + # Assert ._codes identity lptrs = [c.base_data.ptr for _, c in mi1._codes._data.items()] rptrs = [c.base_data.ptr for _, c in mi2._codes._data.items()] assert all([(x == y) is same_ref for x, y in zip(lptrs, rptrs)]) - # Assert ._data idendity + # Assert ._data identity lptrs = [d.base_data.ptr for _, d in mi1._data.items()] rptrs = [d.base_data.ptr for _, d in mi2._data.items()] diff --git a/python/cudf/cudf/tests/test_orc.py b/python/cudf/cudf/tests/test_orc.py index 99b5652110b..6b02874146e 100644 --- a/python/cudf/cudf/tests/test_orc.py +++ b/python/cudf/cudf/tests/test_orc.py @@ -502,7 +502,7 @@ def test_orc_writer_sliced(tmpdir): "TestOrcFile.decimal.orc", "TestOrcFile.decimal.same.values.orc", "TestOrcFile.decimal.multiple.values.orc", - # For addional information take look at PR 7034 + # For additional information take look at PR 7034 "TestOrcFile.decimal.runpos.issue.orc", ], ) @@ -541,7 +541,7 @@ def test_orc_decimal_precision_fail(datadir): assert_eq(pdf, gdf) -# For addional information take look at PR 6636 and 6702 +# For additional information take look at PR 6636 and 6702 @pytest.mark.parametrize( "orc_file", [ diff --git a/python/cudf/cudf/utils/gpu_utils.py b/python/cudf/cudf/utils/gpu_utils.py index 77963f8bcc1..dbdd68f2df8 100644 --- a/python/cudf/cudf/utils/gpu_utils.py +++ b/python/cudf/cudf/utils/gpu_utils.py @@ -143,7 +143,7 @@ def _try_get_old_or_new_symbols(): cuda_driver_supported_rt_version >= 11000 and cuda_runtime_version >= 11000 ): - # With cuda enhanced compatibitlity any code compiled + # With cuda enhanced compatibility any code compiled # with 11.x version of cuda can now run on any # driver >= 450.80.02. 11000 is the minimum cuda # version 450.80.02 supports. diff --git a/python/cudf/cudf/utils/ioutils.py b/python/cudf/cudf/utils/ioutils.py index 6746753249c..a7891957102 100644 --- a/python/cudf/cudf/utils/ioutils.py +++ b/python/cudf/cudf/utils/ioutils.py @@ -1032,7 +1032,7 @@ should consume messages from. Valid values are 0 - (N-1) start_offset : int, Kafka Topic/Partition offset that consumption should begin at. Inclusive. -end_offset : int, Kafka Topic/Parition offset that consumption +end_offset : int, Kafka Topic/Partition offset that consumption should end at. Inclusive. batch_timeout : int, default 10000 Maximum number of milliseconds that will be spent trying to @@ -1055,7 +1055,7 @@ or any object with a `read()` method (such as builtin `open()` file handler function or `StringIO`). delimiter : string, default None, The delimiter that should be used - for splitting text chunks into seperate cudf column rows. Currently + for splitting text chunks into separate cudf column rows. Currently only a single delimiter is supported. Returns diff --git a/python/cudf/cudf/utils/utils.py b/python/cudf/cudf/utils/utils.py index 4f9b23bf6fe..a9611a91554 100644 --- a/python/cudf/cudf/utils/utils.py +++ b/python/cudf/cudf/utils/utils.py @@ -353,7 +353,7 @@ def get_appropriate_dispatched_func( elif hasattr(cupy_submodule, fname): cupy_func = getattr(cupy_submodule, fname) - # Handle case if cupy impliments it as a numpy function + # Handle case if cupy implements it as a numpy function # Unsure if needed if cupy_func is func: return NotImplemented @@ -374,7 +374,7 @@ def _cast_to_appropriate_cudf_type(val, index=None): elif (val.ndim == 1) or (val.ndim == 2 and val.shape[1] == 1): # if index is not None and is of a different length # than the index, cupy dispatching behaviour is undefined - # so we dont impliment it + # so we don't implement it if (index is None) or (len(index) == len(val)): return cudf.Series(val, index=index) diff --git a/python/dask_cudf/dask_cudf/_version.py b/python/dask_cudf/dask_cudf/_version.py index eb7457f3465..8ca2cf98381 100644 --- a/python/dask_cudf/dask_cudf/_version.py +++ b/python/dask_cudf/dask_cudf/_version.py @@ -417,7 +417,7 @@ def render_pep440_old(pieces): The ".dev0" means dirty. - Eexceptions: + Exceptions: 1: no tags. 0.postDISTANCE[.dev0] """ if pieces["closest-tag"]: diff --git a/python/dask_cudf/dask_cudf/backends.py b/python/dask_cudf/dask_cudf/backends.py index f81a4743a4a..89b5301ee83 100644 --- a/python/dask_cudf/dask_cudf/backends.py +++ b/python/dask_cudf/dask_cudf/backends.py @@ -196,7 +196,7 @@ def make_meta_object_cudf(x, index=None): ) elif not hasattr(x, "dtype") and x is not None: # could be a string, a dtype object, or a python type. Skip `None`, - # because it is implictly converted to `dtype('f8')`, which we don't + # because it is implicitly converted to `dtype('f8')`, which we don't # want here. try: dtype = np.dtype(x) diff --git a/python/dask_cudf/dask_cudf/io/parquet.py b/python/dask_cudf/dask_cudf/io/parquet.py index 2e5d55e92d2..b47a5e78095 100644 --- a/python/dask_cudf/dask_cudf/io/parquet.py +++ b/python/dask_cudf/dask_cudf/io/parquet.py @@ -111,7 +111,7 @@ def _read_paths( frag = next(ds.get_fragments()) if frag: # Extract hive-partition keys, and make sure they - # are orderd the same as they are in `partitions` + # are ordered the same as they are in `partitions` raw_keys = pa_ds._get_partition_keys(frag.partition_expression) partition_keys = [ (hive_part.name, raw_keys[hive_part.name]) @@ -173,7 +173,7 @@ def read_partition( strings_to_cats = kwargs.get("strings_to_categorical", False) - # Assume multi-peice read + # Assume multi-piece read paths = [] rgs = [] last_partition_keys = None diff --git a/python/dask_cudf/dask_cudf/io/tests/test_parquet.py b/python/dask_cudf/dask_cudf/io/tests/test_parquet.py index d93037b3802..706b0e272ea 100644 --- a/python/dask_cudf/dask_cudf/io/tests/test_parquet.py +++ b/python/dask_cudf/dask_cudf/io/tests/test_parquet.py @@ -378,7 +378,7 @@ def test_chunksize(tmpdir, chunksize, metadata): # one output partition assert ddf3.npartitions == 1 else: - # Files can be aggregateed together, but + # Files can be aggregated together, but # chunksize is not large enough to produce # a single output partition assert ddf3.npartitions < num_row_groups From e366c50e89ea2d0147722bf0276c89aad217e0c8 Mon Sep 17 00:00:00 2001 From: Karthikeyan <6488848+karthikeyann@users.noreply.github.com> Date: Tue, 16 Nov 2021 00:20:19 +0530 Subject: [PATCH 3/3] Update python/cudf/cudf/tests/test_datetime.py Co-authored-by: Charles Blackmon-Luca <20627856+charlesbluca@users.noreply.github.com> --- python/cudf/cudf/tests/test_datetime.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cudf/cudf/tests/test_datetime.py b/python/cudf/cudf/tests/test_datetime.py index 1768947ab8d..5a81d2f1f36 100644 --- a/python/cudf/cudf/tests/test_datetime.py +++ b/python/cudf/cudf/tests/test_datetime.py @@ -171,7 +171,7 @@ def test_dt_ops(data): assert_eq(pd_data > pd_data, gdf_data > gdf_data) -# licudf doesn't respect timezones +# libcudf doesn't respect timezones @pytest.mark.parametrize("data", [data1()]) @pytest.mark.parametrize("field", fields) def test_dt_series(data, field):