diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 27e5827fdf255..dea08cf502fd0 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -196,15 +196,10 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then -i "pandas.Series.dt.tz_convert PR01,PR02" \ -i "pandas.Series.dt.tz_localize PR01,PR02" \ -i "pandas.Series.dt.unit GL08" \ - -i "pandas.Series.dtype SA01" \ -i "pandas.Series.eq PR07,SA01" \ -i "pandas.Series.floordiv PR07" \ -i "pandas.Series.ge PR07,SA01" \ -i "pandas.Series.gt PR07,SA01" \ - -i "pandas.Series.hasnans SA01" \ - -i "pandas.Series.is_monotonic_decreasing SA01" \ - -i "pandas.Series.is_monotonic_increasing SA01" \ - -i "pandas.Series.is_unique SA01" \ -i "pandas.Series.kurt RT03,SA01" \ -i "pandas.Series.kurtosis RT03,SA01" \ -i "pandas.Series.le PR07,SA01" \ @@ -236,7 +231,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then -i "pandas.Series.rsub PR07" \ -i "pandas.Series.rtruediv PR07" \ -i "pandas.Series.sem PR01,RT03,SA01" \ - -i "pandas.Series.shape SA01" \ -i "pandas.Series.skew RT03,SA01" \ -i "pandas.Series.sparse PR01,SA01" \ -i "pandas.Series.sparse.density SA01" \ @@ -317,8 +311,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then -i "pandas.Timestamp.ctime SA01" \ -i "pandas.Timestamp.date SA01" \ -i "pandas.Timestamp.day GL08" \ - -i "pandas.Timestamp.day_of_week SA01" \ - -i "pandas.Timestamp.dayofweek SA01" \ -i "pandas.Timestamp.dst SA01" \ -i "pandas.Timestamp.floor SA01" \ -i "pandas.Timestamp.fold GL08" \ diff --git a/doc/source/user_guide/basics.rst b/doc/source/user_guide/basics.rst index 92799359a61d2..0ff40dcdcd150 100644 --- a/doc/source/user_guide/basics.rst +++ b/doc/source/user_guide/basics.rst @@ -160,11 +160,10 @@ Here is a sample (using 100 column x 100,000 row ``DataFrames``): .. csv-table:: :header: "Operation", "0.11.0 (ms)", "Prior Version (ms)", "Ratio to Prior" :widths: 25, 25, 25, 25 - :delim: ; - ``df1 > df2``; 13.32; 125.35; 0.1063 - ``df1 * df2``; 21.71; 36.63; 0.5928 - ``df1 + df2``; 22.04; 36.50; 0.6039 + ``df1 > df2``, 13.32, 125.35, 0.1063 + ``df1 * df2``, 21.71, 36.63, 0.5928 + ``df1 + df2``, 22.04, 36.50, 0.6039 You are highly encouraged to install both libraries. See the section :ref:`Recommended Dependencies ` for more installation info. diff --git a/doc/source/user_guide/gotchas.rst b/doc/source/user_guide/gotchas.rst index 99c85ac66623d..26eb656357bf6 100644 --- a/doc/source/user_guide/gotchas.rst +++ b/doc/source/user_guide/gotchas.rst @@ -315,19 +315,8 @@ Why not make NumPy like R? Many people have suggested that NumPy should simply emulate the ``NA`` support present in the more domain-specific statistical programming language `R -`__. Part of the reason is the NumPy type hierarchy: - -.. csv-table:: - :header: "Typeclass","Dtypes" - :widths: 30,70 - :delim: | - - ``numpy.floating`` | ``float16, float32, float64, float128`` - ``numpy.integer`` | ``int8, int16, int32, int64`` - ``numpy.unsignedinteger`` | ``uint8, uint16, uint32, uint64`` - ``numpy.object_`` | ``object_`` - ``numpy.bool_`` | ``bool_`` - ``numpy.character`` | ``bytes_, str_`` +`__. Part of the reason is the +`NumPy type hierarchy `__. 
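As a minimal sketch of the practical consequence (an illustration added alongside this hunk, not part of the patch): no NumPy integer dtype can represent a missing value, so introducing one silently upcasts a NumPy-backed integer Series to ``float64``, while the nullable ``Int64`` extension dtype keeps its type by tracking ``pd.NA`` in a separate validity mask.

```python
import pandas as pd

# NumPy-backed int64 has no slot for NA: reindexing inserts np.nan
# and forces an upcast to float64.
s = pd.Series([1, 2, 3], dtype="int64")
print(s.reindex([0, 1, 2, 3]).dtype)  # float64

# The nullable Int64 extension dtype stores a validity mask instead,
# so the values stay integers and the new hole becomes pd.NA.
s_nullable = pd.Series([1, 2, 3], dtype="Int64")
print(s_nullable.reindex([0, 1, 2, 3]).dtype)  # Int64
```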
The R language, by contrast, only has a handful of built-in data types: ``integer``, ``numeric`` (floating-point), ``character``, and diff --git a/doc/source/user_guide/groupby.rst b/doc/source/user_guide/groupby.rst index 8c222aff52fd7..267499edfae6f 100644 --- a/doc/source/user_guide/groupby.rst +++ b/doc/source/user_guide/groupby.rst @@ -506,29 +506,28 @@ listed below, those with a ``*`` do *not* have an efficient, GroupBy-specific, i .. csv-table:: :header: "Method", "Description" :widths: 20, 80 - :delim: ; - - :meth:`~.DataFrameGroupBy.any`;Compute whether any of the values in the groups are truthy - :meth:`~.DataFrameGroupBy.all`;Compute whether all of the values in the groups are truthy - :meth:`~.DataFrameGroupBy.count`;Compute the number of non-NA values in the groups - :meth:`~.DataFrameGroupBy.cov` * ;Compute the covariance of the groups - :meth:`~.DataFrameGroupBy.first`;Compute the first occurring value in each group - :meth:`~.DataFrameGroupBy.idxmax`;Compute the index of the maximum value in each group - :meth:`~.DataFrameGroupBy.idxmin`;Compute the index of the minimum value in each group - :meth:`~.DataFrameGroupBy.last`;Compute the last occurring value in each group - :meth:`~.DataFrameGroupBy.max`;Compute the maximum value in each group - :meth:`~.DataFrameGroupBy.mean`;Compute the mean of each group - :meth:`~.DataFrameGroupBy.median`;Compute the median of each group - :meth:`~.DataFrameGroupBy.min`;Compute the minimum value in each group - :meth:`~.DataFrameGroupBy.nunique`;Compute the number of unique values in each group - :meth:`~.DataFrameGroupBy.prod`;Compute the product of the values in each group - :meth:`~.DataFrameGroupBy.quantile`;Compute a given quantile of the values in each group - :meth:`~.DataFrameGroupBy.sem`;Compute the standard error of the mean of the values in each group - :meth:`~.DataFrameGroupBy.size`;Compute the number of values in each group - :meth:`~.DataFrameGroupBy.skew` *;Compute the skew of the values in each group - :meth:`~.DataFrameGroupBy.std`;Compute the standard deviation of the values in each group - :meth:`~.DataFrameGroupBy.sum`;Compute the sum of the values in each group - :meth:`~.DataFrameGroupBy.var`;Compute the variance of the values in each group + + :meth:`~.DataFrameGroupBy.any`,Compute whether any of the values in the groups are truthy + :meth:`~.DataFrameGroupBy.all`,Compute whether all of the values in the groups are truthy + :meth:`~.DataFrameGroupBy.count`,Compute the number of non-NA values in the groups + :meth:`~.DataFrameGroupBy.cov` * ,Compute the covariance of the groups + :meth:`~.DataFrameGroupBy.first`,Compute the first occurring value in each group + :meth:`~.DataFrameGroupBy.idxmax`,Compute the index of the maximum value in each group + :meth:`~.DataFrameGroupBy.idxmin`,Compute the index of the minimum value in each group + :meth:`~.DataFrameGroupBy.last`,Compute the last occurring value in each group + :meth:`~.DataFrameGroupBy.max`,Compute the maximum value in each group + :meth:`~.DataFrameGroupBy.mean`,Compute the mean of each group + :meth:`~.DataFrameGroupBy.median`,Compute the median of each group + :meth:`~.DataFrameGroupBy.min`,Compute the minimum value in each group + :meth:`~.DataFrameGroupBy.nunique`,Compute the number of unique values in each group + :meth:`~.DataFrameGroupBy.prod`,Compute the product of the values in each group + :meth:`~.DataFrameGroupBy.quantile`,Compute a given quantile of the values in each group + :meth:`~.DataFrameGroupBy.sem`,Compute the standard error of the mean of 
the values in each group + :meth:`~.DataFrameGroupBy.size`,Compute the number of values in each group + :meth:`~.DataFrameGroupBy.skew` * ,Compute the skew of the values in each group + :meth:`~.DataFrameGroupBy.std`,Compute the standard deviation of the values in each group + :meth:`~.DataFrameGroupBy.sum`,Compute the sum of the values in each group + :meth:`~.DataFrameGroupBy.var`,Compute the variance of the values in each group Some examples: @@ -832,19 +831,18 @@ The following methods on GroupBy act as transformations. .. csv-table:: :header: "Method", "Description" :widths: 20, 80 - :delim: ; - - :meth:`~.DataFrameGroupBy.bfill`;Back fill NA values within each group - :meth:`~.DataFrameGroupBy.cumcount`;Compute the cumulative count within each group - :meth:`~.DataFrameGroupBy.cummax`;Compute the cumulative max within each group - :meth:`~.DataFrameGroupBy.cummin`;Compute the cumulative min within each group - :meth:`~.DataFrameGroupBy.cumprod`;Compute the cumulative product within each group - :meth:`~.DataFrameGroupBy.cumsum`;Compute the cumulative sum within each group - :meth:`~.DataFrameGroupBy.diff`;Compute the difference between adjacent values within each group - :meth:`~.DataFrameGroupBy.ffill`;Forward fill NA values within each group - :meth:`~.DataFrameGroupBy.pct_change`;Compute the percent change between adjacent values within each group - :meth:`~.DataFrameGroupBy.rank`;Compute the rank of each value within each group - :meth:`~.DataFrameGroupBy.shift`;Shift values up or down within each group + + :meth:`~.DataFrameGroupBy.bfill`,Back fill NA values within each group + :meth:`~.DataFrameGroupBy.cumcount`,Compute the cumulative count within each group + :meth:`~.DataFrameGroupBy.cummax`,Compute the cumulative max within each group + :meth:`~.DataFrameGroupBy.cummin`,Compute the cumulative min within each group + :meth:`~.DataFrameGroupBy.cumprod`,Compute the cumulative product within each group + :meth:`~.DataFrameGroupBy.cumsum`,Compute the cumulative sum within each group + :meth:`~.DataFrameGroupBy.diff`,Compute the difference between adjacent values within each group + :meth:`~.DataFrameGroupBy.ffill`,Forward fill NA values within each group + :meth:`~.DataFrameGroupBy.pct_change`,Compute the percent change between adjacent values within each group + :meth:`~.DataFrameGroupBy.rank`,Compute the rank of each value within each group + :meth:`~.DataFrameGroupBy.shift`,Shift values up or down within each group In addition, passing any built-in aggregation method as a string to :meth:`~.DataFrameGroupBy.transform` (see the next section) will broadcast the result @@ -1092,11 +1090,10 @@ efficient, GroupBy-specific, implementation. .. csv-table:: :header: "Method", "Description" :widths: 20, 80 - :delim: ; - :meth:`~.DataFrameGroupBy.head`;Select the top row(s) of each group - :meth:`~.DataFrameGroupBy.nth`;Select the nth row(s) of each group - :meth:`~.DataFrameGroupBy.tail`;Select the bottom row(s) of each group + :meth:`~.DataFrameGroupBy.head`,Select the top row(s) of each group + :meth:`~.DataFrameGroupBy.nth`,Select the nth row(s) of each group + :meth:`~.DataFrameGroupBy.tail`,Select the bottom row(s) of each group Users can also use transformations along with Boolean indexing to construct complex filtrations within groups. 
For example, suppose we are given groups of products and diff --git a/doc/source/user_guide/indexing.rst b/doc/source/user_guide/indexing.rst index 0da87e1d31fec..371a380ff4b0d 100644 --- a/doc/source/user_guide/indexing.rst +++ b/doc/source/user_guide/indexing.rst @@ -94,13 +94,14 @@ well). Any of the axes accessors may be the null slice ``:``. Axes left out of the specification are assumed to be ``:``, e.g. ``p.loc['a']`` is equivalent to ``p.loc['a', :]``. -.. csv-table:: - :header: "Object Type", "Indexers" - :widths: 30, 50 - :delim: ; - Series; ``s.loc[indexer]`` - DataFrame; ``df.loc[row_indexer,column_indexer]`` +.. ipython:: python + + ser = pd.Series(range(5), index=list("abcde")) + ser.loc[["a", "c", "e"]] + + df = pd.DataFrame(np.arange(25).reshape(5, 5), index=list("abcde"), columns=list("abcde")) + df.loc[["a", "c", "e"], ["b", "d"]] .. _indexing.basics: @@ -116,10 +117,9 @@ indexing pandas objects with ``[]``: .. csv-table:: :header: "Object Type", "Selection", "Return Value Type" :widths: 30, 30, 60 - :delim: ; - Series; ``series[label]``; scalar value - DataFrame; ``frame[colname]``; ``Series`` corresponding to colname + Series, ``series[label]``, scalar value + DataFrame, ``frame[colname]``, ``Series`` corresponding to colname Here we construct a simple time series data set to use for illustrating the indexing functionality: diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index 49609a80d7e15..b5cc8c43ae143 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -16,26 +16,25 @@ The pandas I/O API is a set of top level ``reader`` functions accessed like .. csv-table:: :header: "Format Type", "Data Description", "Reader", "Writer" :widths: 30, 100, 60, 60 - :delim: ; - - text;`CSV `__;:ref:`read_csv`;:ref:`to_csv` - text;Fixed-Width Text File;:ref:`read_fwf` - text;`JSON `__;:ref:`read_json`;:ref:`to_json` - text;`HTML `__;:ref:`read_html`;:ref:`to_html` - text;`LaTeX `__;;:ref:`Styler.to_latex` - text;`XML `__;:ref:`read_xml`;:ref:`to_xml` - text; Local clipboard;:ref:`read_clipboard`;:ref:`to_clipboard` - binary;`MS Excel `__;:ref:`read_excel`;:ref:`to_excel` - binary;`OpenDocument `__;:ref:`read_excel`; - binary;`HDF5 Format `__;:ref:`read_hdf`;:ref:`to_hdf` - binary;`Feather Format `__;:ref:`read_feather`;:ref:`to_feather` - binary;`Parquet Format `__;:ref:`read_parquet`;:ref:`to_parquet` - binary;`ORC Format `__;:ref:`read_orc`;:ref:`to_orc` - binary;`Stata `__;:ref:`read_stata`;:ref:`to_stata` - binary;`SAS `__;:ref:`read_sas`; - binary;`SPSS `__;:ref:`read_spss`; - binary;`Python Pickle Format `__;:ref:`read_pickle`;:ref:`to_pickle` - SQL;`SQL `__;:ref:`read_sql`;:ref:`to_sql` + + text,`CSV `__, :ref:`read_csv`, :ref:`to_csv` + text,Fixed-Width Text File, :ref:`read_fwf` , NA + text,`JSON `__, :ref:`read_json`, :ref:`to_json` + text,`HTML `__, :ref:`read_html`, :ref:`to_html` + text,`LaTeX `__, NA , :ref:`Styler.to_latex` + text,`XML `__, :ref:`read_xml`, :ref:`to_xml` + text, Local clipboard, :ref:`read_clipboard`, :ref:`to_clipboard` + binary,`MS Excel `__ , :ref:`read_excel`, :ref:`to_excel` + binary,`OpenDocument `__, :ref:`read_excel`, NA + binary,`HDF5 Format `__, :ref:`read_hdf`, :ref:`to_hdf` + binary,`Feather Format `__, :ref:`read_feather`, :ref:`to_feather` + binary,`Parquet Format `__, :ref:`read_parquet`, :ref:`to_parquet` + binary,`ORC Format `__, :ref:`read_orc`, :ref:`to_orc` + binary,`Stata `__, :ref:`read_stata`, :ref:`to_stata` + binary,`SAS `__, :ref:`read_sas` , NA + binary,`SPSS `__, 
:ref:`read_spss` , NA + binary,`Python Pickle Format `__, :ref:`read_pickle`, :ref:`to_pickle` + SQL,`SQL `__, :ref:`read_sql`,:ref:`to_sql` :ref:`Here ` is an informal performance comparison for some of these IO methods. @@ -1837,14 +1836,13 @@ with optional parameters: .. csv-table:: :widths: 20, 150 - :delim: ; - ``split``; dict like {index -> [index], columns -> [columns], data -> [values]} - ``records``; list like [{column -> value}, ... , {column -> value}] - ``index``; dict like {index -> {column -> value}} - ``columns``; dict like {column -> {index -> value}} - ``values``; just the values array - ``table``; adhering to the JSON `Table Schema`_ + ``split``, dict like {index -> [index]; columns -> [columns]; data -> [values]} + ``records``, list like [{column -> value}; ... ] + ``index``, dict like {index -> {column -> value}} + ``columns``, dict like {column -> {index -> value}} + ``values``, just the values array + ``table``, adhering to the JSON `Table Schema`_ * ``date_format`` : string, type of date conversion, 'epoch' for timestamp, 'iso' for ISO8601. * ``double_precision`` : The number of decimal places to use when encoding floating point values, default 10. @@ -2025,14 +2023,13 @@ is ``None``. To explicitly force ``Series`` parsing, pass ``typ=series`` .. csv-table:: :widths: 20, 150 - :delim: ; - - ``split``; dict like {index -> [index], columns -> [columns], data -> [values]} - ``records``; list like [{column -> value}, ... , {column -> value}] - ``index``; dict like {index -> {column -> value}} - ``columns``; dict like {column -> {index -> value}} - ``values``; just the values array - ``table``; adhering to the JSON `Table Schema`_ + + ``split``, dict like {index -> [index]; columns -> [columns]; data -> [values]} + ``records``, list like [{column -> value} ...] + ``index``, dict like {index -> {column -> value}} + ``columns``, dict like {column -> {index -> value}} + ``values``, just the values array + ``table``, adhering to the JSON `Table Schema`_ * ``dtype`` : if True, infer dtypes, if a dict of column to dtype, then use those, if ``False``, then don't infer dtypes at all, default is True, apply only to the data. diff --git a/doc/source/user_guide/text.rst b/doc/source/user_guide/text.rst index cf27fc8385223..ad2690ae395be 100644 --- a/doc/source/user_guide/text.rst +++ b/doc/source/user_guide/text.rst @@ -726,57 +726,56 @@ Method summary .. csv-table:: :header: "Method", "Description" :widths: 20, 80 - :delim: ; - - :meth:`~Series.str.cat`;Concatenate strings - :meth:`~Series.str.split`;Split strings on delimiter - :meth:`~Series.str.rsplit`;Split strings on delimiter working from the end of the string - :meth:`~Series.str.get`;Index into each element (retrieve i-th element) - :meth:`~Series.str.join`;Join strings in each element of the Series with passed separator - :meth:`~Series.str.get_dummies`;Split strings on the delimiter returning DataFrame of dummy variables - :meth:`~Series.str.contains`;Return boolean array if each string contains pattern/regex - :meth:`~Series.str.replace`;Replace occurrences of pattern/regex/string with some other string or the return value of a callable given the occurrence - :meth:`~Series.str.removeprefix`;Remove prefix from string, i.e. only remove if string starts with prefix. - :meth:`~Series.str.removesuffix`;Remove suffix from string, i.e. only remove if string ends with suffix. 
- :meth:`~Series.str.repeat`;Duplicate values (``s.str.repeat(3)`` equivalent to ``x * 3``) - :meth:`~Series.str.pad`;"Add whitespace to left, right, or both sides of strings" - :meth:`~Series.str.center`;Equivalent to ``str.center`` - :meth:`~Series.str.ljust`;Equivalent to ``str.ljust`` - :meth:`~Series.str.rjust`;Equivalent to ``str.rjust`` - :meth:`~Series.str.zfill`;Equivalent to ``str.zfill`` - :meth:`~Series.str.wrap`;Split long strings into lines with length less than a given width - :meth:`~Series.str.slice`;Slice each string in the Series - :meth:`~Series.str.slice_replace`;Replace slice in each string with passed value - :meth:`~Series.str.count`;Count occurrences of pattern - :meth:`~Series.str.startswith`;Equivalent to ``str.startswith(pat)`` for each element - :meth:`~Series.str.endswith`;Equivalent to ``str.endswith(pat)`` for each element - :meth:`~Series.str.findall`;Compute list of all occurrences of pattern/regex for each string - :meth:`~Series.str.match`;"Call ``re.match`` on each element, returning matched groups as list" - :meth:`~Series.str.extract`;"Call ``re.search`` on each element, returning DataFrame with one row for each element and one column for each regex capture group" - :meth:`~Series.str.extractall`;"Call ``re.findall`` on each element, returning DataFrame with one row for each match and one column for each regex capture group" - :meth:`~Series.str.len`;Compute string lengths - :meth:`~Series.str.strip`;Equivalent to ``str.strip`` - :meth:`~Series.str.rstrip`;Equivalent to ``str.rstrip`` - :meth:`~Series.str.lstrip`;Equivalent to ``str.lstrip`` - :meth:`~Series.str.partition`;Equivalent to ``str.partition`` - :meth:`~Series.str.rpartition`;Equivalent to ``str.rpartition`` - :meth:`~Series.str.lower`;Equivalent to ``str.lower`` - :meth:`~Series.str.casefold`;Equivalent to ``str.casefold`` - :meth:`~Series.str.upper`;Equivalent to ``str.upper`` - :meth:`~Series.str.find`;Equivalent to ``str.find`` - :meth:`~Series.str.rfind`;Equivalent to ``str.rfind`` - :meth:`~Series.str.index`;Equivalent to ``str.index`` - :meth:`~Series.str.rindex`;Equivalent to ``str.rindex`` - :meth:`~Series.str.capitalize`;Equivalent to ``str.capitalize`` - :meth:`~Series.str.swapcase`;Equivalent to ``str.swapcase`` - :meth:`~Series.str.normalize`;Return Unicode normal form. 
Equivalent to ``unicodedata.normalize`` - :meth:`~Series.str.translate`;Equivalent to ``str.translate`` - :meth:`~Series.str.isalnum`;Equivalent to ``str.isalnum`` - :meth:`~Series.str.isalpha`;Equivalent to ``str.isalpha`` - :meth:`~Series.str.isdigit`;Equivalent to ``str.isdigit`` - :meth:`~Series.str.isspace`;Equivalent to ``str.isspace`` - :meth:`~Series.str.islower`;Equivalent to ``str.islower`` - :meth:`~Series.str.isupper`;Equivalent to ``str.isupper`` - :meth:`~Series.str.istitle`;Equivalent to ``str.istitle`` - :meth:`~Series.str.isnumeric`;Equivalent to ``str.isnumeric`` - :meth:`~Series.str.isdecimal`;Equivalent to ``str.isdecimal`` + + :meth:`~Series.str.cat`,Concatenate strings + :meth:`~Series.str.split`,Split strings on delimiter + :meth:`~Series.str.rsplit`,Split strings on delimiter working from the end of the string + :meth:`~Series.str.get`,Index into each element (retrieve i-th element) + :meth:`~Series.str.join`,Join strings in each element of the Series with passed separator + :meth:`~Series.str.get_dummies`,Split strings on the delimiter returning DataFrame of dummy variables + :meth:`~Series.str.contains`,Return boolean array if each string contains pattern/regex + :meth:`~Series.str.replace`,Replace occurrences of pattern/regex/string with some other string or the return value of a callable given the occurrence + :meth:`~Series.str.removeprefix`,Remove prefix from string i.e. only remove if string starts with prefix. + :meth:`~Series.str.removesuffix`,Remove suffix from string i.e. only remove if string ends with suffix. + :meth:`~Series.str.repeat`,Duplicate values (``s.str.repeat(3)`` equivalent to ``x * 3``) + :meth:`~Series.str.pad`,Add whitespace to the sides of strings + :meth:`~Series.str.center`,Equivalent to ``str.center`` + :meth:`~Series.str.ljust`,Equivalent to ``str.ljust`` + :meth:`~Series.str.rjust`,Equivalent to ``str.rjust`` + :meth:`~Series.str.zfill`,Equivalent to ``str.zfill`` + :meth:`~Series.str.wrap`,Split long strings into lines with length less than a given width + :meth:`~Series.str.slice`,Slice each string in the Series + :meth:`~Series.str.slice_replace`,Replace slice in each string with passed value + :meth:`~Series.str.count`,Count occurrences of pattern + :meth:`~Series.str.startswith`,Equivalent to ``str.startswith(pat)`` for each element + :meth:`~Series.str.endswith`,Equivalent to ``str.endswith(pat)`` for each element + :meth:`~Series.str.findall`,Compute list of all occurrences of pattern/regex for each string + :meth:`~Series.str.match`,Call ``re.match`` on each element returning matched groups as list + :meth:`~Series.str.extract`,Call ``re.search`` on each element returning DataFrame with one row for each element and one column for each regex capture group + :meth:`~Series.str.extractall`,Call ``re.findall`` on each element returning DataFrame with one row for each match and one column for each regex capture group + :meth:`~Series.str.len`,Compute string lengths + :meth:`~Series.str.strip`,Equivalent to ``str.strip`` + :meth:`~Series.str.rstrip`,Equivalent to ``str.rstrip`` + :meth:`~Series.str.lstrip`,Equivalent to ``str.lstrip`` + :meth:`~Series.str.partition`,Equivalent to ``str.partition`` + :meth:`~Series.str.rpartition`,Equivalent to ``str.rpartition`` + :meth:`~Series.str.lower`,Equivalent to ``str.lower`` + :meth:`~Series.str.casefold`,Equivalent to ``str.casefold`` + :meth:`~Series.str.upper`,Equivalent to ``str.upper`` + :meth:`~Series.str.find`,Equivalent to ``str.find`` + :meth:`~Series.str.rfind`,Equivalent to 
``str.rfind`` + :meth:`~Series.str.index`,Equivalent to ``str.index`` + :meth:`~Series.str.rindex`,Equivalent to ``str.rindex`` + :meth:`~Series.str.capitalize`,Equivalent to ``str.capitalize`` + :meth:`~Series.str.swapcase`,Equivalent to ``str.swapcase`` + :meth:`~Series.str.normalize`,Return Unicode normal form. Equivalent to ``unicodedata.normalize`` + :meth:`~Series.str.translate`,Equivalent to ``str.translate`` + :meth:`~Series.str.isalnum`,Equivalent to ``str.isalnum`` + :meth:`~Series.str.isalpha`,Equivalent to ``str.isalpha`` + :meth:`~Series.str.isdigit`,Equivalent to ``str.isdigit`` + :meth:`~Series.str.isspace`,Equivalent to ``str.isspace`` + :meth:`~Series.str.islower`,Equivalent to ``str.islower`` + :meth:`~Series.str.isupper`,Equivalent to ``str.isupper`` + :meth:`~Series.str.istitle`,Equivalent to ``str.istitle`` + :meth:`~Series.str.isnumeric`,Equivalent to ``str.isnumeric`` + :meth:`~Series.str.isdecimal`,Equivalent to ``str.isdecimal`` diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 9e7349a061295..278971ef88a0f 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -253,6 +253,7 @@ Removal of prior version deprecations/changes - Enforced deprecation of :meth:`Series.interpolate` and :meth:`DataFrame.interpolate` for object-dtype (:issue:`57820`) - Enforced deprecation of :meth:`offsets.Tick.delta`, use ``pd.Timedelta(obj)`` instead (:issue:`55498`) - Enforced deprecation of ``axis=None`` acting the same as ``axis=0`` in the DataFrame reductions ``sum``, ``prod``, ``std``, ``var``, and ``sem``, passing ``axis=None`` will now reduce over both axes; this is particularly the case when doing e.g. ``numpy.sum(df)`` (:issue:`21597`) +- Enforced deprecation of ``core.internals`` members ``Block``, ``ExtensionBlock``, and ``DatetimeTZBlock`` (:issue:`58467`) - Enforced deprecation of non-standard (``np.ndarray``, :class:`ExtensionArray`, :class:`Index`, or :class:`Series`) argument to :func:`api.extensions.take` (:issue:`52981`) - Enforced deprecation of parsing system timezone strings to ``tzlocal``, which depended on system timezone, pass the 'tz' keyword instead (:issue:`50791`) - Enforced deprecation of passing a dictionary to :meth:`SeriesGroupBy.agg` (:issue:`52268`) @@ -414,7 +415,7 @@ Indexing Missing ^^^^^^^ -- +- Bug in :meth:`DataFrame.fillna` and :meth:`Series.fillna` that would ignore the ``limit`` argument on :class:`.ExtensionArray` dtypes (:issue:`58001`) - MultiIndex @@ -465,7 +466,6 @@ Sparse ExtensionArray ^^^^^^^^^^^^^^ - Bug in :meth:`api.types.is_datetime64_any_dtype` where a custom :class:`ExtensionDtype` would return ``False`` for array-likes (:issue:`57055`) -- Styler ^^^^^^ diff --git a/environment.yml b/environment.yml index d0a612a363878..dcc7aa5280b2c 100644 --- a/environment.yml +++ b/environment.yml @@ -89,7 +89,6 @@ dependencies: - numpydoc - pydata-sphinx-theme=0.14 - pytest-cython # doctest - - docutils < 0.21 # https://github.com/sphinx-doc/sphinx/issues/12302 - sphinx - sphinx-design - sphinx-copybutton diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index c7bf11ea75fb4..c1b615839c8cc 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -841,6 +841,12 @@ cdef class _Timestamp(ABCTimestamp): ------- int + See Also + -------- + Timestamp.isoweekday : Return the ISO day of the week represented by the date. + Timestamp.weekday : Return the day of the week represented by the date. 
+ Timestamp.day_of_year : Return day of the year. + Examples -------- >>> ts = pd.Timestamp(2020, 3, 14) diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py index cbd0221cc2082..7b941e7ea8338 100644 --- a/pandas/core/arrays/_mixins.py +++ b/pandas/core/arrays/_mixins.py @@ -330,6 +330,13 @@ def _pad_or_backfill( @doc(ExtensionArray.fillna) def fillna(self, value, limit: int | None = None, copy: bool = True) -> Self: mask = self.isna() + if limit is not None and limit < len(self): + # mypy doesn't like that mask can be an EA which need not have `cumsum` + modify = mask.cumsum() > limit # type: ignore[union-attr] + if modify.any(): + # Only copy mask if necessary + mask = mask.copy() + mask[modify] = False # error: Argument 2 to "check_value_size" has incompatible type # "ExtensionArray"; expected "ndarray" value = missing.check_value_size( diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index e363c5380a8e7..ae22f1344e3cf 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -755,7 +755,8 @@ def isna(self) -> np.ndarray | ExtensionArraySupportsAnyAll: If returning an ExtensionArray, then * ``na_values._is_boolean`` should be True - * `na_values` should implement :func:`ExtensionArray._reduce` + * ``na_values`` should implement :func:`ExtensionArray._reduce` + * ``na_values`` should implement :func:`ExtensionArray._accumulate` * ``na_values.any`` and ``na_values.all`` should be implemented Examples @@ -1058,19 +1059,12 @@ def fillna( Alternatively, an array-like "value" can be given. It's expected that the array-like have the same length as 'self'. limit : int, default None - If method is specified, this is the maximum number of consecutive - NaN values to forward/backward fill. In other words, if there is - a gap with more than this number of consecutive NaNs, it will only - be partially filled. If method is not specified, this is the - maximum number of entries along the entire axis where NaNs will be - filled. + The maximum number of entries where NA values will be filled. copy : bool, default True Whether to make a copy of the data before filling. If False, then the original should be modified and no new memory should be allocated. For ExtensionArray subclasses that cannot do this, it is at the author's discretion whether to ignore "copy=False" or to raise. - The base class implementation ignores the keyword in pad/backfill - cases. Returns ------- @@ -1086,6 +1080,15 @@ def fillna( Length: 6, dtype: Int64 """ mask = self.isna() + if limit is not None and limit < len(self): + # isna can return an ExtensionArray, we're assuming that comparisons + # are implemented. + # mypy doesn't like that mask can be an EA which need not have `cumsum` + modify = mask.cumsum() > limit # type: ignore[union-attr] + if modify.any(): + # Only copy mask if necessary + mask = mask.copy() + mask[modify] = False # error: Argument 2 to "check_value_size" has incompatible type # "ExtensionArray"; expected "ndarray" value = missing.check_value_size( diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index 86f58b48ea3be..6149adc8bc1a9 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -905,12 +905,7 @@ def fillna(self, value, limit: int | None = None, copy: bool = True) -> Self: value(s) passed should be either Interval objects or NA/NaN. 
limit : int, default None (Not implemented yet for IntervalArray) - If method is specified, this is the maximum number of consecutive - NaN values to forward/backward fill. In other words, if there is - a gap with more than this number of consecutive NaNs, it will only - be partially filled. If method is not specified, this is the - maximum number of entries along the entire axis where NaNs will be - filled. + The maximum number of entries where NA values will be filled. copy : bool, default True Whether to make a copy of the data before filling. If False, then the original should be modified and no new memory should be allocated. @@ -923,6 +918,8 @@ def fillna(self, value, limit: int | None = None, copy: bool = True) -> Self: """ if copy is False: raise NotImplementedError + if limit is not None: + raise ValueError("limit must be None") value_left, value_right = self._validate_scalar(value) diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index df794183f67d1..e77c998e796a9 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -232,6 +232,12 @@ def _pad_or_backfill( @doc(ExtensionArray.fillna) def fillna(self, value, limit: int | None = None, copy: bool = True) -> Self: mask = self._mask + if limit is not None and limit < len(self): + modify = mask.cumsum() > limit + if modify.any(): + # Only copy mask if necessary + mask = mask.copy() + mask[modify] = False value = missing.check_value_size(value, mask, len(self)) diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index 522d86fb165f6..42e1cdd337e62 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -717,6 +717,7 @@ def fillna( ---------- value : scalar limit : int, optional + Not supported for SparseArray, must be None. copy: bool, default True Ignored for SparseArray. @@ -736,6 +737,8 @@ def fillna( When ``self.fill_value`` is not NA, the result dtype will be ``self.dtype``. Again, this preserves the amount of memory used. """ + if limit is not None: + raise ValueError("limit must be None") new_values = np.where(isna(self.sp_values), value, self.sp_values) if self._null_fill_value: diff --git a/pandas/core/base.py b/pandas/core/base.py index 07a7f784b600e..bee9b3f13c593 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -342,6 +342,12 @@ def shape(self) -> Shape: """ Return a tuple of the shape of the underlying data. + See Also + -------- + Series.ndim : Number of dimensions of the underlying data. + Series.size : Return the number of elements in the underlying data. + Series.nbytes : Return the number of bytes in the underlying data. + Examples -------- >>> s = pd.Series([1, 2, 3]) @@ -903,6 +909,11 @@ def hasnans(self) -> bool: ------- bool + See Also + -------- + Series.isna : Detect missing values. + Series.notna : Detect existing (non-missing) values. + Examples -------- >>> s = pd.Series([1, 2, 3, None]) @@ -1102,6 +1113,12 @@ def is_unique(self) -> bool: ------- bool + See Also + -------- + Series.unique : Return unique values of Series object. + Series.drop_duplicates : Return Series with duplicate values removed. + Series.duplicated : Indicate duplicate Series values. + Examples -------- >>> s = pd.Series([1, 2, 3]) @@ -1123,6 +1140,11 @@ def is_monotonic_increasing(self) -> bool: ------- bool + See Also + -------- + Series.is_monotonic_decreasing : Return boolean if values in the object are + monotonically decreasing. 
+ Examples -------- >>> s = pd.Series([1, 2, 2]) @@ -1146,6 +1168,11 @@ def is_monotonic_decreasing(self) -> bool: ------- bool + See Also + -------- + Series.is_monotonic_increasing : Return boolean if values in the object are + monotonically increasing. + Examples -------- >>> s = pd.Series([3, 2, 2, 1]) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 24727bb9d83c1..20d8bd3bda21f 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6622,7 +6622,7 @@ def convert_dtypes( dtype_backend: DtypeBackend = "numpy_nullable", ) -> Self: """ - Convert columns to the best possible dtypes using dtypes supporting ``pd.NA``. + Convert columns from numpy dtypes to the best dtypes that support ``pd.NA``. Parameters ---------- @@ -6639,13 +6639,13 @@ def convert_dtypes( If `convert_integer` is also True, preference will be give to integer dtypes if the floats can be faithfully casted to integers. dtype_backend : {'numpy_nullable', 'pyarrow'}, default 'numpy_nullable' - Back-end data type applied to the resultant :class:`DataFrame` - (still experimental). Behaviour is as follows: + Back-end data type applied to the resultant :class:`DataFrame` or + :class:`Series` (still experimental). Behaviour is as follows: * ``"numpy_nullable"``: returns nullable-dtype-backed :class:`DataFrame` - (default). + or :class:`Series` (default). * ``"pyarrow"``: returns pyarrow-backed nullable :class:`ArrowDtype` - DataFrame. + DataFrame or Series. .. versionadded:: 2.0 diff --git a/pandas/core/internals/__init__.py b/pandas/core/internals/__init__.py index 89c8a4a27ca31..45758379e0bd6 100644 --- a/pandas/core/internals/__init__.py +++ b/pandas/core/internals/__init__.py @@ -6,58 +6,8 @@ ) __all__ = [ - "Block", - "DatetimeTZBlock", - "ExtensionBlock", "make_block", "BlockManager", "SingleBlockManager", "concatenate_managers", ] - - -def __getattr__(name: str): - # GH#55139 - import warnings - - if name == "create_block_manager_from_blocks": - # GH#33892 - warnings.warn( - f"{name} is deprecated and will be removed in a future version. " - "Use public APIs instead.", - DeprecationWarning, - # https://github.com/pandas-dev/pandas/pull/55139#pullrequestreview-1720690758 - # on hard-coding stacklevel - stacklevel=2, - ) - from pandas.core.internals.managers import create_block_manager_from_blocks - - return create_block_manager_from_blocks - - if name in [ - "Block", - "ExtensionBlock", - "DatetimeTZBlock", - ]: - warnings.warn( - f"{name} is deprecated and will be removed in a future version. 
" - "Use public APIs instead.", - DeprecationWarning, - # https://github.com/pandas-dev/pandas/pull/55139#pullrequestreview-1720690758 - # on hard-coding stacklevel - stacklevel=2, - ) - if name == "DatetimeTZBlock": - from pandas.core.internals.blocks import DatetimeTZBlock - - return DatetimeTZBlock - elif name == "ExtensionBlock": - from pandas.core.internals.blocks import ExtensionBlock - - return ExtensionBlock - else: - from pandas.core.internals.blocks import Block - - return Block - - raise AttributeError(f"module 'pandas.core.internals' has no attribute '{name}'") diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 28d3292a1c65b..6107c009b5bf1 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1863,6 +1863,8 @@ def fillna( ) -> list[Block]: if isinstance(self.dtype, IntervalDtype): # Block.fillna handles coercion (test_fillna_interval) + if limit is not None: + raise ValueError("limit must be None") return super().fillna( value=value, limit=limit, diff --git a/pandas/core/series.py b/pandas/core/series.py index 2065a2ad5530e..8474fd0d74919 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -626,6 +626,13 @@ def dtype(self) -> DtypeObj: """ Return the dtype object of the underlying data. + See Also + -------- + Series.dtypes : Return the dtype object of the underlying data. + Series.astype : Cast a pandas object to a specified dtype dtype. + Series.convert_dtypes : Convert columns to the best possible dtypes using dtypes + supporting pd.NA. + Examples -------- >>> s = pd.Series([1, 2, 3]) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 5d325397a81ae..4fce338ccad6f 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -125,7 +125,8 @@ npt, ) - from pandas.core.internals import Block + from pandas.core.internals.blocks import Block + # versioning attribute _version = "0.15.2" diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py index 225a3301b8b8c..b951d4c35d208 100644 --- a/pandas/tests/extension/base/methods.py +++ b/pandas/tests/extension/base/methods.py @@ -299,6 +299,20 @@ def test_factorize_empty(self, data): tm.assert_numpy_array_equal(codes, expected_codes) tm.assert_extension_array_equal(uniques, expected_uniques) + def test_fillna_limit_frame(self, data_missing): + # GH#58001 + df = pd.DataFrame({"A": data_missing.take([0, 1, 0, 1])}) + expected = pd.DataFrame({"A": data_missing.take([1, 1, 0, 1])}) + result = df.fillna(value=data_missing[1], limit=1) + tm.assert_frame_equal(result, expected) + + def test_fillna_limit_series(self, data_missing): + # GH#58001 + ser = pd.Series(data_missing.take([0, 1, 0, 1])) + expected = pd.Series(data_missing.take([1, 1, 0, 1])) + result = ser.fillna(value=data_missing[1], limit=1) + tm.assert_series_equal(result, expected) + def test_fillna_copy_frame(self, data_missing): arr = data_missing.take([1, 1]) df = pd.DataFrame({"A": arr}) diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py index 504bafc145108..6f18761f77138 100644 --- a/pandas/tests/extension/decimal/test_decimal.py +++ b/pandas/tests/extension/decimal/test_decimal.py @@ -152,6 +152,22 @@ def test_fillna_with_none(self, data_missing): with pytest.raises(TypeError, match=msg): super().test_fillna_with_none(data_missing) + def test_fillna_limit_frame(self, data_missing): + # GH#58001 + msg = "ExtensionArray.fillna added a 'copy' keyword" + with tm.assert_produces_warning( + 
DeprecationWarning, match=msg, check_stacklevel=False + ): + super().test_fillna_limit_frame(data_missing) + + def test_fillna_limit_series(self, data_missing): + # GH#58001 + msg = "ExtensionArray.fillna added a 'copy' keyword" + with tm.assert_produces_warning( + DeprecationWarning, match=msg, check_stacklevel=False + ): + super().test_fillna_limit_series(data_missing) + @pytest.mark.parametrize("dropna", [True, False]) def test_value_counts(self, all_data, dropna): all_data = all_data[:10] diff --git a/pandas/tests/extension/json/test_json.py b/pandas/tests/extension/json/test_json.py index 22ac9627f6cda..4bc9562f1895d 100644 --- a/pandas/tests/extension/json/test_json.py +++ b/pandas/tests/extension/json/test_json.py @@ -156,6 +156,16 @@ def test_fillna_with_none(self, data_missing): with pytest.raises(AssertionError): super().test_fillna_with_none(data_missing) + @pytest.mark.xfail(reason="fill value is a dictionary, takes incorrect code path") + def test_fillna_limit_frame(self, data_missing): + # GH#58001 + super().test_fillna_limit_frame(data_missing) + + @pytest.mark.xfail(reason="fill value is a dictionary, takes incorrect code path") + def test_fillna_limit_series(self, data_missing): + # GH#58001 + super().test_fillna_limit_series(data_missing) + @pytest.mark.parametrize( "limit_area, input_ilocs, expected_ilocs", [ diff --git a/pandas/tests/extension/test_interval.py b/pandas/tests/extension/test_interval.py index 6900d6d67f9d9..ec979ac6d22dc 100644 --- a/pandas/tests/extension/test_interval.py +++ b/pandas/tests/extension/test_interval.py @@ -84,6 +84,16 @@ class TestIntervalArray(base.ExtensionTests): def _supports_reduction(self, ser: pd.Series, op_name: str) -> bool: return op_name in ["min", "max"] + def test_fillna_limit_frame(self, data_missing): + # GH#58001 + with pytest.raises(ValueError, match="limit must be None"): + super().test_fillna_limit_frame(data_missing) + + def test_fillna_limit_series(self, data_missing): + # GH#58001 + with pytest.raises(ValueError, match="limit must be None"): + super().test_fillna_limit_series(data_missing) + @pytest.mark.xfail( reason="Raises with incorrect message bc it disallows *all* listlikes " "instead of just wrong-length listlikes" ) diff --git a/pandas/tests/extension/test_numpy.py b/pandas/tests/extension/test_numpy.py index ca79c13ed44e4..79cfb736941d6 100644 --- a/pandas/tests/extension/test_numpy.py +++ b/pandas/tests/extension/test_numpy.py @@ -205,6 +205,18 @@ def test_shift_fill_value(self, data): # np.array shape inference. Shift implementation fails. super().test_shift_fill_value(data) + @skip_nested + def test_fillna_limit_frame(self, data_missing): + # GH#58001 + # The "scalar" for this array isn't a scalar. + super().test_fillna_limit_frame(data_missing) + + @skip_nested + def test_fillna_limit_series(self, data_missing): + # GH#58001 + # The "scalar" for this array isn't a scalar. + super().test_fillna_limit_series(data_missing) + @skip_nested def test_fillna_copy_frame(self, data_missing): # The "scalar" for this array isn't a scalar. 
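The overrides above all exercise the base-class tests added in ``pandas/tests/extension/base/methods.py``. As a rough sketch of the behaviour under test (assuming the nullable ``Int64`` dtype as a representative extension dtype): with the GH#58001 fix, ``limit`` caps the total number of NA entries filled instead of being silently ignored.

```python
import pandas as pd

ser = pd.Series([1, None, None, 2], dtype="Int64")

# Only the first `limit` NA positions are filled; before the fix,
# extension dtypes ignored `limit` and filled every NA.
print(ser.fillna(0, limit=1))
# 0       1
# 1       0
# 2    <NA>
# 3       2
# dtype: Int64
```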
diff --git a/pandas/tests/extension/test_sparse.py b/pandas/tests/extension/test_sparse.py index 5595a9ca44d05..56c023d99bb1c 100644 --- a/pandas/tests/extension/test_sparse.py +++ b/pandas/tests/extension/test_sparse.py @@ -264,6 +264,16 @@ def test_fillna_frame(self, data_missing): tm.assert_frame_equal(result, expected) + def test_fillna_limit_frame(self, data_missing): + # GH#58001 + with pytest.raises(ValueError, match="limit must be None"): + super().test_fillna_limit_frame(data_missing) + + def test_fillna_limit_series(self, data_missing): + # GH#58001 + with pytest.raises(ValueError, match="limit must be None"): + super().test_fillna_limit_series(data_missing) + _combine_le_expected_dtype = "Sparse[bool]" def test_fillna_copy_frame(self, data_missing): diff --git a/pandas/tests/internals/test_api.py b/pandas/tests/internals/test_api.py index 7ab8988521fdf..c189d5248b1f3 100644 --- a/pandas/tests/internals/test_api.py +++ b/pandas/tests/internals/test_api.py @@ -41,21 +41,6 @@ def test_namespace(): assert set(result) == set(expected + modules) -@pytest.mark.parametrize( - "name", - [ - "Block", - "ExtensionBlock", - "DatetimeTZBlock", - ], -) -def test_deprecations(name): - # GH#55139 - msg = f"{name} is deprecated.* Use public APIs instead" - with tm.assert_produces_warning(DeprecationWarning, match=msg): - getattr(internals, name) - - def test_make_block_2d_with_dti(): # GH#41168 dti = pd.date_range("2012", periods=3, tz="UTC") @@ -65,18 +50,6 @@ def test_make_block_2d_with_dti(): assert blk.values.shape == (1, 3) -def test_create_block_manager_from_blocks_deprecated(): - # GH#33892 - # If they must, downstream packages should get this from internals.api, - # not internals. - msg = ( - "create_block_manager_from_blocks is deprecated and will be " - "removed in a future version. Use public APIs instead" - ) - with tm.assert_produces_warning(DeprecationWarning, match=msg): - internals.create_block_manager_from_blocks - - def test_create_dataframe_from_blocks(float_frame): block = float_frame._mgr.blocks[0] index = float_frame.index.copy() diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 55be48eb572fd..2860b3a6483af 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -655,6 +655,7 @@ def test_read_empty_array(self, pa, dtype): "value": pd.array([], dtype=dtype), } ) + pytest.importorskip("pyarrow", "11.0.0") # GH 45694 expected = None if dtype == "float": @@ -671,6 +672,7 @@ def test_read_empty_array(self, pa, dtype): class TestParquetPyArrow(Base): def test_basic(self, pa, df_full): df = df_full + pytest.importorskip("pyarrow", "11.0.0") # additional supported types for pyarrow dti = pd.date_range("20130101", periods=3, tz="Europe/Brussels") @@ -938,6 +940,8 @@ def test_timestamp_nanoseconds(self, pa): check_round_trip(df, pa, write_kwargs={"version": ver}) def test_timezone_aware_index(self, request, pa, timezone_aware_date_list): + pytest.importorskip("pyarrow", "11.0.0") + if timezone_aware_date_list.tzinfo != datetime.timezone.utc: request.applymarker( pytest.mark.xfail( diff --git a/requirements-dev.txt b/requirements-dev.txt index 6c5764bf589cc..f5da7f70ccdba 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -64,7 +64,6 @@ natsort numpydoc pydata-sphinx-theme==0.14 pytest-cython -docutils < 0.21 sphinx sphinx-design sphinx-copybutton
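The ``limit`` handling added in ``_mixins.py``, ``base.py``, and ``masked.py`` above rests on a single cumulative-sum trick over the NA mask. A standalone illustration of that masking logic, using plain NumPy:

```python
import numpy as np

mask = np.array([False, True, True, False, True])  # True marks an NA position
limit = 2

# Entries past the first `limit` NAs are excluded from filling.
modify = mask.cumsum() > limit
if modify.any():
    mask = mask.copy()  # only copy the mask when something actually changes
    mask[modify] = False

print(mask)  # [False  True  True False False]
```

Keeping the copy inside the ``modify.any()`` check mirrors the patch's design choice: when ``limit`` does not trim anything, the original mask is reused and no extra allocation occurs.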