Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/master' into Validate-blanks
Browse files Browse the repository at this point in the history
  • Loading branch information
Ayowolet committed Sep 19, 2019
2 parents 1fc9b0f + 4ac7f9d commit 689c59a
Show file tree
Hide file tree
Showing 27 changed files with 167 additions and 254 deletions.
1 change: 1 addition & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,4 @@ repos:
hooks:
- id: isort
language: python_venv
exclude: ^pandas/__init__\.py$|^pandas/core/api\.py$
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,7 @@ Most development discussion is taking place on github in this repo. Further, the

All contributions, bug reports, bug fixes, documentation improvements, enhancements and ideas are welcome.

A detailed overview on how to contribute can be found in the **[contributing guide](https://dev.pandas.io/contributing.html)**. There is also an [overview](.github/CONTRIBUTING.md) on GitHub.
A detailed overview on how to contribute can be found in the **[contributing guide](https://dev.pandas.io/docs/contributing.html)**. There is also an [overview](.github/CONTRIBUTING.md) on GitHub.

If you are simply looking to start working with the pandas codebase, navigate to the [GitHub "issues" tab](https://github.com/pandas-dev/pandas/issues) and start looking through interesting issues. There are a number of issues listed under [Docs](https://github.com/pandas-dev/pandas/issues?labels=Docs&sort=updated&state=open) and [good first issue](https://github.com/pandas-dev/pandas/issues?labels=good+first+issue&sort=updated&state=open) where you could start out.

Expand Down
23 changes: 17 additions & 6 deletions azure-pipelines.yml
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ jobs:
displayName: 'Running benchmarks'
condition: true
- job: 'Docs'
- job: 'Web_and_Docs'
pool:
vmImage: ubuntu-16.04
timeoutInMinutes: 90
Expand All @@ -119,6 +119,11 @@ jobs:
ci/setup_env.sh
displayName: 'Setup environment and build pandas'
- script: |
source activate pandas-dev
python web/pandas_web.py web/pandas --target-path=web/build
displayName: 'Build website'
- script: |
source activate pandas-dev
# Next we should simply have `doc/make.py --warnings-are-errors`, everything else is required because the ipython directive doesn't fail the build on errors (https://github.com/ipython/ipython/issues/11547)
Expand All @@ -128,15 +133,21 @@ jobs:
displayName: 'Build documentation'
- script: |
cd doc/build/html
mkdir -p to_deploy/docs
cp -r web/build/* to_deploy/
cp -r doc/build/html/* to_deploy/docs/
displayName: 'Merge website and docs'
- script: |
cd to_deploy
git init
touch .nojekyll
echo "dev.pandas.io" > CNAME
printf "User-agent: *\nDisallow: /" > robots.txt
git add --all .
git config user.email "[email protected]"
git config user.name "pandas-docs-bot"
git commit -m "pandas documentation in master"
git config user.name "pandas-bot"
git commit -m "pandas web and documentation in master"
displayName: 'Create git repo for docs build'
condition : |
and(not(eq(variables['Build.Reason'], 'PullRequest')),
Expand All @@ -160,10 +171,10 @@ jobs:
eq(variables['Build.SourceBranch'], 'refs/heads/master'))
- script: |
cd doc/build/html
cd to_deploy
git remote add origin [email protected]:pandas-dev/pandas-dev.github.io.git
git push -f origin master
displayName: 'Publish docs to GitHub pages'
displayName: 'Publish web and docs to GitHub pages'
condition : |
and(not(eq(variables['Build.Reason'], 'PullRequest')),
eq(variables['Build.SourceBranch'], 'refs/heads/master'))
7 changes: 7 additions & 0 deletions ci/azure/posix.yml
Original file line number Diff line number Diff line change
Expand Up @@ -60,15 +60,21 @@ jobs:
echo "Creating Environment"
ci/setup_env.sh
displayName: 'Setup environment and build pandas'
- script: |
source activate pandas-dev
ci/run_tests.sh
displayName: 'Test'
- script: source activate pandas-dev && pushd /tmp && python -c "import pandas; pandas.show_versions();" && popd
displayName: 'Build versions'

- task: PublishTestResults@2
inputs:
testResultsFiles: 'test-data-*.xml'
testRunTitle: ${{ format('{0}-$(CONDA_PY)', parameters.name) }}
displayName: 'Publish test results'

- powershell: |
$junitXml = "test-data-single.xml"
$(Get-Content $junitXml | Out-String) -match 'failures="(.*?)"'
Expand All @@ -94,6 +100,7 @@ jobs:
Write-Error "$($matches[1]) tests failed"
}
displayName: 'Check for test failures'
- script: |
source activate pandas-dev
python ci/print_skipped.py
Expand Down
58 changes: 23 additions & 35 deletions ci/print_skipped.py
Original file line number Diff line number Diff line change
@@ -1,52 +1,40 @@
#!/usr/bin/env python

import math
import os
import sys
import xml.etree.ElementTree as et


def parse_results(filename):
def main(filename):
if not os.path.isfile(filename):
return

tree = et.parse(filename)
root = tree.getroot()
skipped = []

current_class = ""
i = 1
assert i - 1 == len(skipped)
for el in root.findall("testcase"):
cn = el.attrib["classname"]
for sk in el.findall("skipped"):
old_class = current_class
current_class = cn
name = "{classname}.{name}".format(
classname=current_class, name=el.attrib["name"]
)
msg = sk.attrib["message"]
out = ""
if old_class != current_class:
ndigits = int(math.log(i, 10) + 1)

# 4 for : + space + # + space
out += "-" * (len(name + msg) + 4 + ndigits) + "\n"
out += "#{i} {name}: {msg}".format(i=i, name=name, msg=msg)
skipped.append(out)
i += 1
assert i - 1 == len(skipped)
assert i - 1 == len(skipped)
# assert len(skipped) == int(root.attrib['skip'])
return "\n".join(skipped)


def main():
test_files = ["test-data-single.xml", "test-data-multiple.xml", "test-data.xml"]

print("SKIPPED TESTS:")
for fn in test_files:
if os.path.isfile(fn):
print(parse_results(fn))
return 0
yield None
yield {
"class_name": current_class,
"test_name": el.attrib["name"],
"message": sk.attrib["message"],
}


if __name__ == "__main__":
sys.exit(main())
print("SKIPPED TESTS:")
i = 1
for file_type in ("-single", "-multiple", ""):
for test_data in main("test-data{}.xml".format(file_type)):
if test_data is None:
print("-" * 80)
else:
print(
"#{i} {class_name}.{test_name}: {message}".format(
**dict(test_data, i=i)
)
)
i += 1
13 changes: 3 additions & 10 deletions ci/run_tests.sh
Original file line number Diff line number Diff line change
@@ -1,13 +1,6 @@
#!/bin/bash
#!/bin/bash -e

set -e

if [ "$DOC" ]; then
echo "We are not running pytest as this is a doc-build"
exit 0
fi

# Workaround for pytest-xdist flaky collection order
# Workaround for pytest-xdist (it collects different tests in the workers if PYTHONHASHSEED is not set)
# https://github.com/pytest-dev/pytest/issues/920
# https://github.com/pytest-dev/pytest/issues/1075
export PYTHONHASHSEED=$(python -c 'import random; print(random.randint(1, 4294967295))')
Expand All @@ -16,7 +9,7 @@ if [ -n "$LOCALE_OVERRIDE" ]; then
export LC_ALL="$LOCALE_OVERRIDE"
export LANG="$LOCALE_OVERRIDE"
PANDAS_LOCALE=`python -c 'import pandas; pandas.get_option("display.encoding")'`
if [[ "$LOCALE_OVERIDE" != "$PANDAS_LOCALE" ]]; then
if [[ "$LOCALE_OVERRIDE" != "$PANDAS_LOCALE" ]]; then
echo "pandas could not detect the locale. System locale: $LOCALE_OVERRIDE, pandas detected: $PANDAS_LOCALE"
# TODO Not really aborting the tests until https://github.com/pandas-dev/pandas/issues/23923 is fixed
# exit 1
Expand Down
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v0.25.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -828,7 +828,7 @@ If installed, we now require:
| pytest (dev) | 4.0.2 | |
+-----------------+-----------------+----------+
For `optional libraries <https://dev.pandas.io/install.html#dependencies>`_ the general recommendation is to use the latest version.
For `optional libraries <https://dev.pandas.io/docs/install.html#dependencies>`_ the general recommendation is to use the latest version.
The following table lists the lowest version per library that is currently being tested throughout the development of pandas.
Optional libraries below the lowest tested version may still work, but are not considered supported.
Expand Down
5 changes: 4 additions & 1 deletion doc/source/whatsnew/v1.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,8 @@ Removal of prior version deprecations/changes
- :meth:`pandas.Series.str.cat` does not accept list-likes *within* list-likes anymore (:issue:`27611`)
- Removed the previously deprecated :meth:`ExtensionArray._formatting_values`. Use :attr:`ExtensionArray._formatter` instead. (:issue:`23601`)
- Removed the previously deprecated ``IntervalIndex.from_intervals`` in favor of the :class:`IntervalIndex` constructor (:issue:`19263`)
- Ability to read pickles containing :class:`Categorical` instances created with pre-0.16 versions of pandas has been removed (:issue:`27538`)
-

.. _whatsnew_1000.performance:

Expand Down Expand Up @@ -147,7 +149,7 @@ Datetimelike
- Bug in :class:`Timestamp` subtraction when subtracting a :class:`Timestamp` from a ``np.datetime64`` object incorrectly raising ``TypeError`` (:issue:`28286`)
- Addition and subtraction of integer or integer-dtype arrays with :class:`Timestamp` will now raise ``NullFrequencyError`` instead of ``ValueError`` (:issue:`28268`)
- Bug in :class:`Series` and :class:`DataFrame` with integer dtype failing to raise ``TypeError`` when adding or subtracting a ``np.datetime64`` object (:issue:`28080`)
-
- Bug in :class:`Week` with ``weekday`` incorrectly raising ``AttributeError`` instead of ``TypeError`` when adding or subtracting an invalid type (:issue:`28530`)


Timedelta
Expand Down Expand Up @@ -218,6 +220,7 @@ I/O
- Bug in :meth:`DataFrame.to_csv` where values were truncated when the length of ``na_rep`` was shorter than the text input data. (:issue:`25099`)
- Bug in :func:`DataFrame.to_string` where values were truncated using display options instead of outputting the full content (:issue:`9784`)
- Bug in :meth:`DataFrame.to_json` where a datetime column label would not be written out in ISO format with ``orient="table"`` (:issue:`28130`)
- Bug in :func:`DataFrame.to_parquet` where writing to GCS would fail with ``engine='fastparquet'`` if the file did not already exist (:issue:`28326`)

Plotting
^^^^^^^^
Expand Down
19 changes: 1 addition & 18 deletions pandas/core/arrays/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -1353,24 +1353,7 @@ def __setstate__(self, state):
if not isinstance(state, dict):
raise Exception("invalid pickle state")

# Provide compatibility with pre-0.15.0 Categoricals.
if "_categories" not in state and "_levels" in state:
state["_categories"] = self.dtype.validate_categories(state.pop("_levels"))
if "_codes" not in state and "labels" in state:
state["_codes"] = coerce_indexer_dtype(
state.pop("labels"), state["_categories"]
)

# 0.16.0 ordered change
if "_ordered" not in state:

# >=15.0 < 0.16.0
if "ordered" in state:
state["_ordered"] = state.pop("ordered")
else:
state["_ordered"] = False

# 0.21.0 CategoricalDtype change
# compat with pre 0.21.0 CategoricalDtype change
if "_dtype" not in state:
state["_dtype"] = CategoricalDtype(state["_categories"], state["_ordered"])

Expand Down
8 changes: 4 additions & 4 deletions pandas/io/parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

from pandas import DataFrame, get_option

from pandas.io.common import get_filepath_or_buffer, is_s3_url
from pandas.io.common import get_filepath_or_buffer, is_gcs_url, is_s3_url


def get_engine(engine):
Expand Down Expand Up @@ -159,12 +159,12 @@ def write(
if partition_cols is not None:
kwargs["file_scheme"] = "hive"

if is_s3_url(path):
# path is s3:// so we need to open the s3file in 'wb' mode.
if is_s3_url(path) or is_gcs_url(path):
# if path is s3:// or gs:// we need to open the file in 'wb' mode.
# TODO: Support 'ab'

path, _, _, _ = get_filepath_or_buffer(path, mode="wb")
# And pass the opened s3file to the fastparquet internal impl.
# And pass the opened file to the fastparquet internal impl.
kwargs["open_with"] = lambda path, _: path
else:
path, _, _, _ = get_filepath_or_buffer(path)
Expand Down
43 changes: 22 additions & 21 deletions pandas/tests/internals/test_internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -528,32 +528,33 @@ def test_as_array_datetime_tz(self):
assert mgr.get("g").dtype == "datetime64[ns, CET]"
assert mgr.as_array().dtype == "object"

def test_astype(self):
@pytest.mark.parametrize("t", ["float16", "float32", "float64", "int32", "int64"])
def test_astype(self, t):
# coerce all
mgr = create_mgr("c: f4; d: f2; e: f8")
for t in ["float16", "float32", "float64", "int32", "int64"]:
t = np.dtype(t)
tmgr = mgr.astype(t)
assert tmgr.get("c").dtype.type == t
assert tmgr.get("d").dtype.type == t
assert tmgr.get("e").dtype.type == t

t = np.dtype(t)
tmgr = mgr.astype(t)
assert tmgr.get("c").dtype.type == t
assert tmgr.get("d").dtype.type == t
assert tmgr.get("e").dtype.type == t

# mixed
mgr = create_mgr("a,b: object; c: bool; d: datetime; e: f4; f: f2; g: f8")
for t in ["float16", "float32", "float64", "int32", "int64"]:
t = np.dtype(t)
tmgr = mgr.astype(t, errors="ignore")
assert tmgr.get("c").dtype.type == t
assert tmgr.get("e").dtype.type == t
assert tmgr.get("f").dtype.type == t
assert tmgr.get("g").dtype.type == t

assert tmgr.get("a").dtype.type == np.object_
assert tmgr.get("b").dtype.type == np.object_
if t != np.int64:
assert tmgr.get("d").dtype.type == np.datetime64
else:
assert tmgr.get("d").dtype.type == t

t = np.dtype(t)
tmgr = mgr.astype(t, errors="ignore")
assert tmgr.get("c").dtype.type == t
assert tmgr.get("e").dtype.type == t
assert tmgr.get("f").dtype.type == t
assert tmgr.get("g").dtype.type == t

assert tmgr.get("a").dtype.type == np.object_
assert tmgr.get("b").dtype.type == np.object_
if t != np.int64:
assert tmgr.get("d").dtype.type == np.datetime64
else:
assert tmgr.get("d").dtype.type == t

def test_convert(self):
def _compare(old_mgr, new_mgr):
Expand Down
Binary file added pandas/tests/io/data/categorical.0.25.0.pickle
Binary file not shown.
Loading

0 comments on commit 689c59a

Please sign in to comment.