Skip to content

Commit

Permalink
Fix pep517 builds (#23)
Browse files Browse the repository at this point in the history
* Fix pep517 builds

`python3 -m pep517.check` now passes

* fix actions

* try again

* skip 36

* build portable

* build portable

* build portable

* build portable

* build portable

* check

* check

* adjust

* fix

* pytest

* pytest

* pytest

* pytest
  • Loading branch information
bdraco authored Feb 21, 2023
1 parent 24307ea commit f2efad3
Show file tree
Hide file tree
Showing 4 changed files with 117 additions and 92 deletions.
12 changes: 5 additions & 7 deletions .github/workflows/build-and-upload-to-pypi.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,16 +18,12 @@ jobs:
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ubuntu-latest, macos-11]
os: [ubuntu-latest, macos-11, windows-2019]

steps:
- uses: actions/checkout@v3

- name: Checkout submodules
shell: bash
run: |
git submodule sync --recursive
git submodule update --init --force --recursive --depth=1
with:
submodules: 'src/ext/uchardet'

- name: Set up QEMU
if: runner.os == 'Linux'
Expand All @@ -52,6 +48,8 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
with:
submodules: 'src/ext/uchardet'

- name: Build sdist
run: pipx run build --sdist
Expand Down
2 changes: 0 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,13 @@ environment = {INCLUDE_PATH="/usr/local/include/uchardet", LIBRARY_PATH="/usr/lo
before-build = [
"git submodule sync --recursive",
"git submodule update --init --force --recursive --depth=1",
"test -d {project}/src/ext/uchardet/build || (cd {project}/src/ext/uchardet/ && mkdir build && cd build && cmake .. && make && make install)",
]

[tool.cibuildwheel.macos]
environment = {INCLUDE_PATH="/usr/local/include/uchardet", LIBRARY_PATH="/usr/local/lib/"}
before-build = [
"git submodule sync --recursive",
"git submodule update --init --force --recursive --depth=1",
"test -d {project}/src/ext/uchardet/build || (cd {project}/src/ext/uchardet/ && mkdir build && cd build && cmake -DCMAKE_MACOSX_RPATH=1 -DCMAKE_INSTALL_NAME_DIR=$LIBRARY_PATH -DCMAKE_BUILD_RPATH=$LIBRARY_PATH .. && make && make install)",
]

[tool.cibuildwheel.windows]
Expand Down
123 changes: 66 additions & 57 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,8 @@
# coding: utf-8

import os
import sys
import glob
import codecs
import re
import pkgconfig
from distutils.command.build_ext import build_ext
from distutils import sysconfig

Expand All @@ -17,19 +14,31 @@

from Cython.Build import cythonize

cchardet_dir = os.path.join("src", "cchardet") + os.path.sep

try:
ext_args = pkgconfig.parse('uchardet')
except pkgconfig.PackageNotFoundError:
include_path = os.environ.get('INCLUDE_PATH')
library_path = os.environ.get('LIBRARY_PATH')
join = os.path.join

cchardet_dir = join("src", "cchardet") + os.path.sep
uchardet_dir = join("src", "ext", "uchardet", "src")
uchardet_lang_models_dir = join(uchardet_dir, "LangModels")

cchardet_sources = [join("src", "cchardet", "_cchardet.pyx")]
uchardet_sources = [
join(uchardet_dir, file)
for file in os.listdir(uchardet_dir)
if file.endswith(".cpp")
]
uchardet_lang_source = [
join(uchardet_lang_models_dir, file)
for file in os.listdir(uchardet_lang_models_dir)
if file.endswith(".cpp")
]
sources = cchardet_sources + uchardet_sources + uchardet_lang_source

ext_args = {
"include_dirs": uchardet_dir.split(os.pathsep),
"library_dirs": uchardet_dir.split(os.pathsep),
}

ext_args = {
'include_dirs': include_path.split(os.pathsep) if include_path else [],
'library_dirs': library_path.split(os.pathsep) if library_path else [],
'libraries': ['uchardet'],
}

# Remove the "-Wstrict-prototypes" compiler option, which isn't valid for C++.
cfg_vars = sysconfig.get_config_vars()
Expand All @@ -40,61 +49,61 @@
# cfg_vars[key] = value.replace("-O2", "-O3")


cchardet_module = Extension(
'cchardet._cchardet',
[
os.path.join('src', 'cchardet', '_cchardet.pyx')
],
language='c++',
**ext_args
)
cchardet_module = Extension("cchardet._cchardet", sources, language="c++", **ext_args)


def read(f):
    """Return the contents of file *f* with surrounding whitespace stripped.

    *f* is joined onto the directory that contains this script, so relative
    names such as ``README.rst`` resolve next to ``setup.py`` regardless of
    the current working directory.
    """
    path = os.path.join(os.path.dirname(__file__), f)
    # Use a context manager so the handle is closed deterministically rather
    # than being left open until the garbage collector reclaims it.
    with open(path) as fp:
        return fp.read().strip()


with codecs.open(os.path.join(os.path.abspath(os.path.dirname(__file__)), 'src', 'cchardet', 'version.py'), 'r', 'latin1') as fp:
with codecs.open(
os.path.join(
os.path.abspath(os.path.dirname(__file__)), "src", "cchardet", "version.py"
),
"r",
"latin1",
) as fp:
try:
version = re.findall(
r"^__version__ = '([^']+)'\r?$", fp.read(), re.M)[0]
version = re.findall(r"^__version__ = '([^']+)'\r?$", fp.read(), re.M)[0]
except IndexError:
raise RuntimeError('Unable to determine version.')
raise RuntimeError("Unable to determine version.")

setup(
name='faust-cchardet',
author='PyYoshi',
author_email='[email protected]',
url=r'https://github.com/faust-streaming/cChardet',
description='cChardet is high speed universal character encoding detector.',
long_description='\n\n'.join((read('README.rst'), read('CHANGES.rst'))),
name="faust-cchardet",
author="PyYoshi",
author_email="[email protected]",
url=r"https://github.com/faust-streaming/cChardet",
description="cChardet is high speed universal character encoding detector.",
long_description="\n\n".join((read("README.rst"), read("CHANGES.rst"))),
version=version,
license='Mozilla Public License',
license="Mozilla Public License",
classifiers=[
'License :: OSI Approved :: Mozilla Public License 1.1 (MPL 1.1)',
'License :: OSI Approved :: GNU General Public License (GPL)',
'License :: OSI Approved :: GNU Library or Lesser General Public License (LGPL)',
'Programming Language :: Cython',
'Programming Language :: Python',
'Topic :: Software Development :: Libraries',
'Programming Language :: Python :: 3',
'Programming Language :: Python :: 3.6',
'Programming Language :: Python :: 3.7',
'Programming Language :: Python :: 3.8',
'Programming Language :: Python :: 3.9',
'Programming Language :: Python :: 3.10',
'Programming Language :: Python :: 3.11',
"License :: OSI Approved :: Mozilla Public License 1.1 (MPL 1.1)",
"License :: OSI Approved :: GNU General Public License (GPL)",
"License :: OSI Approved :: GNU Library or Lesser General Public License (LGPL)",
"Programming Language :: Cython",
"Programming Language :: Python",
"Topic :: Software Development :: Libraries",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.6",
"Programming Language :: Python :: 3.7",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
],
keywords=[
'cython',
'chardet',
'charsetdetect'
keywords=["cython", "chardet", "charsetdetect"],
cmdclass={"build_ext": build_ext},
package_dir={"": "src"},
packages=[
"cchardet",
],
cmdclass={'build_ext': build_ext},
package_dir={'': 'src'},
packages=['cchardet', ],
scripts=['bin/cchardetect'],
ext_modules=cythonize([
cchardet_module,
]),
scripts=["bin/cchardetect"],
ext_modules=cythonize(
[
cchardet_module,
],
cplus=True,
compiler_directives={"language_level": "3"}, # Python 3
),
)
72 changes: 46 additions & 26 deletions src/tests/cchardet_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,46 +4,66 @@

import cchardet
import pytest
import sys

SKIP_LIST = [
os.path.join('src','tests','testdata','ja','utf-16le.txt'),
os.path.join('src','tests','testdata','ja','utf-16be.txt'),
os.path.join('src','tests','testdata','es','iso-8859-15.txt'),
os.path.join('src','tests','testdata','da','iso-8859-1.txt'),
os.path.join('src','tests','testdata','he','iso-8859-8.txt'),
os.path.join("src", "tests", "testdata", "ja", "utf-16le.txt"),
os.path.join("src", "tests", "testdata", "ja", "utf-16be.txt"),
os.path.join("src", "tests", "testdata", "es", "iso-8859-15.txt"),
os.path.join("src", "tests", "testdata", "da", "iso-8859-1.txt"),
os.path.join("src", "tests", "testdata", "he", "iso-8859-8.txt"),
]

if sys.maxsize <= 2**32:
# Fails on i686 only, original cchardet test fails too
SKIP_LIST.append(os.path.join("src", "tests", "testdata", "th", "tis-620.txt"))
SKIP_LIST.append(os.path.join("src", "tests", "testdata", "fi", "iso-8859-1.txt"))
SKIP_LIST.append(os.path.join("src", "tests", "testdata", "ga", "iso-8859-1.txt"))

# Python can't decode these encodings
SKIP_LIST_02 = [
os.path.join('src','tests','testdata','vi','viscii.txt'),
os.path.join('src','tests','testdata','zh','euc-tw.txt'),
os.path.join("src", "tests", "testdata", "vi", "viscii.txt"),
os.path.join("src", "tests", "testdata", "zh", "euc-tw.txt"),
]

SKIP_LIST_02.extend(SKIP_LIST)


def test_ascii():
detected_encoding = cchardet.detect(b'abcdefghijklmnopqrstuvwxyz')
assert 'ascii' == detected_encoding['encoding'].lower()
detected_encoding = cchardet.detect(b"abcdefghijklmnopqrstuvwxyz")
assert "ascii" == detected_encoding["encoding"].lower()


def test_detect():
testfiles = glob.glob(os.path.join('src','tests','testdata','*','*.txt'))
for testfile in testfiles:
if testfile.replace("\\", "/") in SKIP_LIST:
continue
@pytest.mark.parametrize(
"testfile", glob.glob(os.path.join("src", "tests", "testdata", "*", "*.txt"))
)
def test_detect(testfile):
if testfile.replace("\\", "/") in SKIP_LIST:
return

base = os.path.basename(testfile)
expected_charset = os.path.splitext(base)[0]
with open(testfile, 'rb') as f:
msg = f.read()
detected_encoding = cchardet.detect(msg)
assert expected_charset.lower() == detected_encoding['encoding'].lower()
base = os.path.basename(testfile)
expected_charset = os.path.splitext(base)[0]
with open(testfile, "rb") as f:
msg = f.read()
detected_encoding = cchardet.detect(msg)
assert expected_charset.lower() == detected_encoding["encoding"].lower()


@pytest.mark.skipif(platform.system() == 'Windows', reason="FIXME: Cannot find test file on Windows for some reason")
@pytest.mark.skipif(
platform.system() == "Windows",
reason="FIXME: Cannot find test file on Windows for some reason",
)
def test_detector():
detector = cchardet.UniversalDetector()
with open(os.path.join('src','tests','samples','wikipediaJa_One_Thousand_and_One_Nights_SJIS.txt'), 'rb') as f:
with open(
os.path.join(
"src",
"tests",
"samples",
"wikipediaJa_One_Thousand_and_One_Nights_SJIS.txt",
),
"rb",
) as f:
line = f.readline()
while line:
detector.feed(line)
Expand All @@ -52,14 +72,14 @@ def test_detector():
line = f.readline()
detector.close()
detected_encoding = detector.result
assert "shift_jis" == detected_encoding['encoding'].lower()
assert "shift_jis" == detected_encoding["encoding"].lower()


def test_github_issue_20():
"""
https://github.com/PyYoshi/cChardet/issues/20
"""
msg = b'\x8f'
msg = b"\x8f"

cchardet.detect(msg)

Expand All @@ -69,14 +89,14 @@ def test_github_issue_20():


def test_decode():
testfiles = glob.glob(os.path.join('src','tests','testdata','*','*.txt'))
testfiles = glob.glob(os.path.join("src", "tests", "testdata", "*", "*.txt"))
for testfile in testfiles:
if testfile.replace("\\", "/") in SKIP_LIST_02:
continue

base = os.path.basename(testfile)
expected_charset = os.path.splitext(base)[0]
with open(testfile, 'rb') as f:
with open(testfile, "rb") as f:
msg = f.read()
detected_encoding = cchardet.detect(msg)
try:
Expand Down

0 comments on commit f2efad3

Please sign in to comment.