Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

adds pyproject files and tests #1302

Merged
merged 3 commits into from
Nov 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions requirements/pyproject-apex-pip.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Poetry project file for the "gpt-neox-apex-pip" dependency set
# (the Apex pip requirements for GPT-NeoX, per the description below).
[tool.poetry]
name = "gpt-neox-apex-pip"
version = "0.1.0"
description = "Apex pip requirements for GPT-NeoX"
# NOTE(review): "[email protected]" looks like an obfuscated address - confirm the real contact email.
authors = ["EleutherAI <[email protected]>"]
license = "Apache-2.0"

# `python` constrains the interpreter; every other key is a package requirement.
[tool.poetry.dependencies]
python = "^3.8"
# A bare version is an exact pin in Poetry.
pip = "23.3.2"

# Build with poetry-core (PEP 517 backend).
[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"
14 changes: 14 additions & 0 deletions requirements/pyproject-comet.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Poetry project file for the "gpt-neox-comet" dependency set
# (the Comet ML requirements for GPT-NeoX, per the description below).
[tool.poetry]
name = "gpt-neox-comet"
version = "0.1.0"
description = "Comet ML requirements for GPT-NeoX"
# NOTE(review): "[email protected]" looks like an obfuscated address - confirm the real contact email.
authors = ["EleutherAI <[email protected]>"]
license = "Apache-2.0"

# `python` constrains the interpreter; every other key is a package requirement.
[tool.poetry.dependencies]
python = "^3.8"
# Lower bound only; no upper cap.
comet_ml = ">=3.45.0"

# Build with poetry-core (PEP 517 backend).
[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"
14 changes: 14 additions & 0 deletions requirements/pyproject-flashattention.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Poetry project file for the "gpt-neox-flashattention" dependency set
# (the Flash Attention requirements for GPT-NeoX, per the description below).
[tool.poetry]
name = "gpt-neox-flashattention"
version = "0.1.0"
description = "Flash Attention requirements for GPT-NeoX"
# NOTE(review): "[email protected]" looks like an obfuscated address - confirm the real contact email.
authors = ["EleutherAI <[email protected]>"]
license = "Apache-2.0"

# `python` constrains the interpreter; every other key is a package requirement.
[tool.poetry.dependencies]
python = "^3.8"
# A bare version is an exact pin in Poetry.
flash-attn = "2.5.6"

# Build with poetry-core (PEP 517 backend).
[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"
16 changes: 16 additions & 0 deletions requirements/pyproject-mamba.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Poetry project file for the "gpt-neox-mamba" dependency set
# (the Mamba requirements for GPT-NeoX, per the description below).
[tool.poetry]
name = "gpt-neox-mamba"
version = "0.1.0"
description = "Mamba requirements for GPT-NeoX"
# NOTE(review): "[email protected]" looks like an obfuscated address - confirm the real contact email.
authors = ["EleutherAI <[email protected]>"]
license = "Apache-2.0"

# `python` constrains the interpreter; every other key is a package requirement.
[tool.poetry.dependencies]
python = "^3.8"
causal_conv1d = ">=1.1.0"
# "*" accepts any version.
einops = "*"
mamba_ssm = ">=1.2.0.post1"

# Build with poetry-core (PEP 517 backend).
[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"
23 changes: 23 additions & 0 deletions requirements/pyproject-neox-dev.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Poetry project file for the "gpt-neox-dev" dependency set
# (the development requirements for GPT-NeoX, per the description below).
# NOTE(review): this file is added as pyproject-neox-dev.toml, while the
# requirements file is requirements-dev.txt - confirm that any tooling which
# pairs the two by name expects the "neox-" prefix.
[tool.poetry]
name = "gpt-neox-dev"
version = "0.1.0"
description = "Development requirements for GPT-NeoX"
# NOTE(review): "[email protected]" looks like an obfuscated address - confirm the real contact email.
authors = ["EleutherAI <[email protected]>"]
license = "Apache-2.0"

# `python` constrains the interpreter; every other key is a package requirement.
[tool.poetry.dependencies]
python = "^3.8"
autopep8 = ">=1.5.6"
clang-format = ">=13.0.1"
pre-commit = ">=2.17.0"
pytest = ">=6.2.3"
pytest-cov = ">=2.11.1"
pytest-forked = ">=1.3.0"
# A bare version is an exact pin in Poetry.
pytest-html = "4.1.1"
# "*" accepts any version.
pytest-xdist = "*"
toml = ">=0.10.2"
packaging = ">=23.0"

# Build with poetry-core (PEP 517 backend).
[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"
14 changes: 14 additions & 0 deletions requirements/pyproject-onebitadam.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Poetry project file for the "gpt-neox-onebitadam" dependency set
# (the OneBitAdam requirements for GPT-NeoX, per the description below).
[tool.poetry]
name = "gpt-neox-onebitadam"
version = "0.1.0"
description = "OneBitAdam requirements for GPT-NeoX"
# NOTE(review): "[email protected]" looks like an obfuscated address - confirm the real contact email.
authors = ["EleutherAI <[email protected]>"]
license = "Apache-2.0"

# `python` constrains the interpreter; every other key is a package requirement.
[tool.poetry.dependencies]
python = "^3.8"
# Lower bound only; no upper cap.
cupy-cuda111 = ">=8.6.0"

# Build with poetry-core (PEP 517 backend).
[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"
15 changes: 15 additions & 0 deletions requirements/pyproject-s3.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Poetry project file for the "gpt-neox-s3" dependency set
# (the S3 requirements for GPT-NeoX, per the description below).
[tool.poetry]
name = "gpt-neox-s3"
version = "0.1.0"
description = "S3 requirements for GPT-NeoX"
# NOTE(review): "[email protected]" looks like an obfuscated address - confirm the real contact email.
authors = ["EleutherAI <[email protected]>"]
license = "Apache-2.0"

# `python` constrains the interpreter; every other key is a package requirement.
[tool.poetry.dependencies]
python = "^3.8"
# "*" accepts any version.
boto3 = "*"
hf-transfer = ">=0.1.3"

# Build with poetry-core (PEP 517 backend).
[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"
14 changes: 14 additions & 0 deletions requirements/pyproject-sparseattention.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Poetry project file for the "gpt-neox-sparseattention" dependency set
# (the Sparse Attention requirements for GPT-NeoX, per the description below).
[tool.poetry]
name = "gpt-neox-sparseattention"
version = "0.1.0"
description = "Sparse Attention requirements for GPT-NeoX"
# NOTE(review): "[email protected]" looks like an obfuscated address - confirm the real contact email.
authors = ["EleutherAI <[email protected]>"]
license = "Apache-2.0"

# `python` constrains the interpreter; every other key is a package requirement.
[tool.poetry.dependencies]
python = "^3.8"
# A bare version is an exact pin in Poetry.
triton = "2.1.0"

# Build with poetry-core (PEP 517 backend).
[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"
14 changes: 14 additions & 0 deletions requirements/pyproject-tensorboard.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Poetry project file for the "gpt-neox-tensorboard" dependency set
# (the TensorBoard requirements for GPT-NeoX, per the description below).
[tool.poetry]
name = "gpt-neox-tensorboard"
version = "0.1.0"
description = "TensorBoard requirements for GPT-NeoX"
# NOTE(review): "[email protected]" looks like an obfuscated address - confirm the real contact email.
authors = ["EleutherAI <[email protected]>"]
license = "Apache-2.0"

# `python` constrains the interpreter; every other key is a package requirement.
[tool.poetry.dependencies]
python = "^3.8"
# A bare version is an exact pin in Poetry.
tensorboard = "2.13.0"

# Build with poetry-core (PEP 517 backend).
[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"
14 changes: 14 additions & 0 deletions requirements/pyproject-transformerengine.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Poetry project file for the "gpt-neox-transformerengine" dependency set
# (the Transformer Engine requirements for GPT-NeoX, per the description below).
[tool.poetry]
name = "gpt-neox-transformerengine"
version = "0.1.0"
description = "Transformer Engine requirements for GPT-NeoX"
# NOTE(review): "[email protected]" looks like an obfuscated address - confirm the real contact email.
authors = ["EleutherAI <[email protected]>"]
license = "Apache-2.0"

# `python` constrains the interpreter; every other key is a package requirement.
[tool.poetry.dependencies]
python = "^3.8"
# Installed straight from the upstream git repository.
# NOTE(review): "stable" looks like a branch name rather than a commit; Poetry
# documents the `branch` key for branches - confirm `rev` resolves it as intended.
transformer-engine = {git = "https://github.com/NVIDIA/TransformerEngine.git", rev = "stable"}

# Build with poetry-core (PEP 517 backend).
[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"
14 changes: 14 additions & 0 deletions requirements/pyproject-wandb.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Poetry project file for the "gpt-neox-wandb" dependency set
# (the Weights & Biases requirements for GPT-NeoX, per the description below).
[tool.poetry]
name = "gpt-neox-wandb"
version = "0.1.0"
description = "Weights & Biases requirements for GPT-NeoX"
# NOTE(review): "[email protected]" looks like an obfuscated address - confirm the real contact email.
authors = ["EleutherAI <[email protected]>"]
license = "Apache-2.0"

# `python` constrains the interpreter; every other key is a package requirement.
[tool.poetry.dependencies]
python = "^3.8"
# Lower bound only; no upper cap.
wandb = ">=0.10.28"

# Build with poetry-core (PEP 517 backend).
[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"
33 changes: 33 additions & 0 deletions requirements/pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Poetry project file for the core GPT-NeoX package.
[tool.poetry]
name = "gpt-neox"
version = "2.0.0"
description = "An open-source library for training large-scale language models on GPUs"
# NOTE(review): "[email protected]" looks like an obfuscated address - confirm the real contact email.
authors = ["EleutherAI <[email protected]>"]
license = "Apache-2.0"
# NOTE(review): Poetry resolves `readme` relative to this file, which is added
# under requirements/ - confirm a README.md is reachable from that directory.
readme = "README.md"
homepage = "https://www.github.com/eleutherai/gpt-neox"
repository = "https://www.github.com/eleutherai/gpt-neox"
documentation = "https://www.github.com/eleutherai/gpt-neox"

# `python` constrains the interpreter; every other key is a package requirement.
[tool.poetry.dependencies]
python = "^3.8"
# Git dependencies are pinned to exact commits for reproducibility.
deepspeed = {git = "https://github.com/EleutherAI/DeeperSpeed.git", rev = "02e2ebf7dee6aaab3d89094ed470a4609763c742"}
ftfy = "^6.0.1"
# Plain lower bounds are used for the 0.x packages below. For a 0.x version a
# caret only allows patch updates ("^0.11.0" means ">=0.11.0,<0.12.0"), which
# locks the package to one minor series, unlike the ">=" bounds used by the
# sibling pyproject-*.toml files.
# NOTE(review): such narrow windows are presumably also incompatible with the
# pinned transformers release - confirm against its declared requirements.
huggingface_hub = ">=0.11.0"
jinja2 = "3.1.4"
lm_dataformat = {git = "https://github.com/EleutherAI/lm_dataformat.git", rev = "4eec05349977071bf67fc072290b95e31c8dd836"}
lm_eval = ">=0.4.0,<=0.4.1"
mpi4py = "^3.0.3"
numpy = "<2.0"
pybind11 = "^2.6.2"
# "*" accepts any version.
regex = "*"
sentencepiece = "*"
six = "*"
tiktoken = ">=0.1.2"
tokenizers = ">=0.12.1"
# A bare version is an exact pin in Poetry.
transformers = "4.38.0"
toml = "*"

# Build with poetry-core (PEP 517 backend).
[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"
2 changes: 2 additions & 0 deletions requirements/requirements-dev.txt
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
# Development requirements: formatters, pre-commit hooks, pytest and its
# plugins, plus the parsing helpers (toml, packaging) used by the tests.
autopep8>=1.5.6
clang-format>=13.0.1
packaging>=23.0
pre-commit>=2.17.0
pytest>=6.2.3
pytest-cov>=2.11.1
pytest-forked>=1.3.0
pytest-html==4.1.1
pytest-xdist
toml>=0.10.2
131 changes: 131 additions & 0 deletions tests/requirements/test_requirements.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
from dataclasses import dataclass
from pathlib import Path
from typing import Dict, List, Optional, Tuple

import pytest
import toml
from packaging.version import parse as parse_version, Version


@dataclass
class Dependency:
    """One requirement entry: a package name plus an optional version string.

    Only the version text is stored. The comparison operator that precedes it
    in a requirement line is discarded by :meth:`from_requirement`, so
    :meth:`matches_version` compares version numbers, not operators.
    """

    # Package name; ``from_requirement`` lower-cases and strips it.
    name: str
    # Text following the first version operator, or None when unconstrained.
    version: Optional[str] = None

    @classmethod
    def from_requirement(cls, requirement: str) -> "Dependency":
        """Parse a requirement string into a Dependency object.

        The line is split at the operator that occurs *earliest* in the
        string, so ``"foo<2.0,>=1.0"`` yields name ``"foo"`` and version
        ``"2.0,>=1.0"``. Any further constraints stay in the version text.

        Args:
            requirement: A single requirement such as ``"pkg>=1.0"`` or
                ``"pkg"``.

        Returns:
            A Dependency with a lower-cased, stripped name and the stripped
            version text (``None`` when no operator is present).
        """
        # Common version specifiers
        specifiers = ["===", "==", "~=", "!=", ">=", "<=", ">", "<"]

        best = None  # ((position, -operator_length), operator)
        for spec in specifiers:
            pos = requirement.find(spec)
            if pos == -1:
                continue
            # Earliest position wins; at the same position prefer the longer
            # operator so ">=" is not mistaken for ">".
            key = (pos, -len(spec))
            if best is None or key < best[0]:
                best = (key, spec)

        if best is None:
            name, version = requirement, None
        else:
            (pos, _), spec = best
            name = requirement[:pos]
            version = requirement[pos + len(spec):].strip()

        return cls(name.lower().strip(), version)

    @staticmethod
    def _strip_operator(constraint: str) -> str:
        """Drop surrounding whitespace and any leading comparison operator."""
        return constraint.strip().lstrip("^~=<>!").strip()

    def matches_version(self, other_version: str) -> bool:
        """Check if this dependency's version matches another version string.

        Leading operators (``>=``, ``==``, ``^``, ``~`` ...) are removed from
        both sides first, because ``from_requirement`` has already removed the
        operator from ``self.version``; without this an identical ``>=x``
        constraint on the other side would never compare equal. ``*`` is
        treated as ``0`` on both sides.

        Args:
            other_version: The constraint text to compare against.

        Returns:
            True when either side has no version, when the normalised texts
            are identical, or when both parse to equal versions; otherwise
            False.
        """
        if not self.version or not other_version:
            return True

        ours = self._strip_operator(self.version).replace("*", "0")
        theirs = self._strip_operator(other_version).replace("*", "0")

        # Identical text always matches, including multi-constraint strings
        # that are not parseable as a single version.
        if ours == theirs:
            return True

        try:
            # Convert versions to comparable objects ("1.0" == "1.0.0").
            return parse_version(ours) == parse_version(theirs)
        except ValueError:
            # Unparseable and textually different: treat as a mismatch.
            return False


class DependencyValidator:
    """Compare a pip requirements file with a Poetry pyproject file."""

    def __init__(self, requirements_dir: Path):
        """Remember the directory that holds the requirements files.

        The methods below take explicit file paths; the directory is only
        stored on the instance.
        """
        self.requirements_dir = requirements_dir

    def parse_requirements(self, file_path: Path) -> List[Dependency]:
        """Parse requirements.txt file into a list of Dependencies.

        Blank lines and comment lines are skipped. The comment check is made
        on the stripped line so indented ``#`` comments are ignored as well.

        Raises:
            FileNotFoundError: If ``file_path`` does not exist.
            ValueError: If the file cannot be read or parsed for any other
                reason.
        """
        try:
            with open(file_path, "r") as f:
                stripped = (line.strip() for line in f)
                entries = [
                    line for line in stripped if line and not line.startswith("#")
                ]
            return [Dependency.from_requirement(line) for line in entries]
        except FileNotFoundError as e:
            raise FileNotFoundError(
                f"Requirements file not found: {file_path}"
            ) from e
        except Exception as e:
            raise ValueError(
                f"Error parsing requirements file {file_path}: {str(e)}"
            ) from e

    @staticmethod
    def _constraint_text(spec) -> str:
        """Return the version constraint of one Poetry dependency entry.

        Table-style entries that carry a ``version`` key yield that value;
        anything else (plain strings, git tables, ...) is stringified as-is.
        """
        if isinstance(spec, dict) and "version" in spec:
            return str(spec["version"])
        return str(spec)

    def parse_pyproject(self, file_path: Path) -> Dict[str, str]:
        """Parse pyproject.toml file and extract dependencies.

        Returns:
            A mapping of lower-cased package name to constraint text, read
            from ``[tool.poetry.dependencies]`` with the ``python`` entry
            removed.

        Raises:
            FileNotFoundError: If ``file_path`` does not exist.
            ValueError: If the file cannot be parsed or lacks the expected
                tables.
        """
        try:
            # TOML documents are UTF-8 by specification.
            with open(file_path, "r", encoding="utf-8") as f:
                pyproject_data = toml.load(f)
            dependencies = pyproject_data["tool"]["poetry"]["dependencies"]
            return {
                name.lower(): self._constraint_text(spec)
                for name, spec in dependencies.items()
                if name.lower() != "python"  # Exclude Python version
            }
        except FileNotFoundError as e:
            raise FileNotFoundError(
                f"pyproject.toml file not found: {file_path}"
            ) from e
        except Exception as e:
            raise ValueError(
                f"Error parsing pyproject.toml {file_path}: {str(e)}"
            ) from e

    def compare_dependencies(
        self, req_deps: List[Dependency], pyproject_deps: Dict[str, str]
    ) -> Tuple[bool, List[str]]:
        """Compare dependencies between requirements.txt and pyproject.toml.

        The check is one-directional: every requirement must appear in
        ``pyproject_deps`` with a matching version. Extra pyproject entries
        are not reported.

        Returns:
            ``(ok, mismatches)`` where ``ok`` is True when ``mismatches`` is
            empty and ``mismatches`` holds one message per problem found.
        """
        mismatches = []

        for req in req_deps:
            if req.name not in pyproject_deps:
                mismatches.append(
                    f"Dependency '{req.name}' not found in pyproject.toml"
                )
                continue

            if not req.matches_version(pyproject_deps[req.name]):
                mismatches.append(
                    f"Version mismatch for '{req.name}': "
                    f"requirements.txt={req.version}, "
                    f"pyproject.toml={pyproject_deps[req.name]}"
                )

        return len(mismatches) == 0, mismatches


def get_corresponding_pyproject(req_file: Path) -> Path:
    """Get the corresponding pyproject.toml file for a requirements file.

    ``requirements-<env>.txt`` maps to ``pyproject-<env>.toml`` in the same
    directory. Everything after the first hyphen is kept, so multi-part
    names such as ``requirements-apex-pip.txt`` map to
    ``pyproject-apex-pip.toml``. A stem without a hyphen (for example
    ``requirements.txt``) maps to plain ``pyproject.toml``.

    Args:
        req_file: Path to the requirements file.

    Returns:
        Path of the matching pyproject file next to ``req_file``.
    """
    _, hyphen, env_name = req_file.stem.partition("-")
    if not hyphen:
        return req_file.parent / "pyproject.toml"
    return req_file.parent / f"pyproject-{env_name}.toml"


@pytest.mark.parametrize(
    "req_file",
    # Sorted so the parameter order does not depend on directory listing
    # order. The path is relative to the current working directory.
    sorted(Path("requirements").glob("requirements-*.txt")),
    # Use the file name as the test id instead of an opaque "req_file0".
    ids=lambda path: path.name,
)
def test_pyproject_matches_requirements(req_file: Path):
    """Test that requirements.txt dependencies match pyproject.toml dependencies.

    Each ``requirements-*.txt`` file is checked against the pyproject file
    returned by ``get_corresponding_pyproject``. Every mismatch is listed in
    the failure message.
    """
    validator = DependencyValidator(req_file.parent)
    pyproject_file = get_corresponding_pyproject(req_file)

    # Parse both dependency files
    req_deps = validator.parse_requirements(req_file)
    pyproject_deps = validator.parse_pyproject(pyproject_file)

    # Compare dependencies and get detailed mismatches
    is_match, mismatches = validator.compare_dependencies(req_deps, pyproject_deps)

    # Create detailed error message if there are mismatches
    if not is_match:
        error_msg = "\n".join(
            [
                f"\nDependency mismatches found between {req_file} and {pyproject_file}:",
                *[f"- {msg}" for msg in mismatches],
            ]
        )
        pytest.fail(error_msg)
Loading