feat(pt): consistent fine-tuning with init-model #2662

Workflow file for this run

.github/workflows/test_cuda.yml at fd64ee5

	on:
	# manually trigger
	workflow_dispatch:
	pull_request:
	types:
	- "labeled"
	# to let the PR pass the test
	- "opened"
	- "reopened"
	- "synchronize"
	merge_group:
	concurrency:
	group: ${{ github.workflow }}-${{ github.ref \|\| github.run_id }}
	cancel-in-progress: true
	name: Test CUDA
	jobs:
	test_cuda:
	name: Test Python and C++ on CUDA
	runs-on: nvidia
	# https://github.com/deepmodeling/deepmd-kit/pull/2884#issuecomment-1744216845
	container:
	image: nvidia/cuda:12.3.1-devel-ubuntu22.04
	options: --gpus all
	if: github.repository_owner == 'deepmodeling' && (github.event_name == 'pull_request' && github.event.label && github.event.label.name == 'Test CUDA' \|\| github.event_name == 'workflow_dispatch' \|\| github.event_name == 'merge_group')
	steps:
	- name: Make sudo and git work
	run: apt-get update && apt-get install -y sudo git
	- uses: actions/checkout@v4
	- uses: actions/setup-python@v5
	with:
	python-version: '3.11'
	# cache: 'pip'
	- name: Setup MPI
	uses: mpi4py/setup-mpi@v1
	with:
	mpi: mpich
	- name: Install wget and unzip
	run: apt-get update && apt-get install -y wget unzip
	- uses: lukka/get-cmake@latest
	with:
	useLocalCache: true
	useCloudCache: false
	- run: \|
	wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.0-1_all.deb \
	&& sudo dpkg -i cuda-keyring_1.0-1_all.deb \
	&& sudo apt-get update \
	&& sudo apt-get -y install cuda-12-3 libcudnn8=8.9.5.*-1+cuda12.3
	if: false # skip as we use nvidia image
	- run: python -m pip install -U uv
	- run: python -m uv pip install --system "tensorflow>=2.15.0rc0" "torch>=2.2.0"
	- run: \|
	export TENSORFLOW_ROOT=$(python -c 'import importlib,pathlib;print(pathlib.Path(importlib.util.find_spec("tensorflow").origin).parent)')
	python -m uv pip install --system -v -e .[gpu,test,lmp,cu12,torch] mpi4py
	env:
	DP_VARIANT: cuda
	DP_ENABLE_NATIVE_OPTIMIZATION: 1
	- run: dp --version
	- run: python -m pytest source/tests --durations=0
	env:
	NUM_WORKERS: 0
	- name: Download libtorch
	run: \|
	wget https://download.pytorch.org/libtorch/cu121/libtorch-cxx11-abi-shared-with-deps-2.2.1%2Bcu121.zip -O libtorch.zip
	unzip libtorch.zip
	- run: \|
	export CMAKE_PREFIX_PATH=$GITHUB_WORKSPACE/libtorch
	source/install/test_cc_local.sh
	env:
	OMP_NUM_THREADS: 1
	TF_INTRA_OP_PARALLELISM_THREADS: 1
	TF_INTER_OP_PARALLELISM_THREADS: 1
	LMP_CXX11_ABI_0: 1
	CMAKE_GENERATOR: Ninja
	DP_VARIANT: cuda
	DP_USE_MPICH2: 1
	- run: \|
	export LD_LIBRARY_PATH=$GITHUB_WORKSPACE/dp_test/lib:$GITHUB_WORKSPACE/libtorch/lib:$CUDA_PATH/lib64:$LD_LIBRARY_PATH
	export PATH=$GITHUB_WORKSPACE/dp_test/bin:$PATH
	python -m pytest -s source/lmp/tests \|\| (cat log.lammps && exit 1)
	python -m pytest source/ipi/tests
	env:
	OMP_NUM_THREADS: 1
	TF_INTRA_OP_PARALLELISM_THREADS: 1
	TF_INTER_OP_PARALLELISM_THREADS: 1
	LAMMPS_PLUGIN_PATH: ${{ github.workspace }}/dp_test/lib/deepmd_lmp
	CUDA_VISIBLE_DEVICES: 0
	pass:
	name: Pass testing on CUDA
	needs: [test_cuda]
	runs-on: ubuntu-latest
	if: always()
	steps:
	- name: Decide whether the needed jobs succeeded or failed
	uses: re-actors/alls-green@release/v1
	with:
	jobs: ${{ toJSON(needs) }}
	allowed-skips: test_cuda

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

feat(pt): consistent fine-tuning with init-model #2662

Workflow file

feat(pt): consistent fine-tuning with init-model #2662

Jobs

Run details

Workflow file for this run