Skip to content

Commit

Permalink
Fix CI random fail (#40)
Browse files (browse the repository at this point in the history)
* refein

* refein

* add

* publishing

* publishing

* publishing

* publishing

* publishing

* refein

* fix

* publishing

* fix
  • Loading branch information
jackalcooper authored Sep 29, 2021
1 parent 2a9efce commit a0ab40d
Show file tree
Hide file tree
Showing 23 changed files with 74 additions and 53 deletions.
1 change: 1 addition & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,7 @@ jobs:
docker-run-use-system-http-proxy: false
docker-run-use-lld: true
retry-failed-build: true
clean-ccache: true
python-versions: |
3.6
3.7
Expand Down
26 changes: 13 additions & 13 deletions __tests__/cuda.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,42 +29,41 @@ async function testOneCUDA(
process.env['INPUT_CMAKE-INIT-CACHE'] = '~/oneflow/cmake/caches/ci/cuda.cmake'
const sourceDir = '~/oneflow'
process.env['INPUT_WHEELHOUSE-DIR'] = '~/manylinux-wheelhouse'
env.setBooleanInput('wheel-audit', true)
if (withXLA) {
ok(cudaVersion !== 'none')
env.setBooleanInput('wheel-audit', true)
env.setInput(
'cmake-init-cache',
path.join(sourceDir, 'cmake/caches/ci/cuda-xla.cmake')
)
}
env.setInput(
'build-script',
path.join(sourceDir, 'ci/manylinux/build-gcc7.sh')
)
if (cudaVersion === '11.4') {
env.setBooleanInput('docker-run-use-system-http-proxy', false)
env.setInput('build-script', path.join(sourceDir, 'ci/manylinux/build.sh'))
env.setInput('cmake-init-cache', '~/oneflow/cmake/caches/ci/cuda.cmake')
env.setInput('cmake-init-cache', '~/oneflow/cmake/caches/cn/cuda.cmake')
}
if (cudaVersion === 'none') {
env.setBooleanInput('docker-run-use-system-http-proxy', false)
env.setInput(
'build-script',
path.join(sourceDir, 'ci/manylinux/build-gcc7.sh')
)
env.setInput('build-script', path.join(sourceDir, 'ci/manylinux/build.sh'))
env.setInput('cmake-init-cache', '~/oneflow/cmake/caches/ci/cpu.cmake')
env.setBooleanInput('docker-run-use-lld', true)
env.setInput('build-script', path.join(sourceDir, 'ci/manylinux/build.sh'))
}
process.env['INPUT_ONEFLOW-SRC'] = sourceDir
process.env[
'INPUT_MANYLINUX-CACHE-DIR'
] = '~/manylinux-cache-dirs/unittest-'.concat(cudaVersion)
env.setMultilineInput('python-versions', ['3.6'])
env.setMultilineInput('python-versions', ['3.7'])
env.setInput('self-hosted', 'true')
env.setInput('cuda-version', cudaVersion)
env.setBooleanInput('docker-run-use-lld', false)
env.setBooleanInput('clear-wheelhouse-dir', true)
env.setInput(
'build-script',
path.join(sourceDir, 'ci/manylinux/build-gcc7.sh')
)
env.setBooleanInput('retry-failed-build', false)
env.setBooleanInput('clean-ccache', true)
const manylinuxVersion = '2014'
let tag = ''
const TEST_MANYLINUX = process.env['TEST_MANYLINUX'] || ''
Expand All @@ -81,8 +80,9 @@ test(
'build manylinux pip',
async () => {
// await testOneCUDA('none', false)
// await testOneCUDA('10.2')
await testOneCUDA('10.1', true)
await testOneCUDA('10.2', false)
// await testOneCUDA('10.1', true)
// await testOneCUDA('11.4', false)
},
MINUTES30
)
10 changes: 7 additions & 3 deletions action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,15 +42,15 @@ inputs:
force-rebuild:
description: 'rebuild even package with same key found'
required: false
default: 'false'
default: false
dry-run:
description: ''
required: false
default: 'false'
default: false
self-hosted:
description: ''
required: false
default: 'false'
default: false
python-versions:
description: ''
required: true
Expand All @@ -71,6 +71,10 @@ inputs:
description: 'Retry a failed build oneflow or not'
required: false
default: false
clean-ccache:
description: ''
required: false
default: false
docker-run-use-system-http-proxy:
description: ''
required: false
Expand Down
2 changes: 1 addition & 1 deletion degist/download/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ inputs:
digest-cache-dir:
description: ''
required: false
default: '~/digest-cache'
default: './digest-cache'

outputs:
entry-dir:
Expand Down
2 changes: 1 addition & 1 deletion dist/buildImage/index.js

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion dist/buildOneFlow/index.js

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion dist/cacheComplete/index.js

Large diffs are not rendered by default.

Binary file not shown.
4 changes: 2 additions & 2 deletions dist/downloadByDigest/index.js

Large diffs are not rendered by default.

Binary file not shown.
Binary file not shown.
6 changes: 3 additions & 3 deletions dist/genBuildMatrix/index.js

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions dist/genTestMatrix/index.js

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions dist/mirrorTools/index.js

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion dist/postCacheComplete/index.js

Large diffs are not rendered by default.

Binary file not shown.
4 changes: 2 additions & 2 deletions dist/uploadByDigest/index.js

Large diffs are not rendered by default.

Binary file not shown.
Binary file not shown.
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
"all": "npm run build && npm run format && npm run lint && npm run package && npm test",
"fast": "npm run build && npm run package && npm test",
"test-ssh": "npm run build && TEST_SSH=1 npm run test -- __tests__/ssh.test.ts",
"test-cuda": "npm run build && TEST_MANYLINUX='img;build' npm run test -- __tests__/cuda.test.ts",
"test-cuda": "TEST_MANYLINUX='img;build' npm run test -- __tests__/cuda.test.ts",
"test-hash": "npm run build && npm run test -- __tests__/hash.test.ts",
"pub": "rm -rf dist && rm -rf lib && npm run all && git add . && git commit -m publishing && git push"
},
Expand Down
22 changes: 19 additions & 3 deletions src/utils/docker.ts
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,8 @@ async function buildAndMakeWheel(
createOptions: Object,
docker: Docker,
buildDir: string,
shouldCleanBuildDir: Boolean
shouldCleanBuildDir: Boolean,
shouldCleanCcache: Boolean
): Promise<void> {
const shouldSymbolicLinkLld = core.getBooleanInput('docker-run-use-lld')
const shouldAuditWheel = core.getBooleanInput('wheel-audit', {
Expand All @@ -268,6 +269,13 @@ async function buildAndMakeWheel(
if (shouldCleanBuildDir) {
await runBash(container, `rm -rf ${path.join(buildDir, '*')}`)
}
await runBash(container, 'ccache -sv')
if (shouldCleanCcache) {
core.warning(`cleaning ccache...`)
await runBash(container, 'ccache -C')
await runBash(container, `rm -rf ~/.ccache/*`)
await runBash(container, 'ccache -sv')
}
const pythonVersions: string[] = core.getMultilineInput('python-versions', {
required: true
})
Expand Down Expand Up @@ -386,15 +394,23 @@ export async function buildOneFlow(tag: string): Promise<void> {
`ONEFLOW_CI_LLVM_DIR=${llvmDir}`
].concat(httpProxyEnvs)
}

try {
const shouldCleanCcache = core.getBooleanInput('clean-ccache')
await killContainer(docker, containerName)
await buildAndMakeWheel(createOptions, docker, buildDir, false)
await buildAndMakeWheel(
createOptions,
docker,
buildDir,
false,
shouldCleanCcache
)
} catch (error) {
const retryFailedBuild = core.getBooleanInput('retry-failed-build')
if (retryFailedBuild) {
core.warning('Retry Build and Make Wheel.')
await killContainer(docker, containerName)
await buildAndMakeWheel(createOptions, docker, buildDir, true)
await buildAndMakeWheel(createOptions, docker, buildDir, true, false)
} else {
core.setFailed(error as Error)
throw error
Expand Down
7 changes: 1 addition & 6 deletions src/utils/exec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,7 @@ export async function exec(
args?: string[],
options?: ExecOptions
): Promise<number> {
const isDryRun: boolean = core.getBooleanInput('dry-run')
if (isDryRun) {
return 0
} else {
return await exec_.exec(commandLine, args, options)
}
return await exec_.exec(commandLine, args, options)
}

/**
Expand Down
23 changes: 14 additions & 9 deletions src/utils/ssh.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import path from 'path'
import * as core from '@actions/core'
import * as fs from 'fs'
import Client from 'ssh2-sftp-client'
import * as exec from '@actions/exec'

function getEntryDir(tankDir: string, digest: string, entry: string): string {
return path.join(tankDir, 'digest', digest, entry)
Expand All @@ -27,6 +28,9 @@ export async function uploadByDigest(): Promise<void> {
const failed: string[] = []
const successful: string[] = []
const tankDst = path.join(getEntryDir(sshTankPath, digest, entry), dstDir)
const rmCommand = `rm -rf ${tankDst}`
core.info(`[cmd] ${rmCommand}`)
await ssh.execCommand(rmCommand)
const isSuccessful = await ssh.putDirectory(srcDir, tankDst, {
recursive: true,
concurrency: 10,
Expand Down Expand Up @@ -59,29 +63,30 @@ export async function downloadByDigest(): Promise<void> {
const entryDir = path.join(digestDir, entry)
const sshTankHost = core.getInput('ssh-tank-host', {required: true})
const sshTankPath = core.getInput('ssh-tank-path', {required: true})
if (!fs.existsSync(digestDir)) {
// remove all if it is a different digestDir
if (fs.existsSync(cacheDir)) {
core.info(`[rm] ${cacheDir}`)
fs.rmdirSync(cacheDir, {recursive: true})
}
fs.mkdirSync(digestDir, {recursive: true})
}
fs.mkdirSync(digestDir, {recursive: true})
core.setOutput('entry-dir', entryDir) // setOutput before return
if (fs.existsSync(entryDir)) {
core.info(`[exist] ${entryDir}`)
return
}
const remoteDir = getEntryDir(sshTankPath, digest, entry)
if (os.hostname() === 'oneflow-13' && sshTankHost === '192.168.1.13') {
core.info(`[symlink] ${os.hostname()}`)
await exec.exec('mkdir', ['-p', entryDir])
await exec.exec('rm', ['-rf', entryDir])
await exec.exec('ln', ['-sf', remoteDir, entryDir])
return
}
const sftp = new Client()
try {
core.info(`[connect] ${sshTankHost}`)
await sftp.connect({
host: sshTankHost,
username: os.userInfo().username,
privateKey: fs.readFileSync(
path.join(os.userInfo().homedir, '.ssh/id_rsa')
)
})
const remoteDir = getEntryDir(sshTankPath, digest, entry)
core.info(`[from] ${remoteDir}`)
core.info(`[to] ${entryDir}`)
await sftp.downloadDir(remoteDir, entryDir)
Expand Down

0 comments on commit a0ab40d

Please sign in to comment.