-
Notifications
You must be signed in to change notification settings - Fork 485
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
perf(childprocess): spawned processes are tracked and monitored. #6304
Changes from all commits
53aaeef
d48f1b3
44fe332
9d40215
bcb6423
8975c33
72681fd
5c652b6
6bb8c74
a9224d3
4bfff0a
61e6fbf
2df7506
efe336e
30ab5b5
10537e1
46a556b
3c1531f
e9b4605
c1975f9
59f8215
39175c0
e04eefe
abf3603
4f2eaf0
b0976b3
ec3abbf
422fa1d
7a702ff
db973f5
2e94873
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -44,9 +44,14 @@ export class PollingSet<T> extends Set<T> { | |
this.clearTimer() | ||
} | ||
} | ||
|
||
// TODO(hkobew): Override the add method instead of adding a separate method. If we add an item to the set, the timer should always start. | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I plan to do this as a quick follow-up, since it involves touching some other files unrelated to the process work. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. |
||
/**
 * Adds `id` to the set and makes sure the poll timer is running.
 *
 * An already-running timer is reused; otherwise a new interval is created that
 * calls `poll()` every `interval` milliseconds.
 */
public start(id: T): void {
    this.add(id)
    // Only create a timer when none exists, so repeated calls never stack intervals.
    this.pollTimer ??= globals.clock.setInterval(() => this.poll(), this.interval)
}
|
||
/**
 * Empties the set.
 *
 * `clearTimer()` is invoked before the items are removed, so the timer is
 * dealt with first and the underlying `Set` is emptied afterwards.
 */
public override clear(): void {
    this.clearTimer()
    super.clear()
}
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,6 +7,8 @@ import * as proc from 'child_process' // eslint-disable-line no-restricted-impor | |
import * as crossSpawn from 'cross-spawn' | ||
import * as logger from '../logger' | ||
import { Timeout, CancellationError, waitUntil } from './timeoutUtils' | ||
import { PollingSet } from './pollingSet' | ||
import { getLogger } from '../logger/logger' | ||
|
||
export interface RunParameterContext { | ||
/** Reports an error parsed from the stdin/stdout streams. */ | ||
|
@@ -61,14 +63,144 @@ export interface ChildProcessResult { | |
|
||
export const eof = Symbol('EOF') | ||
|
||
/** Resource usage sample for a single process. */
export interface ProcessStats { | ||
/** Memory in bytes (compared against a threshold expressed as `100 * 1024 * 1024`). */
memory: number | ||
/** CPU usage as a percentage, as reported by `ps %cpu` / `PercentProcessorTime`. */
cpu: number | ||
} | ||
export class ChildProcessTracker { | ||
static readonly pollingInterval: number = 10000 // Check usage every 10 seconds | ||
static readonly thresholds: ProcessStats = { | ||
memory: 100 * 1024 * 1024, // 100 MB | ||
cpu: 50, | ||
} | ||
static readonly logger = getLogger('childProcess') | ||
#processByPid: Map<number, ChildProcess> = new Map<number, ChildProcess>() | ||
#pids: PollingSet<number> | ||
|
||
/**
 * Creates a tracker whose pid set is a `PollingSet` configured with
 * `pollingInterval` and a callback that runs `monitor()`.
 */
public constructor() {
    const onPoll = () => this.monitor()
    this.#pids = new PollingSet(ChildProcessTracker.pollingInterval, onPoll)
}
|
||
/** Stops tracking every pid whose associated `ChildProcess` reports `stopped`. */
private cleanUp() {
    // Collect first, delete afterwards, so the pid set is not mutated mid-iteration.
    const stoppedPids: number[] = []
    for (const trackedPid of this.#pids.values()) {
        if (this.#processByPid.get(trackedPid)?.stopped) {
            stoppedPids.push(trackedPid)
        }
    }
    stoppedPids.forEach((stoppedPid) => {
        this.delete(stoppedPid)
    })
}
|
||
/**
 * One polling pass: prunes stopped processes, logs how many pids remain,
 * then checks the usage of each remaining pid one at a time.
 */
private async monitor() {
    this.cleanUp()

    const activeCount = this.#pids.size
    ChildProcessTracker.logger.debug(`Active running processes size: ${activeCount}`)

    const trackedPids = this.#pids.values()
    for (const trackedPid of trackedPids) {
        await this.checkProcessUsage(trackedPid)
    }
}
|
||
/**
 * Samples usage for `pid` and logs a warning for each threshold it exceeds.
 *
 * @param pid Process id to inspect; an untracked pid is logged as a warning and skipped.
 */
private async checkProcessUsage(pid: number): Promise<void> {
    const log = ChildProcessTracker.logger

    if (!this.#pids.has(pid)) {
        log.warn(`Missing process with id ${pid}`)
        return
    }

    const stats = this.getUsage(pid)
    if (!stats) {
        return
    }

    const limits = ChildProcessTracker.thresholds
    log.debug(`Process ${pid} usage: %O`, stats)

    const overMemory = stats.memory > limits.memory
    if (overMemory) {
        log.warn(`Process ${pid} exceeded memory threshold: ${stats.memory}`)
    }

    const overCpu = stats.cpu > limits.cpu
    if (overCpu) {
        log.warn(`Process ${pid} exceeded cpu threshold: ${stats.cpu}`)
    }
}
|
||
/**
 * Starts tracking `childProcess`, keyed by the value of its `pid()`.
 *
 * @param childProcess Process to register; registering also calls `start` on the pid set.
 */
public add(childProcess: ChildProcess) {
    const processId = childProcess.pid()
    this.#processByPid.set(processId, childProcess)
    this.#pids.start(processId)
}
|
||
/**
 * Stops tracking the process with the given id. Does not stop the process itself.
 *
 * @param childProcessId Pid to remove from both the pid set and the pid-to-process map.
 */
public delete(childProcessId: number) {
    this.#pids.delete(childProcessId)
    this.#processByPid.delete(childProcessId)
}
|
||
/** Number of pids currently tracked. */
public get size() {
    const trackedCount = this.#pids.size
    return trackedCount
}
|
||
/**
 * Reports whether `childProcess` is currently tracked, looked up by its `pid()`.
 *
 * @param childProcess Process to look for.
 */
public has(childProcess: ChildProcess) {
    const processId = childProcess.pid()
    return this.#pids.has(processId)
}
|
||
/**
 * Calls `stop(true)` on every tracked process, then empties both the pid set
 * and the pid-to-process map.
 */
public clear() {
    this.#processByPid.forEach((trackedProcess) => {
        trackedProcess.stop(true)
    })
    this.#pids.clear()
    this.#processByPid.clear()
}
|
||
public getUsage(pid: number): ProcessStats { | ||
try { | ||
// isWin() leads to circular dependency. | ||
return process.platform === 'win32' ? getWindowsUsage() : getUnixUsage() | ||
} catch (e) { | ||
ChildProcessTracker.logger.warn(`Failed to get process stats for ${pid}: ${e}`) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. If it ever fails, we should probably not attempt it again on future invocations? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'm not really sure what could cause it to fail here. Do you think there is a risk in continually retrying? Maybe potential log spam? Could this be mitigated by setting a longer polling interval? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Permissions issues, usually.
Yeah, that's at least my first thought. When a system is in a bad state, it can make things worse if logs "cascade" with a deluge of redundant warnings. |
||
return { cpu: 0, memory: 0 } | ||
} | ||
|
||
/**
 * Reads CPU and memory usage for `pid` (captured from the enclosing `getUsage` call)
 * by running two synchronous `wmic` queries.
 *
 * @returns `cpu` as a percentage and `memory` in bytes; a value that cannot be parsed is reported as 0.
 * @throws Whatever `execFileSync` throws when `wmic` cannot be run or exits with an error.
 */
function getWindowsUsage() {
    // wmic prints a header row followed by the value row; only the value row is needed.
    // The fallback to '' keeps parsing well-defined when the output has no second line.
    const queryValueRow = (args: string[]): string =>
        proc.execFileSync('wmic', args).toString().split('\n')[1] ?? ''

    const cpuPercentage = parseFloat(
        queryValueRow([
            'path',
            'Win32_PerfFormattedData_PerfProc_Process',
            'where',
            `IDProcess=${pid}`,
            'get',
            'PercentProcessorTime',
        ])
    )

    // Win32_Process.WorkingSetSize is already expressed in bytes, so it must not be
    // scaled by 1024 (doing so would trip the memory threshold ~1024x too early).
    const memoryBytes = parseInt(
        queryValueRow(['process', 'where', `ProcessId=${pid}`, 'get', 'WorkingSetSize']),
        10
    )

    return {
        cpu: Number.isNaN(cpuPercentage) ? 0 : cpuPercentage,
        memory: Number.isNaN(memoryBytes) ? 0 : memoryBytes,
    }
}
|
||
/**
 * Reads CPU and memory usage for `pid` (captured from the enclosing `getUsage` call)
 * with a single synchronous `ps` invocation.
 *
 * @returns `cpu` as a percentage and `memory` in bytes; a value that cannot be parsed is reported as 0.
 * @throws Whatever `execFileSync` throws when `ps` cannot be run or exits with an error.
 */
function getUnixUsage() {
    // One call returns both columns, so the two values come from the same sample
    // and only one process is spawned per poll. Output is a header row, then the value row.
    const psOutput = proc.execFileSync('ps', ['-p', pid.toString(), '-o', '%cpu,rss']).toString()
    const valueRow = psOutput.split('\n')[1] ?? ''
    const [cpuField = '', rssField = ''] = valueRow.trim().split(/\s+/)

    const cpuPercentage = parseFloat(cpuField)
    // `ps` reports rss in kilobytes; convert to bytes to match the memory threshold.
    const rssKilobytes = parseInt(rssField, 10)

    return {
        cpu: Number.isNaN(cpuPercentage) ? 0 : cpuPercentage,
        memory: Number.isNaN(rssKilobytes) ? 0 : rssKilobytes * 1024,
    }
}
} | ||
} | ||
|
||
/** | ||
* Convenience class to manage a child process | ||
* To use: | ||
* - instantiate | ||
* - call and await run to get the results (pass or fail) | ||
*/ | ||
export class ChildProcess { | ||
static #runningProcesses: Map<number, ChildProcess> = new Map() | ||
static #runningProcesses = new ChildProcessTracker() | ||
static stopTimeout = 3000 | ||
#childProcess: proc.ChildProcess | undefined | ||
#processErrors: Error[] = [] | ||
#processResult: ChildProcessResult | undefined | ||
|
@@ -285,7 +417,7 @@ export class ChildProcess { | |
child.kill(signal) | ||
|
||
if (force === true) { | ||
waitUntil(async () => this.stopped, { timeout: 3000, interval: 200, truthy: true }) | ||
waitUntil(async () => this.stopped, { timeout: ChildProcess.stopTimeout, interval: 200, truthy: true }) | ||
.then((stopped) => { | ||
if (!stopped) { | ||
child.kill('SIGKILL') | ||
|
@@ -309,7 +441,7 @@ export class ChildProcess { | |
if (pid === undefined) { | ||
return | ||
} | ||
ChildProcess.#runningProcesses.set(pid, this) | ||
ChildProcess.#runningProcesses.add(this) | ||
|
||
const timeoutListener = options?.timeout?.token.onCancellationRequested(({ agent }) => { | ||
const message = agent === 'user' ? 'Cancelled: ' : 'Timed out: ' | ||
|
@@ -319,7 +451,7 @@ export class ChildProcess { | |
|
||
const dispose = () => { | ||
timeoutListener?.dispose() | ||
ChildProcess.#runningProcesses.delete(pid) | ||
ChildProcess.#runningProcesses.delete(this.pid()) | ||
} | ||
|
||
process.on('exit', dispose) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Cool, I love that we keep enhancing dev-mode.