Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rework restart/interrupt to actually restart the entire server #5182

Merged
merged 3 commits into from
Apr 10, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions news/2 Fixes/5025.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Restarting the kernel will eventually force Jupyter server to shutdown if it doesn't come back.
6 changes: 3 additions & 3 deletions src/client/common/utils/async.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@

'use strict';

export async function sleep(timeout: number) {
return new Promise((resolve) => {
setTimeout(resolve, timeout);
export async function sleep(timeout: number) : Promise<number> {
return new Promise<number>((resolve) => {
setTimeout(() => resolve(timeout), timeout);
});
}

Expand Down
94 changes: 52 additions & 42 deletions src/client/datascience/history/history.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ import { IInterpreterService, PythonInterpreter } from '../../interpreter/contra
import { captureTelemetry, sendTelemetryEvent } from '../../telemetry';
import { EditorContexts, Identifiers, Telemetry } from '../constants';
import { JupyterInstallError } from '../jupyter/jupyterInstallError';
import { JupyterKernelPromiseFailedError } from '../jupyter/jupyterKernelPromiseFailedError';
import {
CellState,
ICell,
Expand Down Expand Up @@ -182,15 +183,15 @@ export class History implements IHistory {
break;

case HistoryMessages.RestartKernel:
this.restartKernel();
this.restartKernel().ignoreErrors();
break;

case HistoryMessages.ReturnAllCells:
this.dispatchMessage(message, payload, this.handleReturnAllCells);
break;

case HistoryMessages.Interrupt:
this.interruptKernel();
this.interruptKernel().ignoreErrors();
break;

case HistoryMessages.Export:
Expand Down Expand Up @@ -313,58 +314,52 @@ export class History implements IHistory {
}

@captureTelemetry(Telemetry.RestartKernel)
public restartKernel() {
public async restartKernel() : Promise<void> {
if (this.jupyterServer && !this.restartingKernel) {
// Ask the user if they want us to restart or not.
const message = localize.DataScience.restartKernelMessage();
const yes = localize.DataScience.restartKernelMessageYes();
const no = localize.DataScience.restartKernelMessageNo();

this.applicationShell.showInformationMessage(message, yes, no).then(v => {
if (v === yes) {
this.restartKernelInternal().catch(e => {
this.applicationShell.showErrorMessage(e);
this.logger.logError(e);
});
}
});
const v = await this.applicationShell.showInformationMessage(message, yes, no);
if (v === yes) {
await this.restartKernelInternal();
}
}

return Promise.resolve();
}

@captureTelemetry(Telemetry.Interrupt)
public interruptKernel() {
public async interruptKernel() : Promise<void> {
if (this.jupyterServer && !this.restartingKernel) {
const status = this.statusProvider.set(localize.DataScience.interruptKernelStatus());

const settings = this.configuration.getSettings();
const interruptTimeout = settings.datascience.jupyterInterruptTimeout;

this.jupyterServer.interruptKernel(interruptTimeout)
.then(result => {
status.dispose();
if (result === InterruptResult.TimedOut) {
const message = localize.DataScience.restartKernelAfterInterruptMessage();
const yes = localize.DataScience.restartKernelMessageYes();
const no = localize.DataScience.restartKernelMessageNo();

this.applicationShell.showInformationMessage(message, yes, no).then(v => {
if (v === yes) {
this.restartKernelInternal().catch(e => {
this.applicationShell.showErrorMessage(e);
this.logger.logError(e);
});
}
});
} else if (result === InterruptResult.Restarted) {
// Uh-oh, keyboard interrupt crashed the kernel.
this.addSysInfo(SysInfoReason.Interrupt).ignoreErrors();
try {
const result = await this.jupyterServer.interruptKernel(interruptTimeout);
status.dispose();

// We timed out, ask the user if they want to restart instead.
if (result === InterruptResult.TimedOut) {
const message = localize.DataScience.restartKernelAfterInterruptMessage();
const yes = localize.DataScience.restartKernelMessageYes();
const no = localize.DataScience.restartKernelMessageNo();
const v = await this.applicationShell.showInformationMessage(message, yes, no);
if (v === yes) {
await this.restartKernelInternal();
}
})
.catch(err => {
status.dispose();
this.logger.logError(err);
this.applicationShell.showErrorMessage(err);
});
} else if (result === InterruptResult.Restarted) {
// Uh-oh, keyboard interrupt crashed the kernel.
this.addSysInfo(SysInfoReason.Interrupt).ignoreErrors();
}
} catch (err) {
status.dispose();
this.logger.logError(err);
this.applicationShell.showErrorMessage(err);
}
}
}

Expand Down Expand Up @@ -448,26 +443,41 @@ export class History implements IHistory {
}
}

private async restartKernelInternal(): Promise<void> {
this.restartingKernel = true;

// First we need to finish all outstanding cells.
private finishOutstandingCells() {
this.unfinishedCells.forEach(c => {
c.state = CellState.error;
this.postMessage(HistoryMessages.FinishCell, c).ignoreErrors();
});
this.unfinishedCells = [];
this.potentiallyUnfinishedStatus.forEach(s => s.dispose());
this.potentiallyUnfinishedStatus = [];
}

private async restartKernelInternal(): Promise<void> {
this.restartingKernel = true;

// First we need to finish all outstanding cells.
this.finishOutstandingCells();

// Set our status
const status = this.statusProvider.set(localize.DataScience.restartingKernelStatus());

try {
if (this.jupyterServer) {
await this.jupyterServer.restartKernel();
await this.jupyterServer.restartKernel(this.generateDataScienceExtraSettings().jupyterInterruptTimeout);
await this.addSysInfo(SysInfoReason.Restart);
}
} catch (exc) {
// If we get a kernel promise failure, then restarting timed out. Just shutdown and restart the entire server
if (exc instanceof JupyterKernelPromiseFailedError && this.jupyterServer) {
await this.jupyterServer.dispose();
await this.loadJupyterServer(true);
await this.addSysInfo(SysInfoReason.Restart);
} else {
// Show the error message
this.applicationShell.showErrorMessage(exc);
this.logger.logError(exc);
}
} finally {
status.dispose();
this.restartingKernel = false;
Expand Down
4 changes: 2 additions & 2 deletions src/client/datascience/jupyter/jupyterExecution.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
// Licensed under the MIT License.
'use strict';
import { Kernel } from '@jupyterlab/services';
import { execSync } from 'child_process';
import * as fs from 'fs-extra';
import * as os from 'os';
import * as path from 'path';
Expand Down Expand Up @@ -36,7 +37,6 @@ import {
import { JupyterConnection, JupyterServerInfo } from './jupyterConnection';
import { JupyterKernelSpec } from './jupyterKernelSpec';
import { JupyterWaitForIdleError } from './jupyterWaitForIdleError';
import { execSync } from 'child_process';

enum ModuleExistsResult {
NotFound,
Expand Down Expand Up @@ -360,7 +360,7 @@ export class JupyterExecutionBase implements IJupyterExecution {
// We definitely need an ip address.
extraArgs.push('--ip');
extraArgs.push('127.0.0.1');

// Now see if we need --allow-root.
const idResults = execSync('id', {encoding: 'utf-8'});
if (idResults.includes('(root)')) {
Expand Down
9 changes: 9 additions & 0 deletions src/client/datascience/jupyter/jupyterInterruptError.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
'use strict';

export class JupyterInterruptError extends Error {
constructor(message: string) {
super(message);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
'use strict';

export class JupyterKernelPromiseFailedError extends Error {
constructor(message: string) {
super(message);
}
}
71 changes: 43 additions & 28 deletions src/client/datascience/jupyter/jupyterServer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -80,12 +80,23 @@ class CellSubscriber {
this.attemptToFinish();
}

public reject() {
// tslint:disable-next-line:no-any
public reject(e: any) {
if (!this.deferred.completed) {
this.cellRef.state = CellState.error;
this.subscriber.next(this.cellRef);
this.subscriber.complete();
this.deferred.reject(e);
this.promiseComplete(this);
}
}

public cancel() {
if (!this.deferred.completed) {
this.cellRef.state = CellState.error;
this.subscriber.next(this.cellRef);
this.subscriber.complete();
this.deferred.reject();
this.deferred.resolve();
this.promiseComplete(this);
}
}
Expand Down Expand Up @@ -162,7 +173,7 @@ export class JupyterServerBase implements INotebookServer {

// Wait for it to be ready
traceInfo(`Waiting for idle ${this.id}`);
await this.session.waitForIdle();
await this.session.waitForIdle(Number.MAX_SAFE_INTEGER);

traceInfo(`Performing initial setup ${this.id}`);
// Run our initial setup and plot magics
Expand All @@ -186,8 +197,8 @@ export class JupyterServerBase implements INotebookServer {
return this.shutdown();
}

public waitForIdle(): Promise<void> {
return this.session ? this.session.waitForIdle() : Promise.resolve();
public waitForIdle(timeoutMs: number): Promise<void> {
return this.session ? this.session.waitForIdle(timeoutMs) : Promise.resolve();
}

public execute(code: string, file: string, line: number, id: string, cancelToken?: CancellationToken, silent?: boolean): Promise<ICell[]> {
Expand Down Expand Up @@ -261,17 +272,16 @@ export class JupyterServerBase implements INotebookServer {
};
}

public async restartKernel(): Promise<void> {
public async restartKernel(timeoutMs: number): Promise<void> {
if (this.session) {
// Update our start time so we don't keep sending responses
this.sessionStartTime = Date.now();

// Complete all pending as an error. We're restarting
const copyPending = [...this.pendingCellSubscriptions];
copyPending.forEach(c => c.reject());
this.finishUncompletedCells();

// Restart our kernel
await this.session.restart();
await this.session.restart(timeoutMs);

// Rerun our initial setup for the notebook
this.ranInitialSetup = false;
Expand All @@ -289,14 +299,12 @@ export class JupyterServerBase implements INotebookServer {
// restarted the kernel.
const interruptBeginTime = Date.now();

// Copy the list of pending cells. If these don't finish before the timeout
// then our interrupt didn't work.
const copyPending = [...this.pendingCellSubscriptions];
// Get just the first pending cell (it should be the oldest). If it doesn't finish
// by our timeout, then our interrupt didn't work.
const firstPending = this.pendingCellSubscriptions.length > 0 ? this.pendingCellSubscriptions[0] : undefined;

// Create a promise that resolves when all of our currently
// pending cells finish.
const finished = copyPending.length > 0 ?
Promise.all(copyPending.map(d => d.promise)) : Promise.resolve([CellState.finished]);
// Create a promise that resolves when the first pending cell finishes
const finished = firstPending ? firstPending.promise : Promise.resolve(CellState.finished);

// Create a deferred promise that resolves if we have a failure
const restarted = createDeferred<CellState[]>();
Expand All @@ -315,37 +323,38 @@ export class JupyterServerBase implements INotebookServer {
restarted.resolve([]);

// Fail all of the active (might be new ones) pending cell executes. We restarted.
const newCopyPending = [...this.pendingCellSubscriptions];
newCopyPending.forEach(c => {
c.reject();
});
this.finishUncompletedCells();
};
const restartHandlerToken = this.session.onRestarted(restartHandler);

// Start our interrupt. If it fails, indicate a restart
this.session.interrupt().catch(exc => {
this.session.interrupt(timeoutMs).catch(exc => {
this.logger.logWarning(`Error during interrupt: ${exc}`);
restarted.resolve([]);
});

try {
// Wait for all of the pending cells to finish or the timeout to fire
const result = await Promise.race([finished, restarted.promise, sleep(timeoutMs)]);
const states = result as CellState[];

// See if we restarted or not
if (restarted.completed) {
return InterruptResult.Restarted;
}

if (states) {
// We got back the pending cells
return InterruptResult.Success;
// See if we timed out or not.
if (result === timeoutMs) {
// We timed out. You might think we should stop our pending list, but that's not
// up to us. The cells are still executing. The user has to request a restart or try again
return InterruptResult.TimedOut;
}

// We timed out. You might think we should stop our pending list, but that's not
// up to us. The cells are still executing. The user has to request a restart or try again
return InterruptResult.TimedOut;
// Cancel all other pending cells as we interrupted.
this.finishUncompletedCells();

// Indicate the interrupt worked.
return InterruptResult.Success;

} catch (exc) {
// Something failed. See if we restarted or not.
if (this.sessionStartTime && (interruptBeginTime < this.sessionStartTime)) {
Expand Down Expand Up @@ -379,6 +388,12 @@ export class JupyterServerBase implements INotebookServer {
};
}

private finishUncompletedCells() {
const copyPending = [...this.pendingCellSubscriptions];
copyPending.forEach(c => c.cancel());
this.pendingCellSubscriptions = [];
}

private getDisposedError(): Error {
// We may have been disposed because of a crash. See if our connection info is indicating shutdown
if (this.serverExitCode) {
Expand Down
8 changes: 4 additions & 4 deletions src/client/datascience/jupyter/jupyterServerFactory.ts
Original file line number Diff line number Diff line change
Expand Up @@ -83,9 +83,9 @@ export class JupyterServerFactory implements INotebookServer {
return server.dispose();
}

public async waitForIdle(): Promise<void> {
public async waitForIdle(timeoutMs: number): Promise<void> {
const server = await this.serverFactory.get();
return server.waitForIdle();
return server.waitForIdle(timeoutMs);
}

public async execute(code: string, file: string, line: number, id: string, cancelToken?: CancellationToken, silent?: boolean): Promise<ICell[]> {
Expand Down Expand Up @@ -118,9 +118,9 @@ export class JupyterServerFactory implements INotebookServer {
});
}

public async restartKernel(): Promise<void> {
public async restartKernel(timeoutMs: number): Promise<void> {
const server = await this.serverFactory.get();
return server.restartKernel();
return server.restartKernel(timeoutMs);
}

public async interruptKernel(timeoutMs: number): Promise<InterruptResult> {
Expand Down
Loading