Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

opening large or binary files #8152

Merged
merged 4 commits into from
Aug 3, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 35 additions & 2 deletions packages/core/src/common/buffer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
*--------------------------------------------------------------------------------------------*/
// based on https://github.com/microsoft/vscode/blob/04c36be045a94fee58e5f8992d3e3fd980294a84/src/vs/base/common/buffer.ts

/* eslint-disable no-null/no-null */

import { Buffer as SaferBuffer } from 'safer-buffer';
import * as iconv from 'iconv-lite';
import * as streams from './stream';
Expand Down Expand Up @@ -175,6 +177,19 @@ export namespace BinaryBufferReadable {
export function fromBuffer(buffer: BinaryBuffer): BinaryBufferReadable {
return streams.toReadable<BinaryBuffer>(buffer);
}
/**
 * Adapts a readable of strings into a readable of `BinaryBuffer`s.
 *
 * Every string pulled from `readable` is converted with `BinaryBuffer.fromString`;
 * any non-string value returned by `readable.read()` is reported as `null`.
 */
export function fromReadable(readable: streams.Readable<string>): BinaryBufferReadable {
    const read = (): BinaryBuffer | null => {
        const chunk = readable.read();
        return typeof chunk === 'string' ? BinaryBuffer.fromString(chunk) : null;
    };

    return { read };
}
}

export interface BinaryBufferReadableStream extends streams.ReadableStream<BinaryBuffer> { }
Expand All @@ -187,9 +202,27 @@ export namespace BinaryBufferReadableStream {
}
}

/** A buffered stream (`streams.ReadableBufferedStream`) whose chunks are `BinaryBuffer`s. */
export interface BinaryBufferReadableBufferedStream extends streams.ReadableBufferedStream<BinaryBuffer> { }
export namespace BinaryBufferReadableBufferedStream {
    /**
     * Collects the whole content of a buffered stream into a single `BinaryBuffer`.
     *
     * When `bufferedStream.ended` is set, only the chunks in `bufferedStream.buffer`
     * are concatenated. Otherwise those chunks are followed by whatever
     * `BinaryBufferReadableStream.toBuffer` yields for `bufferedStream.stream`.
     */
    export async function toBuffer(bufferedStream: streams.ReadableBufferedStream<BinaryBuffer>): Promise<BinaryBuffer> {
        if (!bufferedStream.ended) {
            // Snapshot the chunks read so far *before* waiting for the rest of the stream.
            const chunks = [...bufferedStream.buffer];
            chunks.push(await BinaryBufferReadableStream.toBuffer(bufferedStream.stream));

            return BinaryBuffer.concat(chunks);
        }

        return BinaryBuffer.concat(bufferedStream.buffer);
    }
}

/** A writeable stream (`streams.WriteableStream`) whose chunks are `BinaryBuffer`s. */
export interface BinaryBufferWriteableStream extends streams.WriteableStream<BinaryBuffer> { }
export namespace BinaryBufferWriteableStream {
// NOTE(review): the next two lines look like the removed side of the diff (the
// option-less `create()`), superseded by the `create(options?)` variant below —
// confirm before treating this span as compilable code.
export function create(): BinaryBufferWriteableStream {
return streams.newWriteableStream<BinaryBuffer>(chunks => BinaryBuffer.concat(chunks));
/**
 * Creates a writeable stream of `BinaryBuffer`s via `streams.newWriteableStream`,
 * passing a callback that joins chunks with `BinaryBuffer.concat` and forwarding
 * the optional `options` unchanged.
 */
export function create(options?: streams.WriteableStreamOptions): BinaryBufferWriteableStream {
return streams.newWriteableStream<BinaryBuffer>(chunks => BinaryBuffer.concat(chunks), options);
}
}
160 changes: 158 additions & 2 deletions packages/core/src/common/encoding-service.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,16 +17,20 @@
* Copyright (c) Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/

// based on https://github.com/microsoft/vscode/blob/04c36be045a94fee58e5f8992d3e3fd980294a84/src/vs/workbench/services/textfile/common/encoding.ts

/* eslint-disable no-null/no-null */

import * as iconv from 'iconv-lite';
import { Buffer } from 'safer-buffer';
import { injectable } from 'inversify';
import { BinaryBuffer } from './buffer';
import { BinaryBuffer, BinaryBufferReadableStream, BinaryBufferReadable } from './buffer';
import { UTF8, UTF8_with_bom, UTF16be, UTF16le, UTF16be_BOM, UTF16le_BOM, UTF8_BOM } from './encodings';
import { newWriteableStream, ReadableStream, Readable } from './stream';

// Byte thresholds used by binary and encoding detection.
// The two *_GUESS_MIN_BYTES values are the fallback thresholds `decodeStream`
// chooses between, depending on whether encoding guessing is requested.
const ZERO_BYTE_DETECTION_BUFFER_MAX_LEN = 512; // number of bytes to look at to decide about a file being binary or not
const NO_ENCODING_GUESS_MIN_BYTES = 512; // when not auto guessing the encoding, small number of bytes are enough
const AUTO_ENCODING_GUESS_MIN_BYTES = 512 * 8; // with auto guessing we want a lot more content to be read for guessing
const AUTO_ENCODING_GUESS_MAX_BYTES = 512 * 128; // set an upper limit for the number of bytes we pass on to jschardet

// we explicitly ignore a specific set of encodings from auto guessing
Expand All @@ -46,6 +50,17 @@ export interface DetectedEncoding {
seemsBinary?: boolean
}

/** Options accepted by `EncodingService.decodeStream`. */
export interface DecodeStreamOptions {
/** Forwarded to `detectEncoding` as its second argument when the buffered bytes are inspected. */
guessEncoding?: boolean;
/** Number of bytes to buffer from the source before encoding detection is attempted. */
minBytesRequiredForDetection?: number;

/**
 * Called with the detected encoding (possibly `undefined`) once detection has run.
 * The encoding it resolves to replaces the detected one and is used to create the decoder.
 */
overwriteEncoding(detectedEncoding: string | undefined): Promise<string>;
}
/** Result that `EncodingService.decodeStream` resolves with. */
export interface DecodeStreamResult {
/** Stream of decoded text. */
stream: ReadableStream<string>;
/** Detection result; its `encoding` holds the value returned by `overwriteEncoding`. */
detected: DetectedEncoding;
}

@injectable()
export class EncodingService {

Expand Down Expand Up @@ -221,4 +236,145 @@ export class EncodingService {
return this.toIconvEncoding(guessed.encoding);
}

/**
 * Decodes a stream of binary chunks into a stream of strings.
 *
 * Incoming chunks are buffered until at least `minBytesRequiredForDetection` bytes
 * have arrived (or the source ends). The buffered bytes are then passed to
 * `detectEncoding`, the result is run through `options.overwriteEncoding`, and an
 * iconv decoder for the final encoding is created. From then on chunks are decoded
 * and written to the target stream directly.
 *
 * @param source stream of `BinaryBuffer` chunks to decode
 * @param options detection settings and the mandatory `overwriteEncoding` hook
 * @returns a promise resolving with the decoded stream and the detected encoding once
 * the decoder has been created; it rejects if detection, `overwriteEncoding` or decoder
 * creation throws. Errors emitted by `source` are forwarded to the decoded stream.
 */
decodeStream(source: BinaryBufferReadableStream, options: DecodeStreamOptions): Promise<DecodeStreamResult> {
    // `??` binds tighter than `?:`, so the fallback has to be parenthesized. Without the
    // parentheses an explicitly passed `minBytesRequiredForDetection` would only be used
    // as the *condition* of the ternary and never as the actual threshold.
    const minBytesRequiredForDetection = options.minBytesRequiredForDetection ??
        (options.guessEncoding ? AUTO_ENCODING_GUESS_MIN_BYTES : NO_ENCODING_GUESS_MIN_BYTES);

    return new Promise<DecodeStreamResult>((resolve, reject) => {
        const target = newWriteableStream<string>(strings => strings.join(''));

        const bufferedChunks: BinaryBuffer[] = [];
        let bytesBuffered = 0;

        let decoder: iconv.DecoderStream | undefined = undefined;

        const createDecoder = async () => {
            try {

                // detect encoding from buffer
                const detected = await this.detectEncoding(BinaryBuffer.concat(bufferedChunks), options.guessEncoding);

                // ensure to respect overwrite of encoding
                detected.encoding = await options.overwriteEncoding(detected.encoding);

                // decode and write buffered content
                decoder = iconv.getDecoder(this.toIconvEncoding(detected.encoding));
                const decoded = decoder.write(Buffer.from(BinaryBuffer.concat(bufferedChunks).buffer));
                target.write(decoded);

                bufferedChunks.length = 0;
                bytesBuffered = 0;

                // signal to the outside our detected encoding and final decoder stream
                resolve({
                    stream: target,
                    detected
                });
            } catch (error) {
                reject(error);
            }
        };

        // Stream error: forward to target
        source.on('error', error => target.error(error));

        // Stream data
        source.on('data', async chunk => {

            // if the decoder is ready, we just write directly
            if (decoder) {
                target.write(decoder.write(Buffer.from(chunk.buffer)));
            } else {
                bufferedChunks.push(chunk);
                bytesBuffered += chunk.byteLength;

                // buffered enough data for encoding detection, create stream
                if (bytesBuffered >= minBytesRequiredForDetection) {

                    // pause stream here until the decoder is ready
                    source.pause();

                    await createDecoder();

                    // resume stream now that decoder is ready but
                    // outside of this stack to reduce recursion
                    setTimeout(() => source.resume());
                }
            }
        });

        // Stream end
        source.on('end', async () => {

            // we were still waiting for data to do the encoding
            // detection. thus, wrap up starting the stream even
            // without all the data to get things going
            if (!decoder) {
                await createDecoder();
            }

            // end the target with the remainders of the decoder
            target.end(decoder?.end());
        });
    });
}

/**
 * Encodes a string, or a readable of strings, with the encoding described by `options`.
 *
 * If the resolved encoding is UTF-8 and no BOM is requested, no encoder is involved:
 * a string is converted with `BinaryBuffer.fromString`, a readable is adapted with
 * `BinaryBufferReadable.fromReadable`, and `undefined` is passed through.
 *
 * In every other case a readable is returned that encodes chunk by chunk with an
 * iconv encoder. If a BOM is requested but the input produced no chunk at all, the
 * BOM for UTF-8 / UTF-16 BE / UTF-16 LE is emitted explicitly.
 */
encodeStream(value: string | Readable<string>, options?: ResourceEncoding): Promise<BinaryBuffer | BinaryBufferReadable>
encodeStream(value?: string | Readable<string>, options?: ResourceEncoding): Promise<BinaryBuffer | BinaryBufferReadable | undefined>;
async encodeStream(value: string | Readable<string> | undefined, options?: ResourceEncoding): Promise<BinaryBuffer | BinaryBufferReadable | undefined> {
    const requestedEncoding = options?.encoding;
    const addBOM = options?.hasBOM;
    const encoding = this.toIconvEncoding(requestedEncoding);

    // Plain UTF-8 without BOM needs no encoder.
    if (encoding === UTF8 && !addBOM) {
        if (value === undefined) {
            return undefined;
        }
        if (typeof value === 'string') {
            return BinaryBuffer.fromString(value);
        }
        return BinaryBufferReadable.fromReadable(value);
    }

    const input = value || '';
    const source = typeof input === 'string' ? Readable.fromString(input) : input;
    const encoder = iconv.getEncoder(encoding, { addBOM });

    // BOM bytes for the target encoding, or `undefined` when none is known for it.
    const standaloneBOM = (): BinaryBuffer | undefined => {
        if (encoding === UTF8 || encoding === UTF8_with_bom) {
            return BinaryBuffer.wrap(Uint8Array.from(UTF8_BOM));
        }
        if (encoding === UTF16be) {
            return BinaryBuffer.wrap(Uint8Array.from(UTF16be_BOM));
        }
        if (encoding === UTF16le) {
            return BinaryBuffer.wrap(Uint8Array.from(UTF16le_BOM));
        }
        return undefined;
    };

    let emittedBytes = false;
    let exhausted = false;

    const read = (): BinaryBuffer | null => {
        if (exhausted) {
            return null;
        }

        const chunk = source.read();
        if (typeof chunk === 'string') {
            emittedBytes = true;

            return BinaryBuffer.wrap(encoder.write(chunk));
        }

        // The source has no more strings: this is the final call that can yield data.
        exhausted = true;

        // If we are instructed to add a BOM but nothing has been written so far,
        // the BOM has to be returned explicitly to comply with the contract.
        if (!emittedBytes && addBOM) {
            const bom = standaloneBOM();
            if (bom) {
                return bom;
            }
        }

        const leftovers = encoder.end();
        if (leftovers && leftovers.length > 0) {
            emittedBytes = true;
            return BinaryBuffer.wrap(leftovers);
        }

        return null;
    };

    return { read };
}

}
39 changes: 33 additions & 6 deletions packages/core/src/common/resource.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import { Disposable } from './disposable';
import { MaybePromise } from './types';
import { CancellationToken } from './cancellation';
import { ApplicationError } from './application-error';
import { ReadableStream, Readable } from './stream';

export interface ResourceVersion {
}
Expand Down Expand Up @@ -62,6 +63,15 @@ export interface Resource extends Disposable {
* @throws `ResourceError.NotFound` if a resource is not found
*/
readContents(options?: ResourceReadOptions): Promise<string>;
/**
* Stream latest content of this resource.
*
* If a resource supports versioning it updates version to latest.
* If a resource supports encoding it updates encoding to latest.
*
* @throws `ResourceError.NotFound` if a resource is not found
*/
readStream?(options?: ResourceReadOptions): Promise<ReadableStream<string>>;
/**
* Rewrites the complete content for this resource.
* If a resource does not exist it will be created.
Expand All @@ -74,6 +84,18 @@ export interface Resource extends Disposable {
* @throws `ResourceError.OutOfSync` if latest resource version is out of sync with the given
*/
saveContents?(content: string, options?: ResourceSaveOptions): Promise<void>;
/**
* Rewrites the complete content for this resource.
* If a resource does not exist it will be created.
*
* If a resource supports versioning clients can pass some version
* to check against it, if it is not provided latest version is used.
*
* It updates version and encoding to latest.
*
* @throws `ResourceError.OutOfSync` if latest resource version is out of sync with the given
*/
saveStream?(content: Readable<string>, options?: ResourceSaveOptions): Promise<void>;
/**
* Applies incremental content changes to this resource.
*
Expand All @@ -90,7 +112,8 @@ export interface Resource extends Disposable {
}
export namespace Resource {
/** Input describing one save operation on a resource. */
export interface SaveContext {
// NOTE(review): this string-only `content` line looks like the removed side of the
// diff, superseded by the `string | Readable<string>` declaration below — confirm.
content: string
/** Length of the content; compared against size thresholds when deciding how to save. */
contentLength: number
/** Content to save, either as a complete string or as a readable of strings. */
content: string | Readable<string>
/** Incremental changes, if available, that may be saved instead of the full content. */
changes?: TextDocumentContentChangeEvent[]
/** Save options handed on to the resource's save methods. */
options?: ResourceSaveOptions
}
Expand All @@ -104,10 +127,15 @@ export namespace Resource {
if (token && token.isCancellationRequested) {
return;
}
await resource.saveContents(context.content, context.options);
if (typeof context.content !== 'string' && resource.saveStream) {
await resource.saveStream(context.content, context.options);
} else {
const content = typeof context.content === 'string' ? context.content : Readable.toString(context.content);
await resource.saveContents(content, context.options);
}
}
export async function trySaveContentChanges(resource: Resource, context: SaveContext): Promise<boolean> {
if (!context.changes || !resource.saveContentChanges || shouldSaveContent(context)) {
if (!context.changes || !resource.saveContentChanges || shouldSaveContent(resource, context)) {
return false;
}
try {
Expand All @@ -120,12 +148,11 @@ export namespace Resource {
return false;
}
}
export function shouldSaveContent({ content, changes }: SaveContext): boolean {
if (!changes) {
export function shouldSaveContent(resource: Resource, { contentLength, changes }: SaveContext): boolean {
if (!changes || (resource.saveStream && contentLength > 32 * 1024 * 1024)) {
return true;
}
let contentChangesLength = 0;
const contentLength = content.length;
for (const change of changes) {
contentChangesLength += JSON.stringify(change).length;
if (contentChangesLength > contentLength) {
Expand Down
Loading