From c1442b4e2d3c61666132ddbaaa219eed8f134481 Mon Sep 17 00:00:00 2001
From: Dustin Popp <dpopp07@gmail.com>
Date: Mon, 18 Mar 2019 14:06:17 -0500
Subject: [PATCH 1/2] Revert "refactor: depend on node-sdk for recognize stream
 and query string"

This reverts commit 1ad98a080ca640cde213ef5a831c0754bab7af9b.
---
 speech-to-text/index.js                |   2 +-
 speech-to-text/recognize-file.js       |   2 +-
 speech-to-text/recognize-microphone.js |   2 +-
 speech-to-text/recognize-stream.js     | 402 +++++++++++++++++++++++++
 text-to-speech/synthesize.js           |   2 +-
 util/querystring.js                    |  19 ++
 6 files changed, 425 insertions(+), 4 deletions(-)
 create mode 100644 speech-to-text/recognize-stream.js
 create mode 100644 util/querystring.js

diff --git a/speech-to-text/index.js b/speech-to-text/index.js
index 3f210697..6343ec75 100644
--- a/speech-to-text/index.js
+++ b/speech-to-text/index.js
@@ -34,7 +34,7 @@ module.exports = {
   /**
    * @see RecognizeStream
    */
-  RecognizeStream: require('watson-developer-cloud/lib/recognize-stream'),
+  RecognizeStream: require('./recognize-stream'),
   /**
    * @see FilePlayer
    */
diff --git a/speech-to-text/recognize-file.js b/speech-to-text/recognize-file.js
index 1d8b51f3..65fd4269 100644
--- a/speech-to-text/recognize-file.js
+++ b/speech-to-text/recognize-file.js
@@ -16,7 +16,7 @@
 
 'use strict';
 var BlobStream = require('readable-blob-stream');
-var RecognizeStream = require('watson-developer-cloud/lib/recognize-stream');
+var RecognizeStream = require('./recognize-stream.js');
 var FilePlayer = require('./file-player.js');
 var FormatStream = require('./format-stream.js');
 var TimingStream = require('./timing-stream.js');
diff --git a/speech-to-text/recognize-microphone.js b/speech-to-text/recognize-microphone.js
index e3c49f25..1059cde0 100644
--- a/speech-to-text/recognize-microphone.js
+++ b/speech-to-text/recognize-microphone.js
@@ -17,7 +17,7 @@
 'use strict';
 var getUserMedia = require('get-user-media-promise');
 var MicrophoneStream = require('microphone-stream');
-var RecognizeStream = require('watson-developer-cloud/lib/recognize-stream');
+var RecognizeStream = require('./recognize-stream.js');
 var L16 = require('./webaudio-l16-stream.js');
 var FormatStream = require('./format-stream.js');
 var assign = require('object.assign/polyfill')();
diff --git a/speech-to-text/recognize-stream.js b/speech-to-text/recognize-stream.js
new file mode 100644
index 00000000..6fa48ca0
--- /dev/null
+++ b/speech-to-text/recognize-stream.js
@@ -0,0 +1,402 @@
+/**
+ * Copyright 2014 IBM Corp. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+'use strict';
+
+var Duplex = require('stream').Duplex;
+var util = require('util');
+var pick = require('object.pick');
+var W3CWebSocket = require('websocket').w3cwebsocket;
+var contentType = require('./content-type');
+var qs = require('../util/querystring.js');
+
+var OPENING_MESSAGE_PARAMS_ALLOWED = [
+  'inactivity_timeout',
+  'timestamps',
+  'word_confidence',
+  'content-type',
+  'interim_results',
+  'keywords',
+  'keywords_threshold',
+  'max_alternatives',
+  'word_alternatives_threshold',
+  'profanity_filter',
+  'smart_formatting',
+  'speaker_labels'
+];
+
+var QUERY_PARAMS_ALLOWED = ['customization_id', 'acoustic_customization_id', 'model', 'watson-token', 'access_token', 'X-Watson-Learning-Opt-Out'];
+
+/**
+ * pipe()-able Node.js Duplex stream - accepts binary audio and emits text/objects in it's `data` events.
+ *
+ * Uses WebSockets under the hood. For audio with no recognizable speech, no `data` events are emitted.
+ *
+ * By default, only finalized text is emitted in the data events, however when `objectMode`/`readableObjectMode` and `interim_results` are enabled, both interim and final results objects are emitted.
+ * WriteableElementStream uses this, for example, to live-update the DOM with word-by-word transcriptions.
+ *
+ * Note that the WebSocket connection is not established until the first chunk of data is recieved. This allows for auto-detection of content type (for wav/flac/opus audio).
+ *
+ * @param {Object} options
+ * @param {String} [options.model='en-US_BroadbandModel'] - voice model to use. Microphone streaming only supports broadband models.
+ * @param {String} [options.url='wss://stream.watsonplatform.net/speech-to-text/api'] base URL for service
+ * @param {String} [options.token] - Auth token for CF services
+ * @param {String} options.access_token - IAM Access Token for RC services
+ * @param {Object} [options.headers] - Only works in Node.js, not in browsers. Allows for custom headers to be set, including an Authorization header (preventing the need for auth tokens)
+ * @param {String} [options.content-type='audio/wav'] - content type of audio; can be automatically determined from file header in most cases. only wav, flac, ogg/opus, and webm are supported
+ * @param {Boolean} [options.interim_results=false] - Send back non-final previews of each "sentence" as it is being processed. These results are ignored in text mode.
+ * @param {Boolean} [options.word_confidence=false] - include confidence scores with results.
+ * @param {Boolean} [options.timestamps=false] - include timestamps with results.
+ * @param {Number} [options.max_alternatives=1] - maximum number of alternative transcriptions to include.
+ * @param {Array<String>} [options.keywords] - a list of keywords to search for in the audio
+ * @param {Number} [options.keywords_threshold] - Number between 0 and 1 representing the minimum confidence before including a keyword in the results. Required when options.keywords is set
+ * @param {Number} [options.word_alternatives_threshold] - Number between 0 and 1 representing the minimum confidence before including an alternative word in the results. Must be set to enable word alternatives,
+ * @param {Boolean} [options.profanity_filter=false] - set to true to filter out profanity and replace the words with *'s
+ * @param {Number} [options.inactivity_timeout=30] - how many seconds of silence before automatically closing the stream. use -1 for infinity
+ * @param {Boolean} [options.readableObjectMode=false] - emit `result` objects instead of string Buffers for the `data` events. Does not affect input (which must be binary)
+ * @param {Boolean} [options.objectMode=false] - alias for options.readableObjectMode
+ * @param {Number} [options.X-Watson-Learning-Opt-Out=false] - set to true to opt-out of allowing Watson to use this request to improve it's services
+ * @param {Boolean} [options.smart_formatting=false] - formats numeric values such as dates, times, currency, etc.
+ * @param {String} [options.customization_id] - Customization ID
+ * @param {String} [options.acoustic_customization_id] - Acoustic customization ID
+ *
+ * @constructor
+ */
+function RecognizeStream(options) {
+  // this stream only supports objectMode on the output side.
+  // It must receive binary data input.
+  if (options.objectMode) {
+    options.readableObjectMode = true;
+    delete options.objectMode;
+  }
+  Duplex.call(this, options);
+  this.options = options;
+  this.listening = false;
+  this.initialized = false;
+  this.finished = false;
+
+  this.on('newListener', function(event) {
+    if (!options.silent) {
+      if (event === 'results' || event === 'result' || event === 'speaker_labels') {
+        // eslint-disable-next-line no-console
+        console.log(
+          new Error(
+            'Watson Speech to Text RecognizeStream: the ' +
+              event +
+              ' event was deprecated. ' +
+              "Please set {objectMode: true} and listen for the 'data' event instead. " +
+              'Pass {silent: true} to disable this message.'
+          )
+        );
+      } else if (event === 'connection-close') {
+        // eslint-disable-next-line no-console
+        console.log(
+          new Error(
+            'Watson Speech to Text RecognizeStream: the ' +
+              event +
+              ' event was deprecated. ' +
+              "Please listen for the 'close' event instead. " +
+              'Pass {silent: true} to disable this message.'
+          )
+        );
+      } else if (event === 'connect') {
+        // eslint-disable-next-line no-console
+        console.log(
+          new Error(
+            'Watson Speech to Text RecognizeStream: the ' +
+              event +
+              ' event was deprecated. ' +
+              "Please listen for the 'open' event instead. " +
+              'Pass {silent: true} to disable this message.'
+          )
+        );
+      }
+    }
+  });
+}
+util.inherits(RecognizeStream, Duplex);
+
+RecognizeStream.WEBSOCKET_CONNECTION_ERROR = 'WebSocket connection error';
+
+RecognizeStream.prototype.initialize = function() {
+  var options = this.options;
+
+  if (options.token && !options['watson-token']) {
+    options['watson-token'] = options.token;
+  }
+  if (options.content_type && !options['content-type']) {
+    options['content-type'] = options.content_type;
+  }
+  if (options['X-WDC-PL-OPT-OUT'] && !options['X-Watson-Learning-Opt-Out']) {
+    options['X-Watson-Learning-Opt-Out'] = options['X-WDC-PL-OPT-OUT'];
+  }
+
+  var queryParams = util._extend(
+    'customization_id' in options ? pick(options, QUERY_PARAMS_ALLOWED) : { model: 'en-US_BroadbandModel' },
+    pick(options, QUERY_PARAMS_ALLOWED)
+  );
+
+  var queryString = qs.stringify(queryParams);
+  var url = (options.url || 'wss://stream.watsonplatform.net/speech-to-text/api').replace(/^http/, 'ws') + '/v1/recognize?' + queryString;
+
+  var openingMessage = pick(options, OPENING_MESSAGE_PARAMS_ALLOWED);
+  openingMessage.action = 'start';
+
+  var self = this;
+
+  // node params: requestUrl, protocols, origin, headers, extraRequestOptions
+  // browser params: requestUrl, protocols (all others ignored)
+  var socket = (this.socket = new W3CWebSocket(url, null, null, options.headers, null));
+
+  // when the input stops, let the service know that we're done
+  self.on('finish', self.finish.bind(self));
+
+  /**
+   * This can happen if the credentials are invalid - in that case, the response from DataPower doesn't include the
+   * necessary CORS headers, so JS can't even read it :(
+   *
+   * @param {Event} event - event object with essentially no useful information
+   */
+  socket.onerror = function(event) {
+    self.listening = false;
+    var err = new Error('WebSocket connection error');
+    err.name = RecognizeStream.WEBSOCKET_CONNECTION_ERROR;
+    err.event = event;
+    self.emit('error', err);
+    self.push(null);
+  };
+
+  this.socket.onopen = function() {
+    self.sendJSON(openingMessage);
+    /**
+     * emitted once the WebSocket connection has been established
+     * @event RecognizeStream#open
+     */
+    self.emit('open');
+  };
+
+  this.socket.onclose = function(e) {
+    // if (self.listening) {
+    self.listening = false;
+    self.push(null);
+    // }
+    /**
+     * @event RecognizeStream#close
+     * @param {Number} reasonCode
+     * @param {String} description
+     */
+    self.emit('close', e.code, e.reason);
+  };
+
+  /**
+   * @event RecognizeStream#error
+   * @param {String} msg custom error message
+   * @param {*} [frame] unprocessed frame (should have a .data property with either string or binary data)
+   * @param {Error} [err]
+   */
+  function emitError(msg, frame, err) {
+    if (err) {
+      err.message = msg + ' ' + err.message;
+    } else {
+      err = new Error(msg);
+    }
+    err.raw = frame;
+    self.emit('error', err);
+  }
+
+  socket.onmessage = function(frame) {
+    if (typeof frame.data !== 'string') {
+      return emitError('Unexpected binary data received from server', frame);
+    }
+
+    var data;
+    try {
+      data = JSON.parse(frame.data);
+    } catch (jsonEx) {
+      return emitError('Invalid JSON received from service:', frame, jsonEx);
+    }
+
+    /**
+     * Emit any messages received over the wire, mainly used for debugging.
+     *
+     * @event RecognizeStream#message
+     * @param {Object} message - frame object with a data attribute that's either a string or a Buffer/TypedArray
+     * @param {Object} [data] - parsed JSON object (if possible);
+     */
+    self.emit('message', frame, data);
+
+    if (data.error) {
+      emitError(data.error, frame);
+    } else if (data.state === 'listening') {
+      // this is emitted both when the server is ready for audio, and after we send the close message to indicate that it's done processing
+      if (self.listening) {
+        self.listening = false;
+        socket.close();
+      } else {
+        self.listening = true;
+        /**
+         * Emitted when the Watson Service indicates readiness to transcribe audio. Any audio sent before this point will be buffered until now.
+         * @event RecognizeStream#listening
+         */
+        self.emit('listening');
+      }
+    } else {
+      if (options.readableObjectMode) {
+        /**
+         * Object with interim or final results, possibly including confidence scores, alternatives, and word timing.
+         * @event RecognizeStream#data
+         * @param {Object} data
+         */
+        self.push(data);
+      } else if (Array.isArray(data.results)) {
+        data.results.forEach(function(result) {
+          if (result.final && result.alternatives) {
+            /**
+             * Finalized text
+             * @event RecognizeStream#data
+             * @param {String} transcript
+             */
+            self.push(result.alternatives[0].transcript, 'utf8');
+          }
+        });
+      }
+    }
+  };
+
+  this.initialized = true;
+};
+
+RecognizeStream.prototype.sendJSON = function sendJSON(msg) {
+  /**
+   * Emits any JSON object sent to the service from the client. Mainly used for debugging.
+   * @event RecognizeStream#send-json
+   * @param {Object} msg
+   */
+  this.emit('send-json', msg);
+  return this.socket.send(JSON.stringify(msg));
+};
+
+RecognizeStream.prototype.sendData = function sendData(data) {
+  /**
+   * Emits any Binary object sent to the service from the client. Mainly used for debugging.
+   * @event RecognizeStream#send-data
+   * @param {Object} msg
+   */
+  this.emit('send-data', data);
+  return this.socket.send(data);
+};
+
+RecognizeStream.prototype._read = function() /* size*/ {
+  // there's no easy way to control reads from the underlying library
+  // so, the best we can do here is a no-op
+};
+
+RecognizeStream.ERROR_UNRECOGNIZED_FORMAT = 'UNRECOGNIZED_FORMAT';
+
+RecognizeStream.prototype._write = function(chunk, encoding, callback) {
+  var self = this;
+  if (self.finished) {
+    // can't send any more data after the stop message (although this shouldn't happen normally...)
+    return;
+  }
+  if (!this.initialized) {
+    if (!this.options['content-type']) {
+      var ct = RecognizeStream.getContentType(chunk);
+      if (ct) {
+        this.options['content-type'] = ct;
+      } else {
+        var err = new Error('Unable to determine content-type from file header, please specify manually.');
+        err.name = RecognizeStream.ERROR_UNRECOGNIZED_FORMAT;
+        this.emit('error', err);
+        this.push(null);
+        return;
+      }
+    }
+    this.initialize();
+
+    this.once('open', function() {
+      self.sendData(chunk);
+      self.afterSend(callback);
+    });
+  } else {
+    self.sendData(chunk);
+    this.afterSend(callback);
+  }
+};
+
+/**
+ * Flow control - don't ask for more data until we've finished what we have
+ *
+ * Notes:
+ *
+ * This limits upload speed to 100 * options.highWaterMark / second.
+ *
+ * The default highWaterMark is 16kB, so the default max upload speed is ~1.6MB/s.
+ *
+ * Microphone input provides audio at a (downsampled) rate of:
+ *   16000 samples/s * 16-bits * 1 channel = 32kB/s
+ * (note the bits to Bytes conversion there)
+ *
+ * @private
+ * @param {Function} next
+ */
+RecognizeStream.prototype.afterSend = function afterSend(next) {
+  if (this.socket.bufferedAmount <= (this._writableState.highWaterMark || 0)) {
+    process.nextTick(next);
+  } else {
+    setTimeout(this.afterSend.bind(this, next), 10);
+  }
+};
+
+/**
+ * Prevents any more audio from being sent over the WebSocket and gracefully closes the connection.
+ * Additional data may still be emitted up until the `end` event is triggered.
+ */
+RecognizeStream.prototype.stop = function() {
+  /**
+   * Event emitted when the stop method is called. Mainly for synchronising with file reading and playback.
+   * @event RecognizeStream#stop
+   */
+  this.emit('stop');
+  this.finish();
+};
+
+RecognizeStream.prototype.finish = function finish() {
+  // this is called both when the source stream finishes, and when .stop() is fired, but we only want to send the stop message once.
+  if (this.finished) {
+    return;
+  }
+  this.finished = true;
+  var self = this;
+  var closingMessage = { action: 'stop' };
+  if (self.socket && self.socket.readyState === self.socket.OPEN) {
+    self.sendJSON(closingMessage);
+  } else {
+    this.once('open', function() {
+      self.sendJSON(closingMessage);
+    });
+  }
+};
+
+RecognizeStream.prototype.promise = require('./to-promise');
+
+RecognizeStream.getContentType = function(buffer) {
+  // the substr really shouldn't be necessary, but there's a bug somewhere that can cause buffer.slice(0,4) to return
+  // the entire contents of the buffer, so it's a failsafe to catch that
+  return contentType.fromHeader(buffer);
+};
+
+module.exports = RecognizeStream;
diff --git a/text-to-speech/synthesize.js b/text-to-speech/synthesize.js
index b2507600..73d12c98 100644
--- a/text-to-speech/synthesize.js
+++ b/text-to-speech/synthesize.js
@@ -15,7 +15,7 @@
  */
 'use strict';
 var pick = require('object.pick');
-var qs = require('watson-developer-cloud/lib/querystring');
+var qs = require('../util/querystring.js');
 
 var QUERY_PARAMS_ALLOWED = ['voice', 'X-WDC-PL-OPT-OUT', 'X-Watson-Learning-Opt-Out', 'text', 'watson-token', 'access_token', 'accept', 'customization_id'];
 
diff --git a/util/querystring.js b/util/querystring.js
new file mode 100644
index 00000000..0c8f1ec4
--- /dev/null
+++ b/util/querystring.js
@@ -0,0 +1,19 @@
+'use strict';
+
+/**
+ * Stringify query params, Watson-style
+ *
+ * Why? The server that processes auth tokens currently only accepts the *exact* string, even if it's invalid for a URL.
+ * Properly url-encoding percent characters causes it to reject the token.
+ * So, this is a custom qs.stringify function that properly encodes everything except watson-token, passing it along verbatim
+ *
+ * @param {Object} queryParams
+ * @return {String}
+ */
+exports.stringify = function stringify(queryParams) {
+  return Object.keys(queryParams)
+    .map(function(key) {
+      return key + '=' + (key === 'watson-token' ? queryParams[key] : encodeURIComponent(queryParams[key])); // the server chokes if the token is correctly url-encoded
+    })
+    .join('&');
+};

From c3d835ba37c5fc61ee9f50e64ffb39deda0a7039 Mon Sep 17 00:00:00 2001
From: Dustin Popp <dpopp07@gmail.com>
Date: Mon, 18 Mar 2019 14:50:55 -0500
Subject: [PATCH 2/2] fix: stabilize package by reverting to old
 recognize-stream class that includes iam bug fixes

---
 examples/server.js                 |  4 ++--
 speech-to-text/recognize-stream.js | 18 ++++++++++++++++--
 2 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/examples/server.js b/examples/server.js
index 6da3d517..312ccb4a 100644
--- a/examples/server.js
+++ b/examples/server.js
@@ -88,8 +88,8 @@ var sttAuthService = new AuthorizationV1(
   Object.assign(
     {
       username: process.env.SPEECH_TO_TEXT_USERNAME, // or hard-code credentials here
-      password: process.env.SPEECH_TO_TEXT_PASSWORD,
-      iam_apikey: process.env.SPEECH_TO_TEXT_IAM_APIKEY // if using an RC service
+      password: process.env.SPEECH_TO_TEXT_PASSWORD
+      // iam_apikey: process.env.SPEECH_TO_TEXT_IAM_APIKEY // if using an RC service
     },
     vcapServices.getCredentials('speech_to_text') // pulls credentials from environment in bluemix, otherwise returns {}
   )
diff --git a/speech-to-text/recognize-stream.js b/speech-to-text/recognize-stream.js
index 6fa48ca0..16f76fa1 100644
--- a/speech-to-text/recognize-stream.js
+++ b/speech-to-text/recognize-stream.js
@@ -38,7 +38,15 @@ var OPENING_MESSAGE_PARAMS_ALLOWED = [
   'speaker_labels'
 ];
 
-var QUERY_PARAMS_ALLOWED = ['customization_id', 'acoustic_customization_id', 'model', 'watson-token', 'access_token', 'X-Watson-Learning-Opt-Out'];
+var QUERY_PARAMS_ALLOWED = [
+  'language_customization_id',
+  'customization_id',
+  'acoustic_customization_id',
+  'model',
+  'watson-token',
+  'access_token',
+  'X-Watson-Learning-Opt-Out'
+];
 
 /**
  * pipe()-able Node.js Duplex stream - accepts binary audio and emits text/objects in it's `data` events.
@@ -144,8 +152,14 @@ RecognizeStream.prototype.initialize = function() {
     options['X-Watson-Learning-Opt-Out'] = options['X-WDC-PL-OPT-OUT'];
   }
 
+  // compatibility code for the deprecated param, customization_id
+  if (options.customization_id && !options.language_customization_id) {
+    options.language_customization_id = options.customization_id;
+    delete options.customization_id;
+  }
+
   var queryParams = util._extend(
-    'customization_id' in options ? pick(options, QUERY_PARAMS_ALLOWED) : { model: 'en-US_BroadbandModel' },
+    'language_customization_id' in options ? pick(options, QUERY_PARAMS_ALLOWED) : { model: 'en-US_BroadbandModel' },
     pick(options, QUERY_PARAMS_ALLOWED)
   );