Skip to content
This repository has been archived by the owner on Jan 15, 2025. It is now read-only.

Commit

Permalink
Add support for URI reference in LU (#923)
Browse files Browse the repository at this point in the history
* Add support for URI reference in LU

* fixing linting issues.

* removing dead code.

Co-authored-by: Emilio Munoz <[email protected]>
  • Loading branch information
vishwacsena and munozemilio authored Aug 5, 2020
1 parent ec5bb0e commit 789391b
Show file tree
Hide file tree
Showing 8 changed files with 397 additions and 61 deletions.
97 changes: 66 additions & 31 deletions packages/lu/src/parser/lu/luMerger.js
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ const parserObject = require('./../lufile/classes/parserObject');
const txtfile = require('./../lufile/read-text-file');
const BuildDiagnostic = require('./../lufile/diagnostic').BuildDiagnostic;
const LUISObjNameEnum = require('./../utils/enums/luisobjenum');
const fetch = require('node-fetch');

module.exports = {
/**
Expand Down Expand Up @@ -361,7 +362,7 @@ const resolveRefByType = function(srcId, ref, refTree) {
return filter(srcId, ref, refTree);
}

const buildLuJsonObject = async function(luObjArray, log, luis_culture, luSearchFn = findLuFilesInDir){
const buildLuJsonObject = async function(luObjArray, log, luis_culture, luSearchFn = resolveLuContent){
let allParsedLUISContent = []
let allParsedQnAContent = []
let allParsedAlterationsContent = []
Expand Down Expand Up @@ -411,40 +412,74 @@ const buildLuJsonObject = async function(luObjArray, log, luis_culture, luSearch
QnAAlterations: allParsedAlterationsContent
}
}

const findLuFilesInDir = async function(srcId, idsToFind){
let luObjects = []
/**
 * Resolves every reference in `idsToFind` into lu objects.
 * References whose `filePath` is recognised by `isUrl` are downloaded through
 * `resolveLuUriContent`; all others are handed to `resolveLuFileContent`.
 * References are processed one at a time, in order, so the returned list keeps
 * the order of `idsToFind`.
 *
 * @param {string} srcId - Id of the source that contains the references.
 * @param {Array} idsToFind - References to resolve; each entry exposes `filePath`.
 * @returns {Promise<Array>} The lu objects collected by the two resolvers.
 */
const resolveLuContent = async function(srcId, idsToFind){
    const resolved = [];
    for (const reference of idsToFind) {
        if (!isUrl(reference.filePath)) {
            // Local reference: resolved synchronously.
            resolveLuFileContent(reference, resolved, srcId);
            continue;
        }
        // Remote reference: wait for the download before moving on.
        await resolveLuUriContent(srcId, reference, resolved);
    }
    return resolved;
}
/**
 * Resolves one local (non-URI) reference and appends the resulting lu
 * object(s) to `luObjects`.
 *
 * Relative paths are resolved against the directory of `srcId`, or against the
 * current working directory when `srcId` is 'stdin'. A path ending in '*' is
 * treated as a folder wildcard ('**' means recursive).
 *
 * NOTE(review): in this view, lines of the removed `findLuFilesInDir` loop body
 * (the `for` over `idsToFind`, `continue`, and duplicated statements) appear
 * interleaved with this function's body; the span is not runnable as shown.
 * Confirm against the actual file before relying on the statement order below.
 *
 * @param file - Reference to resolve; `filePath` and `includeInCollate` are read,
 *               and `filePath` is overwritten with its absolute form for
 *               non-wildcard relative paths.
 * @param luObjects - Output collection that resolved lu objects are pushed onto.
 * @param srcId - Id of the source containing the reference, or 'stdin'.
 */
const resolveLuFileContent = function(file, luObjects, srcId) {
let parentFilePath = srcId === 'stdin' ? process.cwd() : path.parse(path.resolve(srcId)).dir
for(let idx = 0; idx < idsToFind.length; idx++ ) {
// Support wild cards at the end of a relative .LU file path.
// './bar/*' should look for all .lu files under the specified folder.
// './bar/**' should recursively look for .lu files under sub-folders as well.
let file = idsToFind[idx]
if(file.filePath.endsWith('*')) {
const isRecursive = file.filePath.endsWith('**')
const rootFolder = file.filePath.replace(/\*/g, '')
let rootPath = rootFolder;
if(!path.isAbsolute(rootFolder)) {
rootPath = path.resolve(parentFilePath, rootFolder);
}
// Get LU files in this location
const luFilesToAdd = helpers.findLUFiles(rootPath, isRecursive);
// add these to filesToParse
for(let f = 0; f < luFilesToAdd.length; f++){
const opts = new luOptions(luFilesToAdd[f], file.includeInCollate)
luObjects.push(new luObject(readLuFile(luFilesToAdd[f]), opts))
}
continue
// Support wild cards at the end of a relative .LU file path.
// './bar/*' should look for all .lu files under the specified folder.
// './bar/**' should recursively look for .lu files under sub-folders as well.
if(file.filePath.endsWith('*')) {
const isRecursive = file.filePath.endsWith('**')
// Strip every '*' so only the folder part of the wildcard path remains.
const rootFolder = file.filePath.replace(/\*/g, '')
let rootPath = rootFolder;
if(!path.isAbsolute(rootFolder)) {
rootPath = path.resolve(parentFilePath, rootFolder);
}

if(!path.isAbsolute(file.filePath)) {
file.filePath = path.resolve(parentFilePath, file.filePath)
// Get LU files in this location
const luFilesToAdd = helpers.findLUFiles(rootPath, isRecursive);
// add these to filesToParse
for(let f = 0; f < luFilesToAdd.length; f++){
const opts = new luOptions(luFilesToAdd[f], file.includeInCollate)
luObjects.push(new luObject(readLuFile(luFilesToAdd[f]), opts))
}
// find matching parsed files and ensure includeInCollate is updated if needed.
luObjects.push(new luObject(readLuFile(file.filePath), new luOptions(file.filePath, file.includeInCollate)))

// Wildcard handled: nothing more to resolve for this reference.
return
}

if(!path.isAbsolute(file.filePath)) {
// Mutates the caller's reference object so later code sees the absolute path.
file.filePath = path.resolve(parentFilePath, file.filePath)
}
// find matching parsed files and ensure includeInCollate is updated if needed.
luObjects.push(new luObject(readLuFile(file.filePath), new luOptions(file.filePath, file.includeInCollate)))
}
/**
 * Downloads the content referenced by a URI and appends it, wrapped in a
 * luObject, to `luObjects`.
 *
 * Nothing is fetched or pushed when `toResolve.filePath` is empty.
 *
 * @param {string} srcId - Id of the source containing the reference (not used for URIs).
 * @param {object} toResolve - Reference to resolve; `filePath` holds the URI and
 *                             `includeInCollate` is forwarded to luOptions.
 * @param {Array} luObjects - Output collection; one luObject is pushed on success.
 * @returns {Promise<void>}
 * @throws {exception} With retCode.errorCode.INVALID_URI when the request
 *                     cannot be made or the server answers with a non-success status.
 */
const resolveLuUriContent = async function(srcId, toResolve, luObjects) {
    const uri = toResolve.filePath || undefined;
    if (uri === undefined) {
        return;
    }

    // Builds the INVALID_URI exception. No range is attached: this function has
    // no import section in scope to take a source range from.
    const buildInvalidUriError = () => {
        const errorMsg = `URI: "${uri}" appears to be invalid. Please double check the URI or re-try this parse when you are connected to the internet.`;
        const error = BuildDiagnostic({
            message: errorMsg
        });
        return new exception(retCode.errorCode.INVALID_URI, error.toString(), [error]);
    };

    let response;
    try {
        response = await fetch(uri, { method: 'GET' });
    } catch (err) {
        // Network failure or malformed URI.
        throw buildInvalidUriError();
    }

    // An error page (404, 500, ...) must not be parsed as LU content.
    if (!response.ok) {
        throw buildInvalidUriError();
    }

    const rawContent = await response.buffer();
    const content = helpers.fixBuffer(rawContent);
    luObjects.push(new luObject(content, new luOptions(toResolve.filePath, toResolve.includeInCollate)));
}
/**
 * Tells whether a reference points at a remote http(s) resource.
 *
 * Parsing alone is not enough: `new URL()` accepts any scheme, so an absolute
 * Windows path such as `C:\bots\main.lu` parses successfully with the scheme
 * `c:`. Only http and https are treated as URLs; everything else is left to be
 * handled as a file path.
 *
 * @param {string} path - Reference text to classify.
 * @returns {boolean} true for http/https URLs, false otherwise.
 */
const isUrl = function(path) {
    try {
        const { protocol } = new URL(path);
        return protocol === 'http:' || protocol === 'https:';
    } catch (err) {
        // Not parseable as an absolute URL (e.g. a relative file path).
        return false;
    }
}

const updateParsedFiles = function(allParsedLUISContent, allParsedQnAContent, allParsedAlterationsContent, luobject) {
Expand Down
6 changes: 5 additions & 1 deletion packages/lu/src/parser/lufile/parseFileContents.js
Original file line number Diff line number Diff line change
Expand Up @@ -659,7 +659,11 @@ const parseAndHandleImportSection = async function (parsedContent, luResource) {

let contentType = response.headers.get('content-type');
if (!contentType.includes('text/html')) {
parsedContent.qnaJsonStructure.files.push(new qnaFile(linkValue, linkValueText));
if (parseUrl.pathname.toLowerCase().endsWith('.lu') || parseUrl.pathname.toLowerCase().endsWith('.qna')) {
parsedContent.additionalFilesToParse.push(new fileToParse(linkValue));
} else {
parsedContent.qnaJsonStructure.files.push(new qnaFile(linkValue, linkValueText));
}
} else {
parsedContent.qnaJsonStructure.urls.push(linkValue);
}
Expand Down
33 changes: 30 additions & 3 deletions packages/lu/src/parser/utils/helpers.js
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,6 @@ const helpers = {
let linkValueList = utterance.trim().match(new RegExp(/\(.*?\)/g));
let linkValue = linkValueList[0].replace('(', '').replace(')', '');
if (linkValue === '') throw (new exception(retCode.errorCode.INVALID_LU_FILE_REF, `[ERROR]: Invalid LU File Ref: "${utterance}"`));
let parseUrl = url.parse(linkValue);
if (parseUrl.host || parseUrl.hostname) throw (new exception(retCode.errorCode.INVALID_LU_FILE_REF, `[ERROR]: Invalid LU File Ref: "${utterance}". \n Reference cannot be a URI`));
// reference can either be #<Intent-Name> or #? or /*#? or /**#? or #*utterance* or #<Intent-Name>*patterns*
let splitRegExp = new RegExp(/^(?<fileName>.*?)(?<segment>#|\*+)(?<path>.*?)$/gim);
let splitReference = splitRegExp.exec(linkValue);
Expand Down Expand Up @@ -170,7 +168,36 @@ const helpers = {
(finalLUISJSON.entities || []).forEach(e => {
if (e.explicitlyAdded !== undefined) delete e.explicitlyAdded;
})
}
},
fixBuffer : function(fileBuffer) {
if (fileBuffer) {
// If the data starts with BOM, we know it is UTF
if (fileBuffer[0] === 0xEF && fileBuffer[1] === 0xBB && fileBuffer[2] === 0xBF) {
// EF BB BF UTF-8 with BOM
fileBuffer = fileBuffer.slice(3)
} else if (fileBuffer[0] === 0xFF && fileBuffer[1] === 0xFE && fileBuffer[2] === 0x00 && fileBuffer[3] === 0x00) {
// FF FE 00 00 UTF-32, little-endian BOM
fileBuffer = fileBuffer.slice(4)
} else if (fileBuffer[0] === 0x00 && fileBuffer[1] === 0x00 && fileBuffer[2] === 0xFE && fileBuffer[3] === 0xFF) {
// 00 00 FE FF UTF-32, big-endian BOM
fileBuffer = fileBuffer.slice(4)
} else if (fileBuffer[0] === 0xFE && fileBuffer[1] === 0xFF && fileBuffer[2] === 0x00 && fileBuffer[3] === 0x00) {
// FE FF 00 00 UCS-4, unusual octet order BOM (3412)
fileBuffer = fileBuffer.slice(4)
} else if (fileBuffer[0] === 0x00 && fileBuffer[1] === 0x00 && fileBuffer[2] === 0xFF && fileBuffer[3] === 0xFE) {
// 00 00 FF FE UCS-4, unusual octet order BOM (2143)
fileBuffer = fileBuffer.slice(4)
} else if (fileBuffer[0] === 0xFF && fileBuffer[1] === 0xFE) {
// FF FE UTF-16, little endian BOM
fileBuffer = fileBuffer.slice(2)
} else if (fileBuffer[0] === 0xFE && fileBuffer[1] === 0xFF) {
// FE FF UTF-16, big endian BOM
fileBuffer = fileBuffer.slice(2)
}
}
return fileBuffer.toString('utf8').replace(/\0/g, '');
}

};

module.exports = helpers;
Expand Down
28 changes: 2 additions & 26 deletions packages/lu/src/utils/textfilereader.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
const fs = require('fs-extra')
const error = require('./../parser/utils/exception')
const retCode = require('./../parser/utils/enums/CLI-errors')
const helpers = require('./../parser/utils/helpers')

export async function readTextFile(file: any): Promise<string> {
return new Promise(async (resolve, reject) => {
Expand All @@ -14,32 +15,7 @@ export async function readTextFile(file: any): Promise<string> {
return reject('ENOENT: no such file or directory, ' + file)
}
let fileBuffer = await fs.readFile(file)
if (fileBuffer) {
// If the data starts with BOM, we know it is UTF
if (fileBuffer[0] === 0xEF && fileBuffer[1] === 0xBB && fileBuffer[2] === 0xBF) {
// EF BB BF UTF-8 with BOM
fileBuffer = fileBuffer.slice(3)
} else if (fileBuffer[0] === 0xFF && fileBuffer[1] === 0xFE && fileBuffer[2] === 0x00 && fileBuffer[3] === 0x00) {
// FF FE 00 00 UTF-32, little-endian BOM
fileBuffer = fileBuffer.slice(4)
} else if (fileBuffer[0] === 0x00 && fileBuffer[1] === 0x00 && fileBuffer[2] === 0xFE && fileBuffer[3] === 0xFF) {
// 00 00 FE FF UTF-32, big-endian BOM
fileBuffer = fileBuffer.slice(4)
} else if (fileBuffer[0] === 0xFE && fileBuffer[1] === 0xFF && fileBuffer[2] === 0x00 && fileBuffer[3] === 0x00) {
// FE FF 00 00 UCS-4, unusual octet order BOM (3412)
fileBuffer = fileBuffer.slice(4)
} else if (fileBuffer[0] === 0x00 && fileBuffer[1] === 0x00 && fileBuffer[2] === 0xFF && fileBuffer[3] === 0xFE) {
// 00 00 FF FE UCS-4, unusual octet order BOM (2143)
fileBuffer = fileBuffer.slice(4)
} else if (fileBuffer[0] === 0xFF && fileBuffer[1] === 0xFE) {
// FF FE UTF-16, little endian BOM
fileBuffer = fileBuffer.slice(2)
} else if (fileBuffer[0] === 0xFE && fileBuffer[1] === 0xFF) {
// FE FF UTF-16, big endian BOM
fileBuffer = fileBuffer.slice(2)
}
}
return resolve(fileBuffer.toString('utf8').replace(/\0/g, ''))
return resolve(helpers.fixBuffer(fileBuffer))
} catch (err) {
if (err.message.match(/ENOENT: no such file or directory/)) {
return reject(new error(retCode.errorCode.INVALID_INPUT_FILE, err.message))
Expand Down
91 changes: 91 additions & 0 deletions packages/lu/test/fixtures/verified/importUrl.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
{
"intents": [
{
"name": "None"
}
],
"entities": [
{
"name": "add",
"roles": [],
"children": [
{
"name": "count",
"children": [],
"features": [
{
"modelName": "globalCount",
"isRequired": true
}
]
}
]
},
{
"name": "globalCount",
"roles": [],
"children": [
{
"name": "countNumber",
"children": [],
"features": [
{
"modelName": "number",
"isRequired": true
}
]
}
]
}
],
"composites": [],
"closedLists": [],
"regex_entities": [],
"regex_features": [],
"utterances": [
{
"text": "add two apples",
"intent": "None",
"entities": [
{
"entity": "add",
"startPos": 0,
"endPos": 13,
"children": [
{
"entity": "count",
"startPos": 4,
"endPos": 13
}
]
},
{
"entity": "globalCount",
"startPos": 4,
"endPos": 13,
"children": [
{
"entity": "countNumber",
"startPos": 4,
"endPos": 6
}
]
}
]
}
],
"patterns": [],
"patternAnyEntities": [],
"prebuiltEntities": [
{
"name": "number",
"roles": []
}
],
"luis_schema_version": "7.0.0",
"versionId": "0.1",
"name": "",
"desc": "",
"culture": "en-us",
"phraselists": []
}
28 changes: 28 additions & 0 deletions packages/lu/test/fixtures/verified/referenceUrl.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
{
"intents": [
{
"name": "test"
}
],
"entities": [],
"composites": [],
"closedLists": [],
"regex_entities": [],
"model_features": [],
"regex_features": [],
"utterances": [
{
"text": "add two apples",
"intent": "test",
"entities": []
}
],
"patterns": [],
"patternAnyEntities": [],
"prebuiltEntities": [],
"luis_schema_version": "3.2.0",
"versionId": "0.1",
"name": "",
"desc": "",
"culture": "en-us"
}
49 changes: 49 additions & 0 deletions packages/lu/test/fixtures/verified/referenceUrlWithWildCard.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
{
"intents": [
{
"name": "test"
}
],
"entities": [],
"composites": [],
"closedLists": [],
"regex_entities": [],
"model_features": [],
"regex_features": [],
"utterances": [
{
"text": "add two apples",
"intent": "test",
"entities": []
},
{
"text": "one",
"intent": "test",
"entities": []
},
{
"text": "two",
"intent": "test",
"entities": []
}
],
"patterns": [
{
"pattern": "another {entity}",
"intent": "test"
}
],
"patternAnyEntities": [
{
"name": "entity",
"explicitList": [],
"roles": []
}
],
"prebuiltEntities": [],
"luis_schema_version": "3.2.0",
"versionId": "0.1",
"name": "",
"desc": "",
"culture": "en-us"
}
Loading

0 comments on commit 789391b

Please sign in to comment.