Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(HTML Extract Node): Better text extraction, option to specify selectors to skip, option to clean up text data #8586

Merged
35 changes: 31 additions & 4 deletions packages/nodes-base/nodes/Html/Html.node.ts
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,20 @@ const extractionValuesCollection: INodeProperties = {
placeholder: 'class',
description: 'The name of the attribute to return the value off',
},
// Extraction-value field: a free-text, comma-separated list of selectors
// to skip in the text extraction (see `description` below).
{
displayName: 'Skip Selectors',
name: 'skipSelectors',
type: 'string',
displayOptions: {
show: {
// Tied to the 'text' return value; the option only concerns text extraction.
returnValue: ['text'],
// NOTE(review): `_cnd: { gt: 1.1 }` presumably restricts this field to node
// versions greater than 1.1 — confirm against the displayOptions condition docs.
'@version': [{ _cnd: { gt: 1.1 } }],
},
},
// Empty by default.
default: '',
placeholder: 'e.g. img, .className, #ItemId',
description: 'Comma-separated list of selectors to skip in the text extraction',
},
{
displayName: 'Return Array',
name: 'returnArray',
Expand All @@ -114,7 +128,7 @@ export class Html implements INodeType {
name: 'html',
icon: 'file:html.svg',
group: ['transform'],
version: [1, 1.1],
version: [1, 1.1, 1.2],
subtitle: '={{ $parameter["operation"] }}',
description: 'Work with HTML',
defaults: {
Expand Down Expand Up @@ -277,6 +291,14 @@ export class Html implements INodeType {
description:
'Whether to remove automatically all spaces and newlines from the beginning and end of the values',
},
// Boolean option, enabled by default. Its description is user-facing text:
// the duplicated word ("remove remove") has been corrected.
{
displayName: 'Clean Up Text',
name: 'cleanUpText',
type: 'boolean',
default: true,
description:
'Whether to remove leading and trailing whitespaces, line breaks (newlines) and condense multiple consecutive whitespaces into a single space',
},
],
},
// ----------------------------------
Expand Down Expand Up @@ -548,14 +570,19 @@ export class Html implements INodeType {
// An array should be returned so iterate over one
// value at a time
newItem.json[valueData.key] = [];
htmlElement.each((i, el) => {
htmlElement.each((_, el) => {
(newItem.json[valueData.key] as Array<string | undefined>).push(
getValue($(el), valueData, options),
getValue($(el), valueData, options, nodeVersion),
);
});
} else {
// One single value should be returned
newItem.json[valueData.key] = getValue(htmlElement, valueData, options);
newItem.json[valueData.key] = getValue(
htmlElement,
valueData,
options,
nodeVersion,
);
}
}
returnData.push(newItem);
Expand Down
5 changes: 5 additions & 0 deletions packages/nodes-base/nodes/Html/test/Html.node.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
import { testWorkflows, getWorkflowFilenames } from '@test/nodes/Helpers';

// Collect the workflow fixture filenames that live next to this test file.
const workflowFiles = getWorkflowFilenames(__dirname);

// Register one suite that runs every collected workflow through the shared helper.
describe('Test Html Node > extractHtmlContent', function () {
	return testWorkflows(workflowFiles);
});
Loading
Loading