Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Web console: don't send lookups to sampler #16234

Merged
merged 1 commit into from
Apr 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 47 additions & 1 deletion web-console/src/utils/sampler.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
*/

import type { SampleResponse } from './sampler';
import { guessDimensionsFromSampleResponse } from './sampler';
import { changeLookupInExpressionsSampling, guessDimensionsFromSampleResponse } from './sampler';

describe('sampler', () => {
describe('getInferredDimensionsFromSampleResponse', () => {
Expand Down Expand Up @@ -130,4 +130,50 @@ describe('sampler', () => {
`);
});
});

describe('changeLookupInExpressionsSampling', () => {
it('does nothing when there is nothing to do', () => {
expect(changeLookupInExpressionsSampling(`concat("x", 'lol')`)).toEqual(`concat("x", 'lol')`);
});

it('works with a SQL parsable expression', () => {
expect(
changeLookupInExpressionsSampling(`concat(lookup("x", 'lookup_name'), 'lol')`),
).toEqual(
`concat(concat('lookup_name', '[', "x", '] -- This is a placeholder, lookups are not supported in sampling'), 'lol')`,
);

expect(
changeLookupInExpressionsSampling(`concat(lookup("x", 'lookup_name', 'fallback'), 'lol')`),
).toEqual(
`concat(nvl(concat('lookup_name', '[', "x", '] -- This is a placeholder, lookups are not supported in sampling'), 'fallback'), 'lol')`,
);

expect(
changeLookupInExpressionsSampling(
`concat(lookup("x", 'lookup_name', 'fallback', '?'), 'lol')`,
),
).toEqual(`concat(null, 'lol')`);
});

it('works with a non-SQL parsable expression', () => {
expect(
changeLookupInExpressionsSampling(`concat(lookup("x", 'lookup_name'), 'lol')^`),
).toEqual(
`concat(concat('lookup_name','[',"x",'] -- This is a placeholder, lookups are not supported in sampling'), 'lol')^`,
);

expect(
changeLookupInExpressionsSampling(`concat(lookup("x", 'lookup_name', 'fallback'), 'lol')^`),
).toEqual(
`concat(nvl(concat('lookup_name','[',"x",'] -- This is a placeholder, lookups are not supported in sampling'),'fallback'), 'lol')^`,
);

expect(
changeLookupInExpressionsSampling(
`concat(lookup("x", 'lookup_name', 'fallback', '?'), 'lol')^`,
),
).toEqual(`concat(null, 'lol')^`);
});
});
});
68 changes: 66 additions & 2 deletions web-console/src/utils/sampler.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
* limitations under the License.
*/

import { dedupe } from '@druid-toolkit/query';
import { dedupe, F, SqlExpression, SqlFunction } from '@druid-toolkit/query';
import * as JSONBig from 'json-bigint-native';

import type {
Expand Down Expand Up @@ -186,7 +186,7 @@ export async function postToSampler(
sampleSpec: SampleSpec,
forStr: string,
): Promise<SampleResponse> {
sampleSpec = fixSamplerTypes(sampleSpec);
sampleSpec = fixSamplerLookups(fixSamplerTypes(sampleSpec));

let sampleResp: any;
try {
Expand Down Expand Up @@ -622,3 +622,67 @@ export async function sampleForSchema(

return postToSampler(applyCache(sampleSpec, cacheRows), 'schema');
}

function fixSamplerLookups(sampleSpec: SampleSpec): SampleSpec {
const transforms: Transform[] | undefined = deepGet(
sampleSpec,
'spec.dataSchema.transformSpec.transforms',
);
if (!Array.isArray(transforms)) return sampleSpec;

return deepSet(
sampleSpec,
'spec.dataSchema.transformSpec.transforms',
transforms.map(transform => {
const { expression } = transform;
if (typeof expression !== 'string') return transform;
return { ...transform, expression: changeLookupInExpressionsSampling(expression) };
}),
);
}

/**
* Lookups do not work in the sampler because they are not loaded in the Overlord
* to prevent the user from getting an error like "Unknown lookup [lookup name]" we
* change the lookup expression to a placeholder
*
* lookup("x", 'lookup_name') => concat('lookup_name', '[', "x", '] -- This is a placeholder, lookups are not supported in sampling')
* lookup("x", 'lookup_name', 'replaceValue') => nvl(concat('lookup_name', '[', "x", '] -- This is a placeholder, lookups are not supported in sampling'), 'replaceValue')
*/
export function changeLookupInExpressionsSampling(druidExpression: string): string {
if (!druidExpression.includes('lookup')) return druidExpression;

// The Druid expressions are very close to SQL so try parsing them as SQL, if it parses then this is a more robust way to apply this transformation
const parsedDruidExpression = SqlExpression.maybeParse(druidExpression);
if (parsedDruidExpression) {
return String(
parsedDruidExpression.walk(ex => {
if (ex instanceof SqlFunction && ex.getEffectiveFunctionName() === 'LOOKUP') {
if (ex.numArgs() < 2 || ex.numArgs() > 3) return SqlExpression.parse('null');
const concat = F(
'concat',
ex.getArg(1),
'[',
ex.getArg(0),
'] -- This is a placeholder, lookups are not supported in sampling',
);

const replaceMissingValueWith = ex.getArg(2);
if (!replaceMissingValueWith) return concat;
return F('nvl', concat, replaceMissingValueWith);
}
return ex;
}),
);
}

// If we can not parse the expression as SQL then bash it with a regexp
return druidExpression.replace(/lookup\s*\(([^)]+)\)/g, (_, argString: string) => {
const args = argString.trim().split(/\s*,\s*/);
if (args.length < 2 || args.length > 3) return 'null';
const concat = `concat(${args[1]},'[',${args[0]},'] -- This is a placeholder, lookups are not supported in sampling')`;
const replaceMissingValueWith = args[2];
if (!replaceMissingValueWith) return concat;
return `nvl(${concat},${replaceMissingValueWith})`;
});
}
Loading