Skip to content

Commit cd7e9fd

Browse files
authored
fix(schema-compiler): Use join paths from pre-aggregation declaration instead of building join tree from scratch (#9471)
Without this pre-aggregatoin declaration would build a list of members, and build join tree from scratch. New join tree could be different, and pre-aggregation would contain incorrect data. Add join hints to member adapters, to build join tree with proper members. This is more of a hack than proper solution: join tree would still disallow to have same cube twice with different join paths. Just adding extra join hints on query level does not work because of FKQA and multiplication check in subqueries: we have to know join path for every member, so it should be either explicit or unambigous _for each member_. For now, only pre-aggregation itself respects join paths, but pre-aggregation matching does same as before: strips join path from pre-aggregation references and compares members directly. User should manually ensure that views and pre-aggregations both match each other and does not produce false matches.
1 parent fed08e1 commit cd7e9fd

File tree

13 files changed

+658
-198
lines changed

13 files changed

+658
-198
lines changed

packages/cubejs-query-orchestrator/src/orchestrator/PreAggregations.ts

+1-1
Original file line numberDiff line numberDiff line change
@@ -590,7 +590,7 @@ export class PreAggregations {
590590

591591
return {
592592
...queryBody,
593-
preAggregations: expandedPreAggregations.reduce((a, b) => a.concat(b), []),
593+
preAggregations: expandedPreAggregations.flat(),
594594
groupedPartitionPreAggregations: expandedPreAggregations
595595
};
596596
}

packages/cubejs-schema-compiler/src/adapter/BaseDimension.ts

+11
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import type { BaseQuery } from './BaseQuery';
22
import type { DimensionDefinition, SegmentDefinition } from '../compiler/CubeEvaluator';
3+
import { CubeSymbols } from "../compiler/CubeSymbols";
34

45
export class BaseDimension {
56
public readonly expression: any;
@@ -10,6 +11,8 @@ export class BaseDimension {
1011

1112
public readonly isMemberExpression: boolean = false;
1213

14+
public readonly joinHint: Array<string> = [];
15+
1316
public constructor(
1417
protected readonly query: BaseQuery,
1518
public readonly dimension: any
@@ -20,6 +23,14 @@ export class BaseDimension {
2023
// In case of SQL push down expressionName doesn't contain cube name. It's just a column name.
2124
this.expressionName = dimension.expressionName || `${dimension.cubeName}.${dimension.name}`;
2225
this.isMemberExpression = !!dimension.definition;
26+
} else {
27+
// TODO move this `as` to static types
28+
const dimensionPath = dimension as string | null;
29+
if (dimensionPath !== null) {
30+
const { path, joinHint } = CubeSymbols.joinHintFromPath(dimensionPath);
31+
this.dimension = path;
32+
this.joinHint = joinHint;
33+
}
2334
}
2435
}
2536

packages/cubejs-schema-compiler/src/adapter/BaseMeasure.ts

+9
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import { UserError } from '../compiler/UserError';
22
import type { BaseQuery } from './BaseQuery';
33
import { MeasureDefinition } from '../compiler/CubeEvaluator';
4+
import { CubeSymbols } from "../compiler/CubeSymbols";
45

56
export class BaseMeasure {
67
public readonly expression: any;
@@ -13,6 +14,8 @@ export class BaseMeasure {
1314

1415
protected readonly patchedMeasure: MeasureDefinition | null = null;
1516

17+
public readonly joinHint: Array<string> = [];
18+
1619
protected preparePatchedMeasure(sourceMeasure: string, newMeasureType: string | null, addFilters: Array<{sql: Function}>): MeasureDefinition {
1720
const source = this.query.cubeEvaluator.measureByPath(sourceMeasure);
1821

@@ -123,6 +126,12 @@ export class BaseMeasure {
123126
measure.expression.addFilters,
124127
);
125128
}
129+
} else {
130+
// TODO move this `as` to static types
131+
const measurePath = measure as string;
132+
const { path, joinHint } = CubeSymbols.joinHintFromPath(measurePath);
133+
this.measure = path;
134+
this.joinHint = joinHint;
126135
}
127136
}
128137

packages/cubejs-schema-compiler/src/adapter/BaseQuery.js

+80-72
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ import {
2424
timeSeriesFromCustomInterval
2525
} from '@cubejs-backend/shared';
2626

27+
import { CubeSymbols } from "../compiler/CubeSymbols";
2728
import { UserError } from '../compiler/UserError';
2829
import { SqlParser } from '../parser/SqlParser';
2930
import { BaseDimension } from './BaseDimension';
@@ -307,16 +308,16 @@ export class BaseQuery {
307308
}
308309

309310
prebuildJoin() {
310-
if (this.useNativeSqlPlanner) {
311-
// Tesseract doesn't require join to be prebuilt and there's a case where single join can't be built for multi-fact query
312-
// But we need this join for a fallback when using pre-aggregations. So we’ll try to obtain the join but ignore any errors (which may occur if the query is a multi-fact one).
313-
try {
314-
this.join = this.joinGraph.buildJoin(this.allJoinHints);
315-
} catch (e) {
316-
// Ignore
317-
}
318-
} else {
311+
try {
312+
// TODO allJoinHints should contain join hints form pre-agg
319313
this.join = this.joinGraph.buildJoin(this.allJoinHints);
314+
} catch (e) {
315+
if (this.useNativeSqlPlanner) {
316+
// Tesseract doesn't require join to be prebuilt and there's a case where single join can't be built for multi-fact query
317+
// But we need this join for a fallback when using pre-aggregations. So we’ll try to obtain the join but ignore any errors (which may occur if the query is a multi-fact one).
318+
} else {
319+
throw e;
320+
}
320321
}
321322
}
322323

@@ -363,6 +364,10 @@ export class BaseQuery {
363364
return this.collectedCubeNames;
364365
}
365366

367+
/**
368+
*
369+
* @returns {Array<Array<string>>}
370+
*/
366371
get allJoinHints() {
367372
if (!this.collectedJoinHints) {
368373
this.collectedJoinHints = this.collectJoinHints();
@@ -1203,7 +1208,16 @@ export class BaseQuery {
12031208

12041209
collectAllMultiStageMembers(allMemberChildren) {
12051210
const allMembers = R.uniq(R.flatten(Object.keys(allMemberChildren).map(k => [k].concat(allMemberChildren[k]))));
1206-
return R.fromPairs(allMembers.map(m => ([m, this.memberInstanceByPath(m).isMultiStage()])));
1211+
return R.fromPairs(allMembers.map(m => {
1212+
// When `m` is coming from `collectAllMemberChildren`, it can contain `granularities.customGranularityName` in path
1213+
// And it would mess up with join hints detection
1214+
const trimmedPath = this
1215+
.cubeEvaluator
1216+
.parsePathAnyType(m)
1217+
.slice(0, 2)
1218+
.join('.');
1219+
return [m, this.memberInstanceByPath(trimmedPath).isMultiStage()];
1220+
}));
12071221
}
12081222

12091223
memberInstanceByPath(m) {
@@ -1398,7 +1412,7 @@ export class BaseQuery {
13981412
// TODO condition should something else instead of rank
13991413
multiStageQuery: !!withQuery.measures.find(d => {
14001414
const { type } = this.newMeasure(d).definition();
1401-
return type === 'rank' || BaseQuery.isCalculatedMeasureType(type);
1415+
return type === 'rank' || CubeSymbols.isCalculatedMeasureType(type);
14021416
}),
14031417
disableExternalPreAggregations: true,
14041418
};
@@ -1992,7 +2006,7 @@ export class BaseQuery {
19922006
);
19932007

19942008
if (shouldBuildJoinForMeasureSelect) {
1995-
const joinHints = this.collectFrom(measures, this.collectJoinHintsFor.bind(this), 'collectJoinHintsFor');
2009+
const joinHints = this.collectJoinHintsFromMembers(measures);
19962010
const measuresJoin = this.joinGraph.buildJoin(joinHints);
19972011
if (measuresJoin.multiplicationFactor[keyCubeName]) {
19982012
throw new UserError(
@@ -2046,6 +2060,11 @@ export class BaseQuery {
20462060
(!this.safeEvaluateSymbolContext().ungrouped && this.aggregateSubQueryGroupByClause() || '');
20472061
}
20482062

2063+
/**
2064+
* @param {Array<BaseMeasure>} measures
2065+
* @param {string} keyCubeName
2066+
* @returns {boolean}
2067+
*/
20492068
checkShouldBuildJoinForMeasureSelect(measures, keyCubeName) {
20502069
// When member expression references view, it would have to collect join hints from view
20512070
// Consider join A->B, as many-to-one, so B is multiplied and A is not, and member expression like SUM(AB_view.dimB)
@@ -2067,7 +2086,11 @@ export class BaseQuery {
20672086
.filter(member => member.definition().ownedByCube);
20682087

20692088
const cubes = this.collectFrom(nonViewMembers, this.collectCubeNamesFor.bind(this), 'collectCubeNamesFor');
2070-
const joinHints = this.collectFrom(nonViewMembers, this.collectJoinHintsFor.bind(this), 'collectJoinHintsFor');
2089+
// Not using `collectJoinHintsFromMembers([measure])` because it would collect too many join hints from view
2090+
const joinHints = [
2091+
measure.joinHint,
2092+
...this.collectJoinHintsFromMembers(nonViewMembers),
2093+
];
20712094
if (R.any(cubeName => keyCubeName !== cubeName, cubes)) {
20722095
const measuresJoin = this.joinGraph.buildJoin(joinHints);
20732096
if (measuresJoin.multiplicationFactor[keyCubeName]) {
@@ -2186,12 +2209,29 @@ export class BaseQuery {
21862209
);
21872210
}
21882211

2212+
/**
2213+
*
2214+
* @param {boolean} [excludeTimeDimensions=false]
2215+
* @returns {Array<Array<string>>}
2216+
*/
21892217
collectJoinHints(excludeTimeDimensions = false) {
2190-
return this.collectFromMembers(
2191-
excludeTimeDimensions,
2192-
this.collectJoinHintsFor.bind(this),
2193-
'collectJoinHintsFor'
2194-
);
2218+
const membersToCollectFrom = this.allMembersConcat(excludeTimeDimensions)
2219+
.concat(this.join ? this.join.joins.map(j => ({
2220+
getMembers: () => [{
2221+
path: () => null,
2222+
cube: () => this.cubeEvaluator.cubeFromPath(j.originalFrom),
2223+
definition: () => j.join,
2224+
}]
2225+
})) : []);
2226+
2227+
return this.collectJoinHintsFromMembers(membersToCollectFrom);
2228+
}
2229+
2230+
collectJoinHintsFromMembers(members) {
2231+
return [
2232+
...members.map(m => m.joinHint).filter(h => h?.length > 0),
2233+
...this.collectFrom(members, this.collectJoinHintsFor.bind(this), 'collectJoinHintsFromMembers'),
2234+
];
21952235
}
21962236

21972237
collectFromMembers(excludeTimeDimensions, fn, methodName) {
@@ -2206,6 +2246,11 @@ export class BaseQuery {
22062246
return this.collectFrom(membersToCollectFrom, fn, methodName);
22072247
}
22082248

2249+
/**
2250+
*
2251+
* @param {boolean} excludeTimeDimensions
2252+
* @returns {Array<BaseMeasure | BaseDimension | BaseSegment>}
2253+
*/
22092254
allMembersConcat(excludeTimeDimensions) {
22102255
return this.measures
22112256
.concat(this.dimensions)
@@ -2902,7 +2947,7 @@ export class BaseQuery {
29022947
funDef = this.countDistinctApprox(evaluateSql);
29032948
} else if (symbol.type === 'countDistinct' || symbol.type === 'count' && !symbol.sql && multiplied) {
29042949
funDef = `count(distinct ${evaluateSql})`;
2905-
} else if (BaseQuery.isCalculatedMeasureType(symbol.type)) {
2950+
} else if (CubeSymbols.isCalculatedMeasureType(symbol.type)) {
29062951
// TODO calculated measure type will be ungrouped
29072952
// if (this.multiStageDimensions.length !== this.dimensions.length) {
29082953
// throw new UserError(`Calculated measure '${measurePath}' uses group_by or reduce_by context modifiers while it isn't allowed`);
@@ -2928,23 +2973,12 @@ export class BaseQuery {
29282973
return this.primaryKeyCount(cubeName, true);
29292974
}
29302975
}
2931-
if (BaseQuery.isCalculatedMeasureType(symbol.type)) {
2976+
if (CubeSymbols.isCalculatedMeasureType(symbol.type)) {
29322977
return evaluateSql;
29332978
}
29342979
return `${symbol.type}(${evaluateSql})`;
29352980
}
29362981

2937-
static isCalculatedMeasureType(type) {
2938-
return type === 'number' || type === 'string' || type === 'time' || type === 'boolean';
2939-
}
2940-
2941-
/**
2942-
TODO: support type qualifiers on min and max
2943-
*/
2944-
static toMemberDataType(type) {
2945-
return this.isCalculatedMeasureType(type) ? type : 'number';
2946-
}
2947-
29482982
aggregateOnGroupedColumn(symbol, evaluateSql, topLevelMerge, measurePath) {
29492983
const cumulativeMeasureFilters = (this.safeEvaluateSymbolContext().cumulativeMeasureFilters || {})[measurePath];
29502984
if (cumulativeMeasureFilters) {
@@ -4051,7 +4085,7 @@ export class BaseQuery {
40514085
sqlUtils: {
40524086
convertTz: (field) => field,
40534087
},
4054-
securityContext: BaseQuery.contextSymbolsProxyFrom({}, allocateParam),
4088+
securityContext: CubeSymbols.contextSymbolsProxyFrom({}, allocateParam),
40554089
};
40564090
}
40574091

@@ -4066,41 +4100,7 @@ export class BaseQuery {
40664100
}
40674101

40684102
contextSymbolsProxy(symbols) {
4069-
return BaseQuery.contextSymbolsProxyFrom(symbols, this.paramAllocator.allocateParam.bind(this.paramAllocator));
4070-
}
4071-
4072-
static contextSymbolsProxyFrom(symbols, allocateParam) {
4073-
return new Proxy(symbols, {
4074-
get: (target, name) => {
4075-
const propValue = target[name];
4076-
const methods = (paramValue) => ({
4077-
filter: (column) => {
4078-
if (paramValue) {
4079-
const value = Array.isArray(paramValue) ?
4080-
paramValue.map(allocateParam) :
4081-
allocateParam(paramValue);
4082-
if (typeof column === 'function') {
4083-
return column(value);
4084-
} else {
4085-
return `${column} = ${value}`;
4086-
}
4087-
} else {
4088-
return '1 = 1';
4089-
}
4090-
},
4091-
requiredFilter: (column) => {
4092-
if (!paramValue) {
4093-
throw new UserError(`Filter for ${column} is required`);
4094-
}
4095-
return methods(paramValue).filter(column);
4096-
},
4097-
unsafeValue: () => paramValue
4098-
});
4099-
return methods(target)[name] ||
4100-
typeof propValue === 'object' && propValue !== null && BaseQuery.contextSymbolsProxyFrom(propValue, allocateParam) ||
4101-
methods(propValue);
4102-
}
4103-
});
4103+
return CubeSymbols.contextSymbolsProxyFrom(symbols, this.paramAllocator.allocateParam.bind(this.paramAllocator));
41044104
}
41054105

41064106
static extractFilterMembers(filter) {
@@ -4306,6 +4306,11 @@ export class BaseQuery {
43064306
});
43074307
}
43084308

4309+
/**
4310+
*
4311+
* @param {boolean} excludeSegments
4312+
* @returns {Array<BaseMeasure | BaseDimension | BaseSegment>}
4313+
*/
43094314
flattenAllMembers(excludeSegments = false) {
43104315
return R.flatten(
43114316
this.measures
@@ -4326,9 +4331,14 @@ export class BaseQuery {
43264331
return this.backAliasMembers(this.flattenAllMembers());
43274332
}
43284333

4334+
/**
4335+
*
4336+
* @param {Array<BaseMeasure | BaseDimension | BaseSegment>} members
4337+
* @returns {Record<string, string>}
4338+
*/
43294339
backAliasMembers(members) {
43304340
const query = this;
4331-
return members.map(
4341+
return Object.fromEntries(members.flatMap(
43324342
member => {
43334343
const collectedMembers = query.evaluateSymbolSqlWithContext(
43344344
() => query.collectFrom([member], query.collectMemberNamesFor.bind(query), 'collectMemberNamesFor'),
@@ -4343,10 +4353,8 @@ export class BaseQuery {
43434353
}
43444354
return !nonAliasSeen;
43454355
})
4346-
.map(d => (
4347-
{ [query.cubeEvaluator.byPathAnyType(d).aliasMember]: memberPath }
4348-
)).reduce((a, b) => ({ ...a, ...b }), {});
4356+
.map(d => [query.cubeEvaluator.byPathAnyType(d).aliasMember, memberPath]);
43494357
}
4350-
).reduce((a, b) => ({ ...a, ...b }), {});
4358+
));
43514359
}
43524360
}

packages/cubejs-schema-compiler/src/adapter/BaseSegment.ts

+9
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import type { BaseQuery } from './BaseQuery';
2+
import { CubeSymbols } from "../compiler/CubeSymbols";
23

34
export class BaseSegment {
45
public readonly expression: any;
@@ -9,6 +10,8 @@ export class BaseSegment {
910

1011
public readonly isMemberExpression: boolean = false;
1112

13+
public readonly joinHint: Array<string> = [];
14+
1215
public constructor(
1316
protected readonly query: BaseQuery,
1417
public readonly segment: string | any
@@ -19,6 +22,12 @@ export class BaseSegment {
1922
// In case of SQL push down expressionName doesn't contain cube name. It's just a column name.
2023
this.expressionName = segment.expressionName || `${segment.cubeName}.${segment.name}`;
2124
this.isMemberExpression = !!segment.definition;
25+
} else {
26+
// TODO move this `as` to static types
27+
const segmentPath = segment as string;
28+
const { path, joinHint } = CubeSymbols.joinHintFromPath(segmentPath);
29+
this.segment = path;
30+
this.joinHint = joinHint;
2231
}
2332
}
2433

0 commit comments

Comments
 (0)