feat(cubesql): Penalize zero members in wrapper (#8927)
This allows extracting a fully assembled `CubeScan` under the wrapper instead of `CubeScan(allMembers, ungrouped=true)`.

Before this, there were two related cost components: `non_detected_cube_scans` and `cube_members`.
`non_detected_cube_scans` penalizes a `CubeScan` without members specifically outside the wrapper. This is a pretty harsh penalty; queries like that are Not Good.
`cube_members` prefers queries with fewer members, which seems fine. But on its own it would prefer a query with zero members, which is, actually, all the members.
A new cost component is added: `zero_members_wrapper`. It stands right before `cube_members` and penalizes the no-members representation before `cube_members` starts impacting extraction.
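
For illustration, here is a minimal sketch of how such a lexicographic cost ordering behaves. The `Cost` struct, its field set, and the values below are simplified stand-ins, not the actual cubesql cost function:

```rust
// Hypothetical, simplified cost tuple; derived Ord compares fields in
// declaration order, so `zero_members_wrapper` dominates `cube_members`.
#[derive(PartialEq, Eq, PartialOrd, Ord, Debug)]
struct Cost {
    non_detected_cube_scans: usize, // hard penalty: member-less CubeScan outside a wrapper
    zero_members_wrapper: usize,    // new: member-less CubeScan inside a wrapper
    cube_members: usize,            // prefer plans that request fewer members
}

fn main() {
    // A wrapped CubeScan with two explicit members...
    let with_members = Cost { non_detected_cube_scans: 0, zero_members_wrapper: 0, cube_members: 2 };
    // ...is cheaper than a wrapped CubeScan with zero members (i.e. all the members),
    // even though its `cube_members` component is higher.
    let zero_members = Cost { non_detected_cube_scans: 0, zero_members_wrapper: 1, cube_members: 0 };
    assert!(with_members < zero_members);
    println!("{:?} < {:?}", with_members, zero_members);
}
```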

New `CubeScan` extractions surfaced a couple of bugs related to aliasing in generated SQL, hence all the supporting stuff:

* Support member alias for TD with granularity

Before this, the schema compiler didn't use aliases for `cube.timeDimension.granularity` members.

* Extract ColumnRemapping and Remapper structs
* Implement column remapping and literal member handling for `CubeScan` in wrapper

Now column names introduced by DataFusion get renamed, which avoids sending overly long or incorrect aliases to Cube for SQL generation, and later to the data source.
DF can generate names like `datetrunc(Utf8("day"),Orders.createdAt)`, and aliases like that are not expected by the JS side.
A single `CubeScan` can represent a join of multiple `TableScan`s; those can have different table aliases, and column expressions on top of the `CubeScan` in the original plan can have different qualifiers. But the generated SQL can have only one table alias, so all column expressions on top need to be remapped to that single alias as well.
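
As a rough sketch of the remapping idea (the function, alias scheme, and names below are illustrative assumptions, not the actual `ColumnRemapping`/`Remapper` implementation):

```rust
use std::collections::HashMap;

/// Map a (qualifier, column name) pair from the original plan to a short,
/// stable alias under the single table alias of the generated SQL.
fn remap_column(
    qualifier: Option<&str>,
    name: &str,
    remapping: &mut HashMap<String, String>,
) -> String {
    let key = match qualifier {
        Some(q) => format!("{q}.{name}"),
        None => name.to_string(),
    };
    let next_id = remapping.len();
    remapping
        .entry(key)
        .or_insert_with(|| {
            // Keep only characters that are safe in an alias, keep it short,
            // and disambiguate with a counter.
            let safe: String = name
                .chars()
                .map(|c| if c.is_ascii_alphanumeric() { c.to_ascii_lowercase() } else { '_' })
                .take(40)
                .collect();
            format!("{safe}_{next_id}")
        })
        .clone()
}

fn main() {
    let mut remapping = HashMap::new();
    // DataFusion-generated name that the JS side would not accept as a member alias.
    let df_name = r#"datetrunc(Utf8("day"),Orders.createdAt)"#;
    println!("{}", remap_column(Some("orders"), df_name, &mut remapping));
    // Columns with different qualifiers end up in the same remapping table,
    // so they can all be emitted under a single table alias in the generated SQL.
    println!("{}", remap_column(Some("line_items"), "createdAt", &mut remapping));
}
```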

* Support literal members in CubeScan under wrapper

Now SQL generated for a `CubeScan` will not skip its literal members, and will generate a SELECT wrapper with literal members as literal columns.
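
Conceptually, the wrapping looks roughly like this sketch (the function and quoting below are illustrative assumptions, not the real SQL generation code):

```rust
// Wrap the SQL generated for the CubeScan's regular members in an outer SELECT
// that re-adds literal members as literal columns.
fn wrap_with_literal_members(inner_sql: &str, regular: &[&str], literals: &[(&str, &str)]) -> String {
    let mut columns: Vec<String> = regular.iter().map(|c| format!("\"{c}\"")).collect();
    columns.extend(literals.iter().map(|(value, alias)| format!("{value} AS \"{alias}\"")));
    format!("SELECT {} FROM ({inner_sql}) AS q", columns.join(", "))
}

fn main() {
    println!(
        "{}",
        wrap_with_literal_members(
            "SELECT status AS my_status FROM orders GROUP BY 1",
            &["my_status"],
            &[("1", "my_literal")],
        )
    );
    // SELECT "my_status", 1 AS "my_literal" FROM (SELECT status AS my_status FROM orders GROUP BY 1) AS q
}
```
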
mcheshkov authored Dec 17, 2024
1 parent e661d2a commit 171ea35
Showing 7 changed files with 662 additions and 155 deletions.
@@ -70,10 +70,14 @@ export class BaseTimeDimension extends BaseFilter {
     return super.aliasName();
   }
 
-  // @ts-ignore
-  public unescapedAliasName(granularity: string) {
+  public unescapedAliasName(granularity?: string) {
     const actualGranularity = granularity || this.granularityObj?.granularity || 'day';
 
+    const fullName = `${this.dimension}.${actualGranularity}`;
+    if (this.query.options.memberToAlias && this.query.options.memberToAlias[fullName]) {
+      return this.query.options.memberToAlias[fullName];
+    }
+
     return `${this.query.aliasName(this.dimension)}_${actualGranularity}`; // TODO date here for rollups
   }

@@ -232,6 +232,84 @@ Array [
]
`;

exports[`SQL API Postgres (Data) select __user and literal grouped under wrapper: select __user and literal in wrapper 1`] = `
Array [
Object {
"my_created_at": 2024-01-01T00:00:00.000Z,
"my_literal": "1",
"my_status": "new",
"my_user": null,
},
Object {
"my_created_at": 2024-01-01T00:00:00.000Z,
"my_literal": "1",
"my_status": "processed",
"my_user": null,
},
Object {
"my_created_at": 2024-01-01T00:00:00.000Z,
"my_literal": "1",
"my_status": "shipped",
"my_user": null,
},
]
`;

exports[`SQL API Postgres (Data) select __user and literal grouped: select __user and literal 1`] = `
Array [
Object {
"Int64(2)": "2",
"__cubeJoinField": null,
"datetrunc(Utf8(\\"day\\"),Orders.createdAt)": 2024-01-01T00:00:00.000Z,
"id": 1,
"my_created_at": 2024-01-01T00:00:00.000Z,
"my_literal": "1",
"my_status": "new",
"my_user": null,
},
Object {
"Int64(2)": "2",
"__cubeJoinField": null,
"datetrunc(Utf8(\\"day\\"),Orders.createdAt)": 2024-01-02T00:00:00.000Z,
"id": 2,
"my_created_at": 2024-01-01T00:00:00.000Z,
"my_literal": "1",
"my_status": "new",
"my_user": null,
},
Object {
"Int64(2)": "2",
"__cubeJoinField": null,
"datetrunc(Utf8(\\"day\\"),Orders.createdAt)": 2024-01-03T00:00:00.000Z,
"id": 3,
"my_created_at": 2024-01-01T00:00:00.000Z,
"my_literal": "1",
"my_status": "processed",
"my_user": null,
},
Object {
"Int64(2)": "2",
"__cubeJoinField": null,
"datetrunc(Utf8(\\"day\\"),Orders.createdAt)": 2024-01-04T00:00:00.000Z,
"id": 4,
"my_created_at": 2024-01-01T00:00:00.000Z,
"my_literal": "1",
"my_status": "processed",
"my_user": null,
},
Object {
"Int64(2)": "2",
"__cubeJoinField": null,
"datetrunc(Utf8(\\"day\\"),Orders.createdAt)": 2024-01-05T00:00:00.000Z,
"id": 5,
"my_created_at": 2024-01-01T00:00:00.000Z,
"my_literal": "1",
"my_status": "shipped",
"my_user": null,
},
]
`;

exports[`SQL API Postgres (Data) select null in subquery with streaming 1`] = `
Array [
Object {
72 changes: 72 additions & 0 deletions packages/cubejs-testing/test/smoke-cubesql.test.ts
@@ -404,6 +404,78 @@ describe('SQL API', () => {
expect(res.rows).toEqual([{ max: null }]);
});

test('select __user and literal grouped', async () => {
const query = `
SELECT
status AS my_status,
date_trunc('month', createdAt) AS my_created_at,
__user AS my_user,
1 AS my_literal,
-- Columns without aliases should also work
id,
date_trunc('day', createdAt),
__cubeJoinField,
2
FROM
Orders
GROUP BY 1,2,3,4,5,6,7,8
ORDER BY 1,2,3,4,5,6,7,8
`;

const res = await connection.query(query);
expect(res.rows).toMatchSnapshot('select __user and literal');
});

test('select __user and literal grouped under wrapper', async () => {
const query = `
WITH
-- This subquery should be represented as CubeScan(ungrouped=false) inside CubeScanWrapper
cube_scan_subq AS (
SELECT
status AS my_status,
date_trunc('month', createdAt) AS my_created_at,
__user AS my_user,
1 AS my_literal,
-- Columns without aliases should also work
id,
date_trunc('day', createdAt),
__cubeJoinField,
2
FROM Orders
GROUP BY 1,2,3,4,5,6,7,8
),
filter_subq AS (
SELECT
status status_filter
FROM Orders
GROUP BY
status_filter
)
SELECT
-- Should use SELECT * here to reference columns without aliases.
-- But it's broken ATM in DF, initial plan contains \`Projection: ... #__subquery-0.logs_content_filter\` on top, but it should not be there
-- TODO fix it
my_created_at,
my_status,
my_user,
my_literal
FROM cube_scan_subq
WHERE
-- This subquery filter should trigger wrapping of whole query
my_status IN (
SELECT
status_filter
FROM filter_subq
)
GROUP BY 1,2,3,4
ORDER BY 1,2,3,4
;
`;

const res = await connection.query(query);
expect(res.rows).toMatchSnapshot('select __user and literal in wrapper');
});

test('where segment is false', async () => {
const query =
'SELECT value AS val, * FROM "SegmentTest" WHERE segment_eq_1 IS FALSE ORDER BY value;';