RANK() query fails with two derived tables
These first two queries both work:
select *
FROM
(
-- leaf_node(a, fact_sales)
SELECT fact_sales."store_id" AS "store__id", max_dim_date."the_month" AS "month_of_year__id",
SUM(fact_sales."sales_amt") AS "fact_sales_sales_amt", SUM(fact_sales."tax_amt") AS "fact_sales_tax_amt"
FROM fact_sales
JOIN max_dim_date
ON max_dim_date.the_date = fact_sales.the_date
GROUP BY fact_sales."store_id", max_dim_date."the_month"
) AS r2_1
LEFT OUTER JOIN
(
-- leaf_node(a, fact_discounts)
SELECT fact_discounts."store_id" AS "store__id", max_dim_date."the_month" AS "month_of_year__id",
SUM(fact_discounts."discount_amt") AS "fact_discounts_discount_amt"
FROM fact_discounts
JOIN max_dim_date
ON max_dim_date.the_date = fact_discounts.the_date
GROUP BY fact_discounts."store_id", max_dim_date."the_month"
) AS r2_2
ON r2_2."store__id" = r2_1."store__id"
AND r2_2."month_of_year__id" = r2_1."month_of_year__id"
select *,
RANK() OVER (PARTITION BY r2_1."store__id" ORDER BY r2_1."fact_sales_sales_amt" DESC) AS "__window_pred1"
FROM
(
-- leaf_node(a, fact_sales)
SELECT fact_sales."store_id" AS "store__id", max_dim_date."the_month" AS "month_of_year__id",
SUM(fact_sales."sales_amt") AS "fact_sales_sales_amt", SUM(fact_sales."tax_amt") AS "fact_sales_tax_amt"
FROM fact_sales
JOIN max_dim_date
ON max_dim_date.the_date = fact_sales.the_date
GROUP BY fact_sales."store_id", max_dim_date."the_month"
) AS r2_1
But this next one fails with an error (I'll try to get the server log info in the future):
(TIOStreamTransport.java:read:130:org.apache.thrift.transport.TTransportException: Socket is closed by peer.
select *,
RANK() OVER (PARTITION BY r2_1."store__id" ORDER BY r2_1."fact_sales_sales_amt" DESC) AS "__window_pred1"
FROM
(
-- leaf_node(a, fact_sales)
SELECT fact_sales."store_id" AS "store__id", max_dim_date."the_month" AS "month_of_year__id",
SUM(fact_sales."sales_amt") AS "fact_sales_sales_amt", SUM(fact_sales."tax_amt") AS "fact_sales_tax_amt"
FROM fact_sales
JOIN max_dim_date
ON max_dim_date.the_date = fact_sales.the_date
GROUP BY fact_sales."store_id", max_dim_date."the_month"
) AS r2_1
LEFT OUTER JOIN
(
-- leaf_node(a, fact_discounts)
SELECT fact_discounts."store_id" AS "store__id", max_dim_date."the_month" AS "month_of_year__id",
SUM(fact_discounts."discount_amt") AS "fact_discounts_discount_amt"
FROM fact_discounts
JOIN max_dim_date
ON max_dim_date.the_date = fact_discounts.the_date
GROUP BY fact_discounts."store_id", max_dim_date."the_month"
) AS r2_2
ON r2_2."store__id" = r2_1."store__id"
AND r2_2."month_of_year__id" = r2_1."month_of_year__id"
-
FWIW, the following query uses partitioned
RANK()
but only against a single derived table, and it fails with a generic server error.-- dataset_join_node(out, [out_out]) SELECT r2_1."month__id" AS "month__id", r2_1."store__id" AS "store__id", r2_1."sales_rating" AS "sales_rating", RANK() OVER (PARTITION BY r2_1."month__id" ORDER BY r2_1."sales_rating" DESC) AS "__window_pred1" FROM ( -- dataset_join_node(out_out, [out_t]) SELECT r3_1."month__id" AS "month__id", r3_1."store__id" AS "store__id", (CASE WHEN 6 = 0 THEN NULL ELSE (r3_1."a" + r3_1."b") / (6 * 1.0) END) * 100 AS "sales_rating" FROM ( -- leaf_join_node(out_t, [fact_sales, fact_discounts]) SELECT COALESCE(r4_1."month__id", r4_2."month__id") AS "month__id", COALESCE(r4_1."store__id", r4_2."store__id") AS "store__id", (r4_1."fact_sales_sales_amt" - .3) * 5 AS "a", (CASE WHEN (r4_1."fact_sales_tax_amt" - r4_2."fact_discounts_discount_amt") = 0 THEN NULL ELSE r4_1."fact_sales_sales_amt" / ((r4_1."fact_sales_tax_amt" - r4_2."fact_discounts_discount_amt") * 1.0) END - 3) * .25 AS "b" FROM ( -- leaf_node(out_t, fact_sales) SELECT max_dim_date."month_year_name" AS "month__id", fact_sales."store_id" AS "store__id", SUM(fact_sales."sales_amt") AS "fact_sales_sales_amt", SUM(fact_sales."tax_amt") AS "fact_sales_tax_amt" FROM fact_sales JOIN max_dim_date ON max_dim_date.the_date = fact_sales.the_date AND (max_dim_date.the_year = 2015) GROUP BY max_dim_date."month_year_name", fact_sales."store_id" ) AS r4_1 LEFT OUTER JOIN ( -- leaf_node(out_t, fact_discounts) SELECT max_dim_date."month_year_name" AS "month__id", fact_discounts."store_id" AS "store__id", SUM(fact_discounts."discount_amt") AS "fact_discounts_discount_amt" FROM fact_discounts JOIN max_dim_date ON max_dim_date.the_date = fact_discounts.the_date AND (max_dim_date.the_year = 2015) GROUP BY max_dim_date."month_year_name", fact_discounts."store_id" ) AS r4_2 ON r4_2."month__id" = r4_1."month__id" AND r4_2."store__id" = r4_1."store__id" ) AS r3_1 ) AS r2_1
-Larry
-
Seeing something similar with
LAG()
when there's a join.-- leaf_join_node(out, [fact_sales]) SELECT r1_1."month__id" AS "month__id", r1_1."region__id" AS "region__id", LAG(r1_1."fact_sales_sales_amt", 2) OVER(PARTITION BY r1_1."region__id" ORDER BY max_dim_date_seq."month_year_sequence") AS "metric1" FROM ( -- leaf_node(out, fact_sales) SELECT max_dim_date."month_year_name" AS "month__id", dim_store."region_id" AS "region__id", SUM(fact_sales."sales_amt") AS "fact_sales_sales_amt" FROM fact_sales JOIN max_dim_date ON max_dim_date.the_date = fact_sales.the_date AND (max_dim_date.the_year = 2015) JOIN dim_store ON dim_store.store_id = fact_sales.store_id GROUP BY max_dim_date."month_year_name", dim_store."region_id" ) AS r1_1 JOIN ( SELECT DISTINCT month_year_name, month_year_sequence FROM max_dim_date ) AS max_dim_date_seq ON max_dim_date_seq.month_year_name = r1_1.month__id
Please sign in to leave a comment.
Comments
2 comments