From 75f9af7f56c38a9a65514b4eb1654e72e3961a73 Mon Sep 17 00:00:00 2001 From: Luke Imhoff Date: Fri, 14 Dec 2018 23:09:40 -0600 Subject: [PATCH] Use lag windowing function to reduce missing block time Ecto 3.0 gets us easy access to partitions and the windowing function `lag`. `lag` is able to keep track of the previous row without a self join. Importantly, the new query with `lag` and no more `generate_series` completes in ~2.5 seconds on Ethereum mainnet using `:timer.tc(Explorer.Chain, :missing_block_number_ranges, [6889126..0])` on ETH Mainnet Test DB 40 while the old version doesn't return after 5+ minutes. I've heavily commented the new version of the query since this took awhile to figure out and most importantly finding the proper terminology: "gaps and islands" was key to find the starting query using `lag`. The query in SQL is as follows: ```sql SELECT island.last_number + 1, island.next_number - 1 FROM ( SELECT lag(land.number) OVER "w" AS last_number, land.number AS next_number FROM ( SELECT blocks.number AS number FROM blocks WHERE blocks.consensus = TRUE UNION ALL ( SELECT LEAST(min_blocks.number, before_range_min) FROM ( SELECT min(blocks.number) AS number FROM blocks WHERE blocks.consensus = TRUE) ) AS min_blocks WHERE min_blocks.number IS NULL OR min_blocks.number != before_range_min ) UNION ALL ( SELECT GREATEST(max_blocks.number, after_range_max) FROM ( SELECT max(blocks.number) AS number FROM blocks WHERE blocks.consensus = TRUE ) AS max_blocks WHERE max_blocks.number IS NULL OR max_blocks.number != after_range_max ) AS land WINDOW "w" AS (ORDER BY land.number) ) AS islands WHERE islands.last_number != islands.next_number - 1 ORDER BY islands.last_number ``` --- apps/explorer/lib/explorer/chain.ex | 112 +++++++++++++++++----------- 1 file changed, 68 insertions(+), 44 deletions(-) diff --git a/apps/explorer/lib/explorer/chain.ex b/apps/explorer/lib/explorer/chain.ex index d72ac90197..80f7e1a1b4 100644 --- a/apps/explorer/lib/explorer/chain.ex +++ b/apps/explorer/lib/explorer/chain.ex @@ -11,6 +11,8 @@ defmodule Explorer.Chain do order_by: 2, order_by: 3, preload: 2, + subquery: 1, + union_all: 2, where: 2, where: 3 ] @@ -1244,54 +1246,76 @@ defmodule Explorer.Chain do def missing_block_number_ranges(range) def missing_block_number_ranges(range_start..range_end) do - {step, first, last, direction} = - if range_start <= range_end do - {1, :minimum, :maximum, :asc} - else - {-1, :maximum, :minimum, :desc} - end + # subquery so we can check for NULL in `range_min_query`, which happens for empty table + min_query = from(block in Block, select: %{number: min(block.number)}, where: block.consensus == true) + # this acts a fake found block, so it has to before the range of blocks we actually care to check + before_range_min = min(range_start, range_end) - 1 - query = - from( - b in Block, - right_join: - missing_block_number_range in fragment( - # adapted from https://www.xaprb.com/blog/2006/03/22/find-contiguous-ranges-with-sql/ - """ - (WITH missing_blocks AS - (SELECT number - FROM generate_series(? :: bigint, ? :: bigint, ? :: bigint) AS number - EXCEPT - SELECT blocks.number - FROM blocks - WHERE blocks.consensus = true) - SELECT no_previous.number AS minimum, - (SELECT MIN(no_next.number) - FROM missing_blocks AS no_next - LEFT OUTER JOIN missing_blocks AS next - ON no_next.number = next.number - 1 - WHERE next.number IS NULL AND - no_next.number >= no_previous.number) AS maximum - FROM missing_blocks as no_previous - LEFT OUTER JOIN missing_blocks AS previous - ON previous.number = no_previous.number - 1 - WHERE previous.number IS NULL) - """, - ^range_start, - ^range_end, - ^step - ), - select: %Range{ - first: field(missing_block_number_range, ^first), - last: field(missing_block_number_range, ^last) + range_min_query = + from(min_block in subquery(min_query), + select: %{ + # `LEAST` ignores `NULL`, so it picks the fake range when there is no `min_block.number` + # because `blocks` is empty + number: fragment("LEAST(?, ?)", min_block.number, ^before_range_min) }, - order_by: [{^direction, field(missing_block_number_range, ^first)}], - # needed because the join makes a cartesian product with all block rows, but we need to use Block to make - # Ecto work. - distinct: true + # `blocks` is empty + # same number will not be returned by `number_query` + where: is_nil(min_block.number) or min_block.number != ^before_range_min ) - Repo.all(query, timeout: :infinity) + number_query = from(block in Block, select: %{number: block.number}, where: block.consensus == true) + + # subquery so we can check for NULL in `range_max_query`, which happens for empty table + max_query = from(block in Block, select: %{number: max(block.number)}, where: block.consensus == true) + # this acts a fake found block, so it has to after the range of blocks we actually care to check + after_range_max = max(range_start, range_end) + 1 + + range_max_query = + from(max_block in subquery(max_query), + select: %{ + # `GREATEST` ignores `NULL`, so it picks the fake range when there is no `max_block.number` + # because `blocks` is empty + number: fragment("GREATEST(?, ?)", max_block.number, ^after_range_max) + }, + # blocks is empty + # same number will not be returned by `number_query` + where: is_nil(max_block.number) or max_block.number != ^after_range_max + ) + + # The actual blocks and a boundary of fake found blocks outside of `range_start..range_end` so that there is always + # a `lag` block + search_range_query = + number_query + |> union_all(^range_min_query) + |> union_all(^range_max_query) + + # Gaps and Islands is the term-of-art for finding the runs of missing (gaps) and existing (islands) data. If you + # Google for `sql missing ranges` you won't find much, but `sql gaps and islands` will get a lot of hits. + + island_query = + from( + search_block in subquery(search_range_query), + windows: [w: [order_by: search_block.number]], + select: %{last_number: search_block.number |> lag() |> over(:w), next_number: search_block.number} + ) + + gap_query = + from( + island in subquery(island_query), + where: island.last_number != island.next_number - 1, + select: %Range{first: island.last_number + 1, last: island.next_number - 1}, + order_by: island.last_number + ) + + ascending = Repo.all(gap_query, timeout: :infinity) + + if range_start <= range_end do + ascending + else + ascending + |> Enum.reverse() + |> Enum.map(fn first..last -> last..first end) + end end @doc """