Skip to main content

zebra_state/service/read/
find.rs

1//! Finding and reading block hashes and headers, in response to peer requests.
2//!
3//! In the functions in this module:
4//!
5//! The block write task commits blocks to the finalized state before updating
6//! `chain` with a cached copy of the best non-finalized chain from
7//! `NonFinalizedState.chain_set`. Then the block commit task can commit additional blocks to
8//! the finalized state after we've cloned the `chain`.
9//!
10//! This means that some blocks can be in both:
11//! - the cached [`Chain`], and
12//! - the shared finalized [`ZebraDb`] reference.
13
14use std::{
15    iter,
16    ops::{RangeBounds, RangeInclusive},
17    sync::Arc,
18};
19
20use chrono::{DateTime, Utc};
21use zebra_chain::{
22    amount::NonNegative,
23    block::{self, Block, Height},
24    serialization::DateTime32,
25    value_balance::ValueBalance,
26};
27
28use crate::{
29    constants,
30    service::{
31        block_iter::any_ancestor_blocks,
32        check::{difficulty::POW_MEDIAN_BLOCK_SPAN, AdjustedDifficulty},
33        finalized_state::ZebraDb,
34        non_finalized_state::{Chain, NonFinalizedState},
35        read::{self, block::block_header, FINALIZED_STATE_QUERY_RETRIES},
36    },
37    BoxError, KnownBlock,
38};
39
40#[cfg(test)]
41mod tests;
42
43/// Returns the tip of the best chain in the non-finalized or finalized state.
44pub fn best_tip(
45    non_finalized_state: &NonFinalizedState,
46    db: &ZebraDb,
47) -> Option<(block::Height, block::Hash)> {
48    tip(non_finalized_state.best_chain(), db)
49}
50
51/// Returns the tip of `chain`.
52/// If there is no chain, returns the tip of `db`.
53pub fn tip<C>(chain: Option<C>, db: &ZebraDb) -> Option<(Height, block::Hash)>
54where
55    C: AsRef<Chain>,
56{
57    // # Correctness
58    //
59    // If there is an overlap between the non-finalized and finalized states,
60    // where the finalized tip is above the non-finalized tip,
61    // Zebra is receiving a lot of blocks, or this request has been delayed for a long time,
62    // so it is acceptable to return either tip.
63    chain
64        .map(|chain| chain.as_ref().non_finalized_tip())
65        .or_else(|| db.tip())
66}
67
68/// Returns the tip block of `chain`.
69/// If there is no chain, returns the tip of `db`.
70pub fn tip_block<C>(chain: Option<C>, db: &ZebraDb) -> Option<Arc<Block>>
71where
72    C: AsRef<Chain>,
73{
74    // # Correctness
75    //
76    // If there is an overlap between the non-finalized and finalized states,
77    // where the finalized tip is above the non-finalized tip,
78    // Zebra is receiving a lot of blocks, or this request has been delayed for a long time,
79    // so it is acceptable to return either tip.
80    chain
81        .and_then(|chain| chain.as_ref().tip_block().map(|b| b.block.clone()))
82        .or_else(|| db.tip_block())
83}
84
85/// Returns the tip [`Height`] of `chain`.
86/// If there is no chain, returns the tip of `db`.
87pub fn tip_height<C>(chain: Option<C>, db: &ZebraDb) -> Option<Height>
88where
89    C: AsRef<Chain>,
90{
91    tip(chain, db).map(|(height, _hash)| height)
92}
93
94/// Returns the tip [`block::Hash`] of `chain`.
95/// If there is no chain, returns the tip of `db`.
96#[allow(dead_code)]
97pub fn tip_hash<C>(chain: Option<C>, db: &ZebraDb) -> Option<block::Hash>
98where
99    C: AsRef<Chain>,
100{
101    tip(chain, db).map(|(_height, hash)| hash)
102}
103
104/// Returns the tip of `chain` with its [`ValueBalance`].
105/// If there is no chain, returns the tip of `db`.
106pub fn tip_with_value_balance<C>(
107    chain: Option<C>,
108    db: &ZebraDb,
109) -> Result<Option<(Height, block::Hash, ValueBalance<NonNegative>)>, BoxError>
110where
111    C: AsRef<Chain>,
112{
113    match chain.map(|chain| chain.as_ref().non_finalized_tip_with_value_balance()) {
114        tip_with_value_balance @ Some(_) => Ok(tip_with_value_balance),
115        None => {
116            // Retry the finalized state query if it was interrupted by a finalizing block.
117            //
118            // TODO: refactor this into a generic retry(finalized_closure, process_and_check_closure) fn
119            for _ in 0..=FINALIZED_STATE_QUERY_RETRIES {
120                let tip @ Some((tip_height, tip_hash)) = db.tip() else {
121                    return Ok(None);
122                };
123
124                let value_balance = db.finalized_value_pool();
125
126                if tip == db.tip() {
127                    return Ok(Some((tip_height, tip_hash, value_balance)));
128                }
129            }
130
131            Err("Zebra is committing too many blocks to the state, \
132                    wait until it syncs to the chain tip"
133                .into())
134        }
135    }
136}
137
138/// Return the depth of block `hash` from the chain tip.
139/// Searches `chain` for `hash`, then searches `db`.
140pub fn depth<C>(chain: Option<C>, db: &ZebraDb, hash: block::Hash) -> Option<u32>
141where
142    C: AsRef<Chain>,
143{
144    let chain = chain.as_ref();
145
146    // # Correctness
147    //
148    // It is ok to do this lookup in two different calls. Finalized state updates
149    // can only add overlapping blocks, and hashes are unique.
150
151    let tip = tip_height(chain, db)?;
152    let height = height_by_hash(chain, db, hash)?;
153
154    Some(tip.0 - height.0)
155}
156
157/// Returns the location of the block if present in the non-finalized state.
158/// Returns None if the block hash is not found in the non-finalized state.
159pub fn non_finalized_state_contains_block_hash(
160    non_finalized_state: &NonFinalizedState,
161    hash: block::Hash,
162) -> Option<KnownBlock> {
163    let mut chains_iter = non_finalized_state.chain_iter();
164    let is_hash_in_chain = |chain: &Arc<Chain>| chain.contains_block_hash(hash);
165
166    // Equivalent to `chain_set.iter().next_back()` in `NonFinalizedState.best_chain()` method.
167    let best_chain = chains_iter.next();
168
169    match best_chain.map(is_hash_in_chain) {
170        Some(true) => Some(KnownBlock::BestChain),
171        Some(false) if chains_iter.any(is_hash_in_chain) => Some(KnownBlock::SideChain),
172        Some(false) | None => None,
173    }
174}
175
176/// Returns the location of the block if present in the finalized state.
177/// Returns None if the block hash is not found in the finalized state.
178pub fn finalized_state_contains_block_hash(db: &ZebraDb, hash: block::Hash) -> Option<KnownBlock> {
179    db.contains_hash(hash).then_some(KnownBlock::Finalized)
180}
181
182/// Return the height for the block at `hash`, if `hash` is in `chain` or `db`.
183pub fn height_by_hash<C>(chain: Option<C>, db: &ZebraDb, hash: block::Hash) -> Option<Height>
184where
185    C: AsRef<Chain>,
186{
187    // # Correctness
188    //
189    // Finalized state updates can only add overlapping blocks, and hashes are unique.
190
191    chain
192        .and_then(|chain| chain.as_ref().height_by_hash(hash))
193        .or_else(|| db.height(hash))
194}
195
196/// Return the hash for the block at `height`, if `height` is in `chain` or `db`.
197pub fn hash_by_height<C>(chain: Option<C>, db: &ZebraDb, height: Height) -> Option<block::Hash>
198where
199    C: AsRef<Chain>,
200{
201    // # Correctness
202    //
203    // Finalized state updates can only add overlapping blocks, and heights are unique
204    // in the current `chain`.
205    //
206    // If there is an overlap between the non-finalized and finalized states,
207    // where the finalized tip is above the non-finalized tip,
208    // Zebra is receiving a lot of blocks, or this request has been delayed for a long time,
209    // so it is acceptable to return hashes from either chain.
210
211    chain
212        .and_then(|chain| chain.as_ref().hash_by_height(height))
213        .or_else(|| db.hash(height))
214}
215
216/// Return true if `hash` is in `chain` or `db`.
217pub fn chain_contains_hash<C>(chain: Option<C>, db: &ZebraDb, hash: block::Hash) -> bool
218where
219    C: AsRef<Chain>,
220{
221    // # Correctness
222    //
223    // Finalized state updates can only add overlapping blocks, and hashes are unique.
224    //
225    // If there is an overlap between the non-finalized and finalized states,
226    // where the finalized tip is above the non-finalized tip,
227    // Zebra is receiving a lot of blocks, or this request has been delayed for a long time,
228    // so it is acceptable to return hashes from either chain.
229
230    chain
231        .map(|chain| chain.as_ref().height_by_hash.contains_key(&hash))
232        .unwrap_or(false)
233        || db.contains_hash(hash)
234}
235
236/// Create a block locator from `chain` and `db`.
237///
238/// A block locator is used to efficiently find an intersection of two node's chains.
239/// It contains a list of block hashes at decreasing heights, skipping some blocks,
240/// so that any intersection can be located, no matter how long or different the chains are.
241pub fn block_locator<C>(chain: Option<C>, db: &ZebraDb) -> Option<Vec<block::Hash>>
242where
243    C: AsRef<Chain>,
244{
245    let chain = chain.as_ref();
246
247    // # Correctness
248    //
249    // It is ok to do these lookups using multiple database calls. Finalized state updates
250    // can only add overlapping blocks, and hashes are unique.
251    //
252    // If there is an overlap between the non-finalized and finalized states,
253    // where the finalized tip is above the non-finalized tip,
254    // Zebra is receiving a lot of blocks, or this request has been delayed for a long time,
255    // so it is acceptable to return a set of hashes from multiple chains.
256    //
257    // Multiple heights can not map to the same hash, even in different chains,
258    // because the block height is covered by the block hash,
259    // via the transaction merkle tree commitments.
260    let tip_height = tip_height(chain, db)?;
261
262    let heights = block_locator_heights(tip_height);
263    let mut hashes = Vec::with_capacity(heights.len());
264
265    for height in heights {
266        if let Some(hash) = hash_by_height(chain, db, height) {
267            hashes.push(hash);
268        }
269    }
270
271    Some(hashes)
272}
273
274/// Get the heights of the blocks for constructing a block_locator list.
275///
276/// Zebra uses a decreasing list of block heights, starting at the tip, and skipping some heights.
277/// See [`block_locator()`] for details.
278pub fn block_locator_heights(tip_height: block::Height) -> Vec<block::Height> {
279    // The initial height in the returned `vec` is the tip height,
280    // and the final height is `MAX_BLOCK_REORG_HEIGHT` below the tip.
281    //
282    // The initial distance between heights is 1, and it doubles between each subsequent height.
283    // So the number of returned heights is approximately `log_2(MAX_BLOCK_REORG_HEIGHT)`.
284
285    // Limit the maximum locator depth.
286    let min_locator_height = tip_height
287        .0
288        .saturating_sub(constants::MAX_BLOCK_REORG_HEIGHT);
289
290    // Create an exponentially decreasing set of heights.
291    let exponential_locators = iter::successors(Some(1u32), |h| h.checked_mul(2))
292        .flat_map(move |step| tip_height.0.checked_sub(step));
293
294    // Start at the tip, add decreasing heights, and end MAX_BLOCK_REORG_HEIGHT below the tip.
295    let locators = iter::once(tip_height.0)
296        .chain(exponential_locators)
297        .take_while(move |&height| height > min_locator_height)
298        .chain(iter::once(min_locator_height))
299        .map(block::Height)
300        .collect();
301
302    tracing::debug!(
303        ?tip_height,
304        ?min_locator_height,
305        ?locators,
306        "created block locator"
307    );
308
309    locators
310}
311
312/// Find the first hash that's in the peer's `known_blocks`, and in `chain` or `db`.
313///
314/// Returns `None` if:
315///   * there is no matching hash in the chain, or
316///   * the state is empty.
317fn find_chain_intersection<C>(
318    chain: Option<C>,
319    db: &ZebraDb,
320    known_blocks: Vec<block::Hash>,
321) -> Option<block::Hash>
322where
323    C: AsRef<Chain>,
324{
325    // We can get a block locator request before we have downloaded the genesis block
326    if chain.is_none() && db.is_empty() {
327        return None;
328    }
329
330    let chain = chain.as_ref();
331
332    known_blocks
333        .iter()
334        .find(|&&hash| chain_contains_hash(chain, db, hash))
335        .cloned()
336}
337
338/// Returns a range of [`Height`]s in the chain,
339/// starting after the `intersection` hash on the chain.
340///
341/// See [`find_chain_hashes()`] for details.
342fn find_chain_height_range<C>(
343    chain: Option<C>,
344    db: &ZebraDb,
345    intersection: Option<block::Hash>,
346    stop: Option<block::Hash>,
347    max_len: u32,
348) -> impl RangeBounds<u32> + Iterator<Item = u32>
349where
350    C: AsRef<Chain>,
351{
352    #[allow(clippy::reversed_empty_ranges)]
353    const EMPTY_RANGE: RangeInclusive<u32> = 1..=0;
354
355    assert!(max_len > 0, "max_len must be at least 1");
356
357    let chain = chain.as_ref();
358
359    // We can get a block locator request before we have downloaded the genesis block
360    let chain_tip_height = if let Some(height) = tip_height(chain, db) {
361        height
362    } else {
363        tracing::debug!(
364            response_len = ?0,
365            "responding to peer GetBlocks or GetHeaders with empty state",
366        );
367
368        return EMPTY_RANGE;
369    };
370
371    // Find the intersection height
372    let intersection_height = match intersection {
373        Some(intersection_hash) => match height_by_hash(chain, db, intersection_hash) {
374            Some(intersection_height) => Some(intersection_height),
375
376            // A recently committed block dropped the intersection we previously found
377            None => {
378                info!(
379                    ?intersection,
380                    ?stop,
381                    ?max_len,
382                    "state found intersection but then dropped it, ignoring request",
383                );
384                return EMPTY_RANGE;
385            }
386        },
387        // There is no intersection
388        None => None,
389    };
390
391    // Now find the start and maximum heights
392    let (start_height, max_height) = match intersection_height {
393        // start after the intersection_height, and return max_len hashes or headers
394        Some(intersection_height) => (
395            Height(intersection_height.0 + 1),
396            Height(intersection_height.0 + max_len),
397        ),
398        // start at genesis, and return max_len hashes or headers
399        None => (Height(0), Height(max_len - 1)),
400    };
401
402    let stop_height = stop.and_then(|hash| height_by_hash(chain, db, hash));
403
404    // Compute the final height, making sure it is:
405    //   * at or below our chain tip, and
406    //   * at or below the height of the stop hash.
407    let final_height = std::cmp::min(max_height, chain_tip_height);
408    let final_height = stop_height
409        .map(|stop_height| std::cmp::min(final_height, stop_height))
410        .unwrap_or(final_height);
411
412    // TODO: implement Step for Height, when Step stabilises
413    //       https://github.com/rust-lang/rust/issues/42168
414    let height_range = start_height.0..=final_height.0;
415    let response_len = height_range.clone().count();
416
417    tracing::debug!(
418        ?start_height,
419        ?final_height,
420        ?response_len,
421        ?chain_tip_height,
422        ?stop_height,
423        ?intersection_height,
424        ?intersection,
425        ?stop,
426        ?max_len,
427        "responding to peer GetBlocks or GetHeaders",
428    );
429
430    // Check the function implements the Find protocol
431    assert!(
432        response_len <= max_len.try_into().expect("fits in usize"),
433        "a Find response must not exceed the maximum response length",
434    );
435
436    height_range
437}
438
439/// Returns a list of [`block::Hash`]es in the chain,
440/// following the `intersection` with the chain.
441///
442/// See [`find_chain_hashes()`] for details.
443fn collect_chain_hashes<C>(
444    chain: Option<C>,
445    db: &ZebraDb,
446    intersection: Option<block::Hash>,
447    stop: Option<block::Hash>,
448    max_len: u32,
449) -> Vec<block::Hash>
450where
451    C: AsRef<Chain>,
452{
453    let chain = chain.as_ref();
454
455    let height_range = find_chain_height_range(chain, db, intersection, stop, max_len);
456
457    // All the hashes should be in the chain.
458    // If they are not, we don't want to return them.
459    let hashes: Vec<block::Hash> = height_range.into_iter().map_while(|height| {
460        let hash = hash_by_height(chain, db, Height(height));
461
462        // A recently committed block dropped the intersection we previously found.
463        if hash.is_none() {
464            info!(
465                ?intersection,
466                ?stop,
467                ?max_len,
468                "state found height range, but then partially dropped it, returning partial response",
469            );
470        }
471
472        tracing::trace!(
473            ?hash,
474            ?height,
475            ?intersection,
476            ?stop,
477            ?max_len,
478            "adding hash to peer Find response",
479        );
480
481        hash
482    }).collect();
483
484    // Check the function implements the Find protocol
485    assert!(
486        intersection
487            .map(|hash| !hashes.contains(&hash))
488            .unwrap_or(true),
489        "the list must not contain the intersection hash",
490    );
491
492    if let (Some(stop), Some((_, hashes_except_last))) = (stop, hashes.split_last()) {
493        assert!(
494            !hashes_except_last.contains(&stop),
495            "if the stop hash is in the list, it must be the final hash",
496        );
497    }
498
499    hashes
500}
501
502/// Returns a list of [`block::Header`]s in the chain,
503/// following the `intersection` with the chain.
504///
505/// See [`find_chain_hashes()`] for details.
506fn collect_chain_headers<C>(
507    chain: Option<C>,
508    db: &ZebraDb,
509    intersection: Option<block::Hash>,
510    stop: Option<block::Hash>,
511    max_len: u32,
512) -> Vec<Arc<block::Header>>
513where
514    C: AsRef<Chain>,
515{
516    let chain = chain.as_ref();
517
518    let height_range = find_chain_height_range(chain, db, intersection, stop, max_len);
519
520    // We don't check that this function implements the Find protocol,
521    // because fetching extra hashes (or re-calculating hashes) is expensive.
522    // (This was one of the most expensive and longest-running functions in the state.)
523
524    // All the headers should be in the chain.
525    // If they are not, we don't want to return them.
526    height_range.into_iter().map_while(|height| {
527        let header = block_header(chain, db, Height(height).into());
528
529        // A recently committed block dropped the intersection we previously found
530        if header.is_none() {
531            info!(
532                ?intersection,
533                ?stop,
534                ?max_len,
535                "state found height range, but then partially dropped it, returning partial response",
536            );
537        }
538
539        tracing::trace!(
540            ?height,
541            ?intersection,
542            ?stop,
543            ?max_len,
544            "adding header to peer Find response",
545        );
546
547        header
548    }).collect()
549}
550
551/// Finds the first hash that's in the peer's `known_blocks` and the chain.
552/// Returns a list of hashes that follow that intersection, from the chain.
553///
554/// Starts from the first matching hash in the chain, ignoring all other hashes in
555/// `known_blocks`. If there is no matching hash in the chain, starts from the genesis
556/// hash.
557///
558/// Includes finalized and non-finalized blocks.
559///
560/// Stops the list of hashes after:
561///   * adding the tip,
562///   * adding the `stop` hash to the list, if it is in the chain, or
563///   * adding `max_len` hashes to the list.
564///
565/// Returns an empty list if the state is empty,
566/// and a partial or empty list if the found heights are concurrently modified.
567pub fn find_chain_hashes<C>(
568    chain: Option<C>,
569    db: &ZebraDb,
570    known_blocks: Vec<block::Hash>,
571    stop: Option<block::Hash>,
572    max_len: u32,
573) -> Vec<block::Hash>
574where
575    C: AsRef<Chain>,
576{
577    // # Correctness
578    //
579    // See the note in `block_locator()`.
580
581    let chain = chain.as_ref();
582    let intersection = find_chain_intersection(chain, db, known_blocks);
583
584    collect_chain_hashes(chain, db, intersection, stop, max_len)
585}
586
587/// Finds the first hash that's in the peer's `known_blocks` and the chain.
588/// Returns a list of headers that follow that intersection, from the chain.
589///
590/// See [`find_chain_hashes()`] for details.
591pub fn find_chain_headers<C>(
592    chain: Option<C>,
593    db: &ZebraDb,
594    known_blocks: Vec<block::Hash>,
595    stop: Option<block::Hash>,
596    max_len: u32,
597) -> Vec<Arc<block::Header>>
598where
599    C: AsRef<Chain>,
600{
601    // # Correctness
602    //
603    // Headers are looked up by their hashes using a unique mapping,
604    // so it is not possible for multiple hashes to look up the same header,
605    // even across different chains.
606    //
607    // See also the note in `block_locator()`.
608
609    let chain = chain.as_ref();
610    let intersection = find_chain_intersection(chain, db, known_blocks);
611
612    collect_chain_headers(chain, db, intersection, stop, max_len)
613}
614
615/// Returns the median-time-past of the *next* block to be added to the best chain in
616/// `non_finalized_state` or `db`.
617///
618/// # Panics
619///
620/// - If we don't have enough blocks in the state.
621pub fn next_median_time_past(
622    non_finalized_state: &NonFinalizedState,
623    db: &ZebraDb,
624) -> Result<DateTime32, BoxError> {
625    let mut best_relevant_chain_result = best_relevant_chain(non_finalized_state, db);
626
627    // Retry the finalized state query if it was interrupted by a finalizing block.
628    //
629    // TODO: refactor this into a generic retry(finalized_closure, process_and_check_closure) fn
630    for _ in 0..FINALIZED_STATE_QUERY_RETRIES {
631        if best_relevant_chain_result.is_ok() {
632            break;
633        }
634
635        best_relevant_chain_result = best_relevant_chain(non_finalized_state, db);
636    }
637
638    Ok(calculate_median_time_past(best_relevant_chain_result?))
639}
640
641/// Do a consistency check by checking the finalized tip before and after all other database queries.
642///
643/// Returns recent blocks in reverse height order from the tip.
644/// Returns an error if the tip obtained before and after is not the same.
645///
646/// # Panics
647///
648/// - If we don't have enough blocks in the state.
649fn best_relevant_chain(
650    non_finalized_state: &NonFinalizedState,
651    db: &ZebraDb,
652) -> Result<Vec<Arc<Block>>, BoxError> {
653    let state_tip_before_queries = read::best_tip(non_finalized_state, db).ok_or_else(|| {
654        BoxError::from("Zebra's state is empty, wait until it syncs to the chain tip")
655    })?;
656
657    let best_relevant_chain =
658        any_ancestor_blocks(non_finalized_state, db, state_tip_before_queries.1);
659    let best_relevant_chain: Vec<_> = best_relevant_chain
660        .into_iter()
661        .take(POW_MEDIAN_BLOCK_SPAN)
662        .collect();
663
664    if best_relevant_chain.is_empty() {
665        return Err("missing genesis block, wait until it is committed".into());
666    };
667
668    let state_tip_after_queries =
669        read::best_tip(non_finalized_state, db).expect("already checked for an empty tip");
670
671    if state_tip_before_queries != state_tip_after_queries {
672        return Err("Zebra is committing too many blocks to the state, \
673                    wait until it syncs to the chain tip"
674            .into());
675    }
676
677    Ok(best_relevant_chain)
678}
679
680/// Returns the median-time-past for the provided `relevant_chain`.
681///
682/// The `relevant_chain` has blocks in reverse height order.
683///
684/// See [`next_median_time_past()`] for details.
685pub(crate) fn calculate_median_time_past(relevant_chain: Vec<Arc<Block>>) -> DateTime32 {
686    let relevant_data: Vec<DateTime<Utc>> = relevant_chain
687        .iter()
688        .map(|block| block.header.time)
689        .collect();
690
691    // > Define the median-time-past of a block to be the median of the nTime fields of the
692    // > preceding PoWMedianBlockSpan blocks (or all preceding blocks if there are fewer than
693    // > PoWMedianBlockSpan). The median-time-past of a genesis block is not defined.
694    // https://zips.z.cash/protocol/protocol.pdf#blockheader
695    let median_time_past = AdjustedDifficulty::median_time(relevant_data);
696
697    DateTime32::try_from(median_time_past).expect("valid blocks have in-range times")
698}