Skip to main content

zebra_chain/serialization/
zcash_deserialize.rs

//! Converting bytes into Zcash consensus-critical data structures.
2
3use std::{io, net::Ipv6Addr, sync::Arc};
4
5use super::{AtLeastOne, CompactSizeMessage, SerializationError, MAX_PROTOCOL_MESSAGE_LEN};
6
/// Upper limit on the capacity that `zcash_deserialize_external_count`
/// reserves before it has read any items.
///
/// Longer vectors grow as items are pushed. 1024 is large enough that honest
/// messages amortize their growth to a few reallocations.
const MAX_INITIAL_ALLOCATION: usize = 1_024;
12
13/// Consensus-critical deserialization for Zcash.
14///
15/// This trait provides a generic deserialization for consensus-critical
16/// formats, such as network messages, transactions, blocks, etc.
17///
18/// It is intended for use only for consensus-critical formats.
19/// Internal deserialization can freely use `serde`, or any other format.
20pub trait ZcashDeserialize: Sized {
21    /// Try to read `self` from the given `reader`.
22    ///
23    /// This function has a `zcash_` prefix to alert the reader that the
24    /// serialization in use is consensus-critical serialization, rather than
25    /// some other kind of serialization.
26    fn zcash_deserialize<R: io::Read>(reader: R) -> Result<Self, SerializationError>;
27}
28
29/// Deserialize a `Vec`, where the number of items is set by a CompactSize
30/// prefix in the data. This is the most common format in Zcash.
31///
32/// See `zcash_deserialize_external_count` for more details, and usage
33/// information.
34impl<T: ZcashDeserialize + TrustedPreallocate> ZcashDeserialize for Vec<T> {
35    fn zcash_deserialize<R: io::Read>(mut reader: R) -> Result<Self, SerializationError> {
36        let len: CompactSizeMessage = (&mut reader).zcash_deserialize_into()?;
37        zcash_deserialize_external_count(len.into(), reader)
38    }
39}
40
41/// Deserialize an `AtLeastOne` vector, where the number of items is set by a
42/// CompactSize prefix in the data. This is the most common format in Zcash.
43impl<T: ZcashDeserialize + TrustedPreallocate> ZcashDeserialize for AtLeastOne<T> {
44    fn zcash_deserialize<R: io::Read>(mut reader: R) -> Result<Self, SerializationError> {
45        let v: Vec<T> = (&mut reader).zcash_deserialize_into()?;
46        let at_least_one: AtLeastOne<T> = v.try_into()?;
47        Ok(at_least_one)
48    }
49}
50
51/// Implement ZcashDeserialize for `Vec<u8>` directly instead of using the blanket Vec implementation
52///
53/// This allows us to optimize the inner loop into a single call to `read_exact()`
54/// Note that we don't implement TrustedPreallocate for u8.
55/// This allows the optimization without relying on specialization.
56impl ZcashDeserialize for Vec<u8> {
57    fn zcash_deserialize<R: io::Read>(mut reader: R) -> Result<Self, SerializationError> {
58        let len: CompactSizeMessage = (&mut reader).zcash_deserialize_into()?;
59        zcash_deserialize_bytes_external_count(len.into(), reader)
60    }
61}
62
63/// Deserialize a `Vec` containing `external_count` items.
64///
65/// In Zcash, most arrays are stored as a CompactSize, followed by that number
66/// of items of type `T`. But in `Transaction::V5`, some types are serialized as
67/// multiple arrays in different locations, with a single CompactSize before the
68/// first array.
69///
70/// ## Usage
71///
72/// Use `zcash_deserialize_external_count` when the array count is determined by
73/// other data, or a consensus rule.
74///
75/// Use `Vec::zcash_deserialize` for data that contains CompactSize count,
76/// followed by the data array.
77///
78/// For example, when a single count applies to multiple arrays:
79/// 1. Use `Vec::zcash_deserialize` for the array that has a data count.
80/// 2. Use `zcash_deserialize_external_count` for the arrays with no count in the
81///    data, passing the length of the first array.
82///
83/// This function has a `zcash_` prefix to alert the reader that the
84/// serialization in use is consensus-critical serialization, rather than
85/// some other kind of serialization.
86pub fn zcash_deserialize_external_count<R: io::Read, T: ZcashDeserialize + TrustedPreallocate>(
87    external_count: usize,
88    mut reader: R,
89) -> Result<Vec<T>, SerializationError> {
90    match u64::try_from(external_count) {
91        Ok(external_count) if external_count > T::max_allocation() => {
92            return Err(SerializationError::Parse(
93                "Vector longer than max_allocation",
94            ))
95        }
96        Ok(_) => {}
97        // As of 2021, usize is less than or equal to 64 bits on all (or almost all?) supported Rust platforms.
98        // So in practice this error is impossible. (But the check is required, because Rust is future-proof
99        // for 128 bit memory spaces.)
100        Err(_) => return Err(SerializationError::Parse("Vector longer than u64::MAX")),
101    }
102    // Cap the upfront reservation. The Vec grows via push() as elements
103    // arrive, so a peer-supplied `external_count` can't force a large
104    // allocation before any data is read. Fixes the deserializer-level
105    // case of GHSA-xr93-pcq3-pxf8.
106    let mut vec = Vec::with_capacity(external_count.min(MAX_INITIAL_ALLOCATION));
107    for _ in 0..external_count {
108        vec.push(T::zcash_deserialize(&mut reader)?);
109    }
110    Ok(vec)
111}
112
113/// `zcash_deserialize_external_count`, specialised for raw bytes.
114///
115/// This allows us to optimize the inner loop into a single call to `read_exact()`.
116///
117/// This function has a `zcash_` prefix to alert the reader that the
118/// serialization in use is consensus-critical serialization, rather than
119/// some other kind of serialization.
120pub fn zcash_deserialize_bytes_external_count<R: io::Read>(
121    external_count: usize,
122    mut reader: R,
123) -> Result<Vec<u8>, SerializationError> {
124    if external_count > MAX_U8_ALLOCATION {
125        return Err(SerializationError::Parse(
126            "Byte vector longer than MAX_U8_ALLOCATION",
127        ));
128    }
129    let mut vec = vec![0u8; external_count];
130    reader.read_exact(&mut vec)?;
131    Ok(vec)
132}
133
134/// `zcash_deserialize_external_count`, specialised for [`String`].
135/// The external count is in bytes. (Not UTF-8 characters.)
136///
137/// This allows us to optimize the inner loop into a single call to `read_exact()`.
138///
139/// This function has a `zcash_` prefix to alert the reader that the
140/// serialization in use is consensus-critical serialization, rather than
141/// some other kind of serialization.
142pub fn zcash_deserialize_string_external_count<R: io::Read>(
143    external_byte_count: usize,
144    reader: R,
145) -> Result<String, SerializationError> {
146    let bytes = zcash_deserialize_bytes_external_count(external_byte_count, reader)?;
147
148    String::from_utf8(bytes).map_err(|_| SerializationError::Parse("invalid utf-8"))
149}
150
151/// Read a Bitcoin-encoded UTF-8 string.
152impl ZcashDeserialize for String {
153    fn zcash_deserialize<R: io::Read>(mut reader: R) -> Result<Self, SerializationError> {
154        let byte_count: CompactSizeMessage = (&mut reader).zcash_deserialize_into()?;
155        zcash_deserialize_string_external_count(byte_count.into(), reader)
156    }
157}
158
// We don't implement `ZcashDeserialize` for `Ipv4Addr` or `SocketAddr`s,
// because the IPv4 and port formats are different in `addr` (v1) and `addrv2` messages.
161
162/// Read a Bitcoin-encoded IPv6 address.
163impl ZcashDeserialize for Ipv6Addr {
164    fn zcash_deserialize<R: io::Read>(mut reader: R) -> Result<Self, SerializationError> {
165        let mut ipv6_addr = [0u8; 16];
166        reader.read_exact(&mut ipv6_addr)?;
167
168        Ok(Ipv6Addr::from(ipv6_addr))
169    }
170}
171
172/// Helper for deserializing more succinctly via type inference
173pub trait ZcashDeserializeInto {
174    /// Deserialize based on type inference
175    fn zcash_deserialize_into<T>(self) -> Result<T, SerializationError>
176    where
177        T: ZcashDeserialize;
178}
179
180impl<R: io::Read> ZcashDeserializeInto for R {
181    fn zcash_deserialize_into<T>(self) -> Result<T, SerializationError>
182    where
183        T: ZcashDeserialize,
184    {
185        T::zcash_deserialize(self)
186    }
187}
188
/// Marks types whose vectors have a bounded length, so that a
/// `Vec<T: TrustedPreallocate>` can be blindly preallocated. This is in
/// contrast to blind preallocation of a generic `Vec<T>`, which is a DOS vector.
///
/// The `max_allocation()` function provides a loose upper bound on the size of
/// the `Vec<T: TrustedPreallocate>` which can possibly be received from an
/// honest peer. If this limit is too low, Zebra may reject valid messages.
/// In the worst case, setting this limit too low could cause Zebra to fall out
/// of consensus by rejecting all messages containing a valid block.
pub trait TrustedPreallocate {
    /// Provides a ***loose upper bound*** on the size of the `Vec<T: TrustedPreallocate>`
    /// which can possibly be received from an honest peer.
    fn max_allocation() -> u64;
}
200
201impl<T> TrustedPreallocate for Arc<T>
202where
203    T: TrustedPreallocate,
204{
205    fn max_allocation() -> u64 {
206        T::max_allocation()
207    }
208}
209
210/// The length of the longest valid `Vec<u8>` that can be received over the network
211///
212/// It takes 5 bytes to encode a CompactSize representing any number netween 2^16 and (2^32 - 1)
213/// MAX_PROTOCOL_MESSAGE_LEN is ~2^21, so the largest `Vec<u8>` that can be received from an honest peer is
214/// (MAX_PROTOCOL_MESSAGE_LEN - 5);
215pub(crate) const MAX_U8_ALLOCATION: usize = MAX_PROTOCOL_MESSAGE_LEN - 5;