zebra_state/config.rs
1//! Cached state configuration for Zebra.
2
3use std::{
4 fs::{self, canonicalize, remove_dir_all, DirEntry, ReadDir},
5 io::ErrorKind,
6 path::{Path, PathBuf},
7 time::Duration,
8};
9
10use semver::Version;
11use serde::{Deserialize, Serialize};
12use tokio::task::{spawn_blocking, JoinHandle};
13use tracing::Span;
14
15use zebra_chain::{common::default_cache_dir, parameters::Network};
16
17use crate::{
18 constants::{DATABASE_FORMAT_VERSION_FILE_NAME, STATE_DATABASE_KIND},
19 service::finalized_state::restorable_db_versions,
20 state_database_format_version_in_code, BoxError,
21};
22
/// Configuration for the state service.
///
/// Unknown fields are rejected when deserializing, and fields that are missing
/// from the serialized config take their values from [`Config::default()`].
#[derive(Clone, Debug, Eq, PartialEq, Deserialize, Serialize)]
#[serde(deny_unknown_fields, default)]
pub struct Config {
    /// The root directory for storing cached block data.
    ///
    /// If you change this directory, you might also want to change `network.cache_dir`.
    ///
    /// This cache stores permanent blockchain state that can be replicated from
    /// the network, including the best chain, blocks, the UTXO set, and other indexes.
    /// Any state that can be rolled back is only stored in memory, apart from the
    /// optional on-disk backup of non-finalized blocks
    /// (see `should_backup_non_finalized_state`).
    ///
    /// The `zebra-state` cache does *not* include any private data, such as wallet data.
    ///
    /// You can delete the entire cached state directory, but it will impact your node's
    /// readiness and network usage. If you do, Zebra will re-sync from genesis the next
    /// time it is launched.
    ///
    /// The default directory is platform dependent, based on
    /// [`dirs::cache_dir()`](https://docs.rs/dirs/3.0.1/dirs/fn.cache_dir.html):
    ///
    /// |Platform | Value                                           | Example                              |
    /// | ------- | ----------------------------------------------- | ------------------------------------ |
    /// | Linux   | `$XDG_CACHE_HOME/zebra` or `$HOME/.cache/zebra` | `/home/alice/.cache/zebra`           |
    /// | macOS   | `$HOME/Library/Caches/zebra`                    | `/Users/Alice/Library/Caches/zebra`  |
    /// | Windows | `{FOLDERID_LocalAppData}\zebra`                 | `C:\Users\Alice\AppData\Local\zebra` |
    /// | Other   | `std::env::current_dir()/cache/zebra`           | `/cache/zebra`                       |
    ///
    /// # Security
    ///
    /// If you are running Zebra with elevated permissions ("root"), create the
    /// directory for this file before running Zebra, and make sure the Zebra user
    /// account has exclusive access to that directory, and other users can't modify
    /// its parent directories.
    ///
    /// # Implementation Details
    ///
    /// Each state format version and network has a separate state.
    /// These states are stored in `state/vN/mainnet` and `state/vN/testnet` subdirectories,
    /// underneath the `cache_dir` path, where `N` is the state format version.
    ///
    /// When Zebra's state format changes, it creates a new state subdirectory for that version,
    /// and re-syncs from genesis.
    ///
    /// Old state versions are automatically deleted at startup, unless `delete_old_database`
    /// is set to `false`. You can also manually delete old state versions.
    pub cache_dir: PathBuf,

    /// Whether to use an ephemeral database.
    ///
    /// Ephemeral databases are stored in a temporary directory created using [`tempfile::tempdir()`].
    /// They are deleted when Zebra exits successfully.
    /// (If Zebra panics or crashes, the ephemeral database won't be deleted.)
    ///
    /// Set to `false` by default. If this is set to `true`, [`cache_dir`] is ignored.
    ///
    /// Ephemeral directories are created in the [`std::env::temp_dir()`].
    /// Zebra names each directory after the state version and network, for example: `zebra-state-v21-mainnet-XnyGnE`.
    ///
    /// [`cache_dir`]: struct.Config.html#structfield.cache_dir
    pub ephemeral: bool,

    /// Whether to cache non-finalized blocks on disk to be restored when Zebra restarts.
    ///
    /// Set to `true` by default. If this is set to `false`, Zebra will irrecoverably drop
    /// non-finalized blocks when the process exits and will have to re-download them from
    /// the network when it restarts, if those blocks are still available in the network.
    ///
    /// This setting has no effect when `ephemeral` is `true`: ephemeral databases never
    /// get a non-finalized state backup directory.
    ///
    /// Note: The non-finalized state will be written to a backup cache once per 5 seconds at most.
    /// If blocks are added to the non-finalized state more frequently, the backup may not reflect
    /// Zebra's last non-finalized state before it shut down.
    pub should_backup_non_finalized_state: bool,

    /// Whether to delete the old database directories when present.
    ///
    /// Set to `true` by default. If this is set to `false`,
    /// no check for old database versions will be made and nothing will be
    /// deleted.
    pub delete_old_database: bool,

    // Debug configs
    //
    /// Commit blocks to the finalized state up to this height, then exit Zebra.
    ///
    /// Set to `None` by default: Zebra continues syncing indefinitely.
    pub debug_stop_at_height: Option<u32>,

    /// While Zebra is running, check state validity this often.
    ///
    /// Set to `None` by default: Zebra only checks state format validity on startup and shutdown.
    ///
    /// The interval is (de)serialized by the `humantime_serde` module.
    #[serde(with = "humantime_serde")]
    pub debug_validity_check_interval: Option<Duration>,

    /// If true, skip spawning the non-finalized state backup task and instead write
    /// the non-finalized state to the backup directory synchronously before each update
    /// to the latest chain tip or non-finalized state channels.
    ///
    /// Set to `false` by default. When `true`, the non-finalized state is still restored
    /// from the backup directory on startup, but updates are written synchronously on every
    /// block commit rather than asynchronously every 5 seconds.
    ///
    /// This is intended for testing scenarios where blocks are committed rapidly and the
    /// async backup task may not flush all blocks before shutdown.
    pub debug_skip_non_finalized_state_backup_task: bool,

    // Elasticsearch configs (only compiled in with the `elasticsearch` feature)
    //
    #[cfg(feature = "elasticsearch")]
    /// The elasticsearch database url.
    pub elasticsearch_url: String,

    #[cfg(feature = "elasticsearch")]
    /// The elasticsearch database username.
    pub elasticsearch_username: String,

    #[cfg(feature = "elasticsearch")]
    /// The elasticsearch database password.
    pub elasticsearch_password: String,
}
142
143fn gen_temp_path(prefix: &str) -> PathBuf {
144 tempfile::Builder::new()
145 .prefix(prefix)
146 .tempdir()
147 .expect("temporary directory is created successfully")
148 .keep()
149}
150
151impl Config {
152 /// Returns the path for the database, based on the kind, major version and network.
153 /// Each incompatible database format or network gets its own unique path.
154 pub fn db_path(
155 &self,
156 db_kind: impl AsRef<str>,
157 major_version: u64,
158 network: &Network,
159 ) -> PathBuf {
160 let db_kind = db_kind.as_ref();
161 let major_version = format!("v{major_version}");
162 let net_dir = network.lowercase_name();
163
164 if self.ephemeral {
165 gen_temp_path(&format!("zebra-{db_kind}-{major_version}-{net_dir}-"))
166 } else {
167 self.cache_dir
168 .join(db_kind)
169 .join(major_version)
170 .join(net_dir)
171 }
172 }
173
174 /// Returns the path for the non-finalized state backup directory, based on the network.
175 /// Non-finalized state backup files are encoded in the network protocol format and remain
176 /// valid across db format upgrades.
177 pub fn non_finalized_state_backup_dir(&self, network: &Network) -> Option<PathBuf> {
178 if self.ephemeral || !self.should_backup_non_finalized_state {
179 // Ephemeral databases are intended to be irrecoverable across restarts and don't
180 // require a backup for the non-finalized state.
181 return None;
182 }
183
184 let net_dir = network.lowercase_name();
185 Some(self.cache_dir.join("non_finalized_state").join(net_dir))
186 }
187
188 /// Returns the path for the database format minor/patch version file,
189 /// based on the kind, major version and network.
190 pub fn version_file_path(
191 &self,
192 db_kind: impl AsRef<str>,
193 major_version: u64,
194 network: &Network,
195 ) -> PathBuf {
196 let mut version_path = self.db_path(db_kind, major_version, network);
197
198 version_path.push(DATABASE_FORMAT_VERSION_FILE_NAME);
199
200 version_path
201 }
202
203 /// Returns a config for a temporary database that is deleted when it is dropped.
204 pub fn ephemeral() -> Config {
205 Config {
206 ephemeral: true,
207 ..Config::default()
208 }
209 }
210}
211
212impl Default for Config {
213 fn default() -> Self {
214 Self {
215 cache_dir: default_cache_dir(),
216 ephemeral: false,
217 should_backup_non_finalized_state: true,
218 delete_old_database: true,
219 debug_stop_at_height: None,
220 debug_validity_check_interval: None,
221 debug_skip_non_finalized_state_backup_task: false,
222 #[cfg(feature = "elasticsearch")]
223 elasticsearch_url: "https://localhost:9200".to_string(),
224 #[cfg(feature = "elasticsearch")]
225 elasticsearch_username: "elastic".to_string(),
226 #[cfg(feature = "elasticsearch")]
227 elasticsearch_password: "".to_string(),
228 }
229 }
230}
231
232// Cleaning up old database versions
233// TODO: put this in a different module?
234
235/// Spawns a task that checks if there are old state database folders,
236/// and deletes them from the filesystem.
237///
238/// See `check_and_delete_old_databases()` for details.
239pub fn check_and_delete_old_state_databases(config: &Config, network: &Network) -> JoinHandle<()> {
240 check_and_delete_old_databases(
241 config,
242 STATE_DATABASE_KIND,
243 state_database_format_version_in_code().major,
244 network,
245 )
246}
247
248/// Spawns a task that checks if there are old database folders,
249/// and deletes them from the filesystem.
250///
251/// Iterate over the files and directories in the databases folder and delete if:
252/// - The `db_kind` directory exists.
253/// - The entry in `db_kind` is a directory.
254/// - The directory name has a prefix `v`.
255/// - The directory name without the prefix can be parsed as an unsigned number.
256/// - The parsed number is lower than the `major_version`.
257///
258/// The network is used to generate the path, then ignored.
259/// If `config` is an ephemeral database, no databases are deleted.
260///
261/// # Panics
262///
263/// If the path doesn't match the expected `db_kind/major_version/network` format.
264pub fn check_and_delete_old_databases(
265 config: &Config,
266 db_kind: impl AsRef<str>,
267 major_version: u64,
268 network: &Network,
269) -> JoinHandle<()> {
270 let current_span = Span::current();
271 let config = config.clone();
272 let db_kind = db_kind.as_ref().to_string();
273 let network = network.clone();
274
275 spawn_blocking(move || {
276 current_span.in_scope(|| {
277 delete_old_databases(config, db_kind, major_version, &network);
278 info!("finished old database version cleanup task");
279 })
280 })
281}
282
283/// Check if there are old database folders and delete them from the filesystem.
284///
285/// See [`check_and_delete_old_databases`] for details.
286fn delete_old_databases(config: Config, db_kind: String, major_version: u64, network: &Network) {
287 if config.ephemeral || !config.delete_old_database {
288 return;
289 }
290
291 info!(db_kind, "checking for old database versions");
292
293 let restorable_db_versions = restorable_db_versions();
294
295 let mut db_path = config.db_path(&db_kind, major_version, network);
296 // Check and remove the network path.
297 assert_eq!(
298 db_path.file_name(),
299 Some(network.lowercase_name().as_ref()),
300 "unexpected database network path structure"
301 );
302 assert!(db_path.pop());
303
304 // Check and remove the major version path, we'll iterate over them all below.
305 assert_eq!(
306 db_path.file_name(),
307 Some(format!("v{major_version}").as_ref()),
308 "unexpected database version path structure"
309 );
310 assert!(db_path.pop());
311
312 // Check for the correct database kind to iterate within.
313 assert_eq!(
314 db_path.file_name(),
315 Some(db_kind.as_ref()),
316 "unexpected database kind path structure"
317 );
318
319 if let Some(db_kind_dir) = read_dir(&db_path) {
320 for entry in db_kind_dir.flatten() {
321 let deleted_db =
322 check_and_delete_database(&config, major_version, &restorable_db_versions, &entry);
323
324 if let Some(deleted_db) = deleted_db {
325 info!(?deleted_db, "deleted outdated {db_kind} database directory");
326 }
327 }
328 }
329}
330
/// Returns a `ReadDir` for `dir`, if `dir` exists and can be read as a directory.
///
/// Returns `None` if the directory is missing, is not a directory, or can't be read.
fn read_dir(dir: &Path) -> Option<ReadDir> {
    // `read_dir()` already fails for missing paths, so a separate `exists()` check
    // would only add an extra filesystem call and a check-then-use window.
    dir.read_dir().ok()
}
342
343/// Check if `entry` is an old database directory, and delete it from the filesystem.
344/// See [`check_and_delete_old_databases`] for details.
345///
346/// If the directory was deleted, returns its path.
347fn check_and_delete_database(
348 config: &Config,
349 major_version: u64,
350 restorable_db_versions: &[u64],
351 entry: &DirEntry,
352) -> Option<PathBuf> {
353 let dir_name = parse_dir_name(entry)?;
354 let dir_major_version = parse_major_version(&dir_name)?;
355
356 if dir_major_version >= major_version {
357 return None;
358 }
359
360 // Don't delete databases that can be reused.
361 if restorable_db_versions
362 .iter()
363 .map(|v| v - 1)
364 .any(|v| v == dir_major_version)
365 {
366 return None;
367 }
368
369 let outdated_path = entry.path();
370
371 // # Correctness
372 //
373 // Check that the path we're about to delete is inside the cache directory.
374 // If the user has symlinked the outdated state directory to a non-cache directory,
375 // we don't want to delete it, because it might contain other files.
376 //
377 // We don't attempt to guard against malicious symlinks created by attackers
378 // (TOCTOU attacks). Zebra should not be run with elevated privileges.
379 let cache_path = canonicalize(&config.cache_dir).ok()?;
380 let outdated_path = canonicalize(outdated_path).ok()?;
381
382 if !outdated_path.starts_with(&cache_path) {
383 info!(
384 skipped_path = ?outdated_path,
385 ?cache_path,
386 "skipped cleanup of outdated state directory: state is outside cache directory",
387 );
388
389 return None;
390 }
391
392 remove_dir_all(&outdated_path).ok().map(|()| outdated_path)
393}
394
/// Returns the name of `entry`, if `entry` is a directory with a valid UTF-8 name.
/// (State directory names are guaranteed to be UTF-8.)
///
/// Returns `None` if the file type can't be read, the entry is not a directory,
/// or the name is not valid UTF-8.
fn parse_dir_name(entry: &DirEntry) -> Option<String> {
    // Stop here unless the entry's file type is readable and is a directory.
    entry
        .file_type()
        .ok()
        .filter(|file_type| file_type.is_dir())?;

    entry.file_name().into_string().ok()
}
409
/// Parses the database major version number from a `vN` directory name.
///
/// Returns `None` if `dir_name` doesn't start with `v`,
/// or the rest of the name can't be parsed as a `u64`.
fn parse_major_version(dir_name: &str) -> Option<u64> {
    let version_digits = dir_name.strip_prefix('v')?;

    version_digits.parse().ok()
}
418
419// TODO: move these to the format upgrade module
420
421/// Returns the full semantic version of the on-disk state database, based on its config and network.
422pub fn state_database_format_version_on_disk(
423 config: &Config,
424 network: &Network,
425) -> Result<Option<Version>, BoxError> {
426 database_format_version_on_disk(
427 config,
428 STATE_DATABASE_KIND,
429 state_database_format_version_in_code().major,
430 network,
431 )
432}
433
434/// Returns the full semantic version of the on-disk database, based on its config, kind, major version,
435/// and network.
436///
437/// Typically, the version is read from a version text file.
438///
439/// If there is an existing on-disk database, but no version file,
440/// returns `Ok(Some(major_version.0.0))`.
441/// (This happens even if the database directory was just newly created.)
442///
443/// If there is no existing on-disk database, returns `Ok(None)`.
444///
445/// This is the format of the data on disk, the version
446/// implemented by the running Zebra code can be different.
447pub fn database_format_version_on_disk(
448 config: &Config,
449 db_kind: impl AsRef<str>,
450 major_version: u64,
451 network: &Network,
452) -> Result<Option<Version>, BoxError> {
453 let version_path = config.version_file_path(&db_kind, major_version, network);
454 let db_path = config.db_path(db_kind, major_version, network);
455
456 database_format_version_at_path(&version_path, &db_path, major_version)
457}
458
459/// Returns the full semantic version of the on-disk database at `version_path`.
460///
461/// See [`database_format_version_on_disk()`] for details.
462pub(crate) fn database_format_version_at_path(
463 version_path: &Path,
464 db_path: &Path,
465 major_version: u64,
466) -> Result<Option<Version>, BoxError> {
467 let disk_version_file = match fs::read_to_string(version_path) {
468 Ok(version) => Some(version),
469 Err(e) if e.kind() == ErrorKind::NotFound => {
470 // If the version file doesn't exist, don't guess the version yet.
471 None
472 }
473 Err(e) => Err(e)?,
474 };
475
476 // The database has a version file on disk
477 if let Some(version) = disk_version_file {
478 return Ok(Some(
479 version
480 .parse()
481 // Try to parse the previous format of the disk version file if it cannot be parsed as a `Version` directly.
482 .or_else(|err| {
483 format!("{major_version}.{version}")
484 .parse()
485 .map_err(|err2| format!("failed to parse format version: {err}, {err2}"))
486 })?,
487 ));
488 }
489
490 // There's no version file on disk, so we need to guess the version
491 // based on the database content
492 match fs::metadata(db_path) {
493 // But there is a database on disk, so it has the current major version with no upgrades.
494 // If the database directory was just newly created, we also return this version.
495 Ok(_metadata) => Ok(Some(Version::new(major_version, 0, 0))),
496
497 // There's no version file and no database on disk, so it's a new database.
498 // It will be created with the current version,
499 // but temporarily return the default version above until the version file is written.
500 Err(e) if e.kind() == ErrorKind::NotFound => Ok(None),
501
502 Err(e) => Err(e)?,
503 }
504}
505
506// Hide this destructive method from the public API, except in tests.
507#[allow(unused_imports)]
508pub(crate) use hidden::{
509 write_database_format_version_to_disk, write_state_database_format_version_to_disk,
510};
511
512pub(crate) mod hidden {
513 #![allow(dead_code)]
514
515 use zebra_chain::common::atomic_write;
516
517 use super::*;
518
519 /// Writes `changed_version` to the on-disk state database after the format is changed.
520 /// (Or a new database is created.)
521 ///
522 /// See `write_database_format_version_to_disk()` for details.
523 pub fn write_state_database_format_version_to_disk(
524 config: &Config,
525 changed_version: &Version,
526 network: &Network,
527 ) -> Result<(), BoxError> {
528 write_database_format_version_to_disk(
529 config,
530 STATE_DATABASE_KIND,
531 state_database_format_version_in_code().major,
532 changed_version,
533 network,
534 )
535 }
536
537 /// Writes `changed_version` to the on-disk database after the format is changed.
538 /// (Or a new database is created.)
539 ///
540 /// The database path is based on its kind, `major_version_in_code`, and network.
541 ///
542 /// # Correctness
543 ///
544 /// This should only be called:
545 /// - after each format upgrade is complete,
546 /// - when creating a new database, or
547 /// - when an older Zebra version opens a newer database.
548 ///
549 /// # Concurrency
550 ///
551 /// This must only be called while RocksDB has an open database for `config`.
552 /// Otherwise, multiple Zebra processes could write the version at the same time,
553 /// corrupting the file.
554 pub fn write_database_format_version_to_disk(
555 config: &Config,
556 db_kind: impl AsRef<str>,
557 major_version_in_code: u64,
558 changed_version: &Version,
559 network: &Network,
560 ) -> Result<(), BoxError> {
561 // Write the version file atomically so the cache is not corrupted if Zebra shuts down or
562 // crashes.
563 atomic_write(
564 config.version_file_path(db_kind, major_version_in_code, network),
565 changed_version.to_string().as_bytes(),
566 )??;
567
568 Ok(())
569 }
570}