Add availability-recovery from systematic chunks (#1644)
**Don't look at the commit history, it's confusing, as this branch is
based on another branch that was merged**

Fixes #598 
Also implements [RFC
#47](polkadot-fellows/RFCs#47)

## Description

- Availability-recovery now first attempts to request the systematic
chunks for large PoVs (the first ~n/3 chunks, from which the full data
can be recovered without the costly reed-solomon decoding process). If
that fails for any reason, it falls back to recovering from all chunks.
Additionally, backers are used as a backup for requesting the systematic
chunks if the assigned validator is not offering the chunk (each backer
is only used for one systematic chunk, so as not to overload them).
- Quite obviously, recovering from systematic chunks is much faster than
recovering from regular chunks (4000% faster, as measured on my Apple M2
Pro).
- Introduces a `ValidatorIndex` -> `ChunkIndex` mapping that differs for
every core, in order to avoid querying only the first n/3 validators
over and over again within the same session. The mapping is the one
described in RFC 47 (see the first sketch after this list).
- The mapping is feature-gated by the [NodeFeatures runtime
API](#2177) so that it
can only be enabled via a governance call once a sufficient majority of
validators have upgraded their client. If the feature is not enabled,
the mapping will be the identity mapping and backwards-compatibility
will be preserved.
- Adds a new chunk request protocol version (v2), which adds the
ChunkIndex to the response. The returned index may or may not be checked
against the expected chunk index: availability-distribution and
systematic recovery check it; regular recovery does not. This is
backwards compatible. First, a v2 request is attempted. If that fails
during protocol negotiation, v1 is used. (See the second sketch after
this list.)
- Systematic recovery is only attempted during approval-voting, where we
have easy access to the core_index. For disputes and collator
pov_recovery, regular chunk requests are used, just as before.

## Performance results

Some results from subsystem-bench:

- with regular chunk recovery: CPU usage per block: 39.82s
- with recovery from backers: CPU usage per block: 16.03s
- with systematic recovery: CPU usage per block: 19.07s

End-to-end results here:
#598 (comment)

#### TODO:

- [x] [RFC #47](polkadot-fellows/RFCs#47)
- [x] merge #2177 and
rebase on top of those changes
- [x] merge #2771 and
rebase
- [x] add tests
- [x] preliminary performance measure on Versi: see
#598 (comment)
- [x] Rewrite the implementer's guide documentation
- [x] #3065 
- [x] paritytech/zombienet#1705 and fix
zombienet tests
- [x] security audit
- [x] final versi test and performance measure

---------

Signed-off-by: alindima <alin@parity.io>
Co-authored-by: Javier Viola <javier@parity.io>
alindima and pepoviola committed May 28, 2024
1 parent 09f07d5 commit 523e625
Showing 84 changed files with 7,540 additions and 2,338 deletions.
2 changes: 1 addition & 1 deletion .gitlab/pipeline/zombienet.yml
@@ -1,7 +1,7 @@
.zombienet-refs:
extends: .build-refs
variables:
- ZOMBIENET_IMAGE: "docker.io/paritytech/zombienet:v1.3.104"
+ ZOMBIENET_IMAGE: "docker.io/paritytech/zombienet:v1.3.105"
PUSHGATEWAY_URL: "http://zombienet-prometheus-pushgateway.managed-monitoring:9091/metrics/job/zombie-metrics"
DEBUG: "zombie,zombie::network-node,zombie::kube::client::logs"

16 changes: 16 additions & 0 deletions .gitlab/pipeline/zombienet/polkadot.yml
@@ -183,6 +183,22 @@ zombienet-polkadot-functional-0012-spam-statement-distribution-requests:
--local-dir="${LOCAL_DIR}/functional"
--test="0012-spam-statement-distribution-requests.zndsl"

zombienet-polkadot-functional-0013-systematic-chunk-recovery:
extends:
- .zombienet-polkadot-common
script:
- /home/nonroot/zombie-net/scripts/ci/run-test-local-env-manager.sh
--local-dir="${LOCAL_DIR}/functional"
--test="0013-systematic-chunk-recovery.zndsl"

zombienet-polkadot-functional-0014-chunk-fetching-network-compatibility:
extends:
- .zombienet-polkadot-common
script:
- /home/nonroot/zombie-net/scripts/ci/run-test-local-env-manager.sh
--local-dir="${LOCAL_DIR}/functional"
--test="0014-chunk-fetching-network-compatibility.zndsl"

zombienet-polkadot-smoke-0001-parachains-smoke-test:
extends:
- .zombienet-polkadot-common
8 changes: 7 additions & 1 deletion Cargo.lock

Some generated files are not rendered by default.

@@ -56,6 +56,7 @@ impl<Block: BlockT> ActiveCandidateRecovery<Block> {
candidate.receipt.clone(),
candidate.session_index,
None,
None,
tx,
),
"ActiveCandidateRecovery",
3 changes: 3 additions & 0 deletions cumulus/client/relay-chain-minimal-node/src/lib.rs
@@ -285,5 +285,8 @@ fn build_request_response_protocol_receivers<
let cfg =
Protocol::ChunkFetchingV1.get_outbound_only_config::<_, Network>(request_protocol_names);
config.add_request_response_protocol(cfg);
let cfg =
Protocol::ChunkFetchingV2.get_outbound_only_config::<_, Network>(request_protocol_names);
config.add_request_response_protocol(cfg);
(collation_req_v1_receiver, collation_req_v2_receiver, available_data_req_receiver)
}
5 changes: 3 additions & 2 deletions cumulus/test/service/src/lib.rs
@@ -152,15 +152,16 @@ impl RecoveryHandle for FailingRecoveryHandle {
message: AvailabilityRecoveryMessage,
origin: &'static str,
) {
- let AvailabilityRecoveryMessage::RecoverAvailableData(ref receipt, _, _, _) = message;
+ let AvailabilityRecoveryMessage::RecoverAvailableData(ref receipt, _, _, _, _) = message;
let candidate_hash = receipt.hash();

// For every 3rd block we immediately signal unavailability to trigger
// a retry. The same candidate is never failed multiple times to ensure progress.
if self.counter % 3 == 0 && self.failed_hashes.insert(candidate_hash) {
tracing::info!(target: LOG_TARGET, ?candidate_hash, "Failing pov recovery.");

- let AvailabilityRecoveryMessage::RecoverAvailableData(_, _, _, back_sender) = message;
+ let AvailabilityRecoveryMessage::RecoverAvailableData(_, _, _, _, back_sender) =
+     message;
back_sender
.send(Err(RecoveryError::Unavailable))
.expect("Return channel should work here.");
1 change: 1 addition & 0 deletions polkadot/erasure-coding/Cargo.toml
@@ -19,6 +19,7 @@ sp-trie = { path = "../../substrate/primitives/trie" }
thiserror = { workspace = true }

[dev-dependencies]
quickcheck = { version = "1.0.3", default-features = false }
criterion = { version = "0.5.1", default-features = false, features = ["cargo_bench_support"] }

[[bench]]
6 changes: 5 additions & 1 deletion polkadot/erasure-coding/benches/README.md
@@ -7,7 +7,8 @@ cargo bench
## `scaling_with_validators`

This benchmark evaluates the performance of constructing the chunks and the erasure root from PoV and
- reconstructing the PoV from chunks. You can see the results of running this bench on 5950x below.
+ reconstructing the PoV from chunks (either from systematic chunks or regular chunks).
+ You can see the results of running this bench on 5950x below (only including recovery from regular chunks).
Interestingly, with `10_000` chunks (validators) it's slower than with `50_000` for both construction
and reconstruction.
```
@@ -37,3 +38,6 @@ reconstruct/10000 time: [496.35 ms 505.17 ms 515.42 ms]
reconstruct/50000 time: [276.56 ms 277.53 ms 278.58 ms]
thrpt: [17.948 MiB/s 18.016 MiB/s 18.079 MiB/s]
```

Results from running on an Apple M2 Pro: systematic recovery is generally 40 times faster than
regular recovery, achieving 1 GiB/s.
36 changes: 32 additions & 4 deletions polkadot/erasure-coding/benches/scaling_with_validators.rs
@@ -53,12 +53,16 @@ fn construct_and_reconstruct_5mb_pov(c: &mut Criterion) {
}
group.finish();

let mut group = c.benchmark_group("reconstruct");
let mut group = c.benchmark_group("reconstruct_regular");
for n_validators in N_VALIDATORS {
let all_chunks = chunks(n_validators, &pov);

- let mut c: Vec<_> = all_chunks.iter().enumerate().map(|(i, c)| (&c[..], i)).collect();
- let last_chunks = c.split_off((c.len() - 1) * 2 / 3);
+ let chunks: Vec<_> = all_chunks
+     .iter()
+     .enumerate()
+     .take(polkadot_erasure_coding::recovery_threshold(n_validators).unwrap())
+     .map(|(i, c)| (&c[..], i))
+     .collect();

group.throughput(Throughput::Bytes(pov.len() as u64));
group.bench_with_input(
@@ -67,7 +71,31 @@
|b, &n| {
b.iter(|| {
let _pov: Vec<u8> =
- polkadot_erasure_coding::reconstruct(n, last_chunks.clone()).unwrap();
+ polkadot_erasure_coding::reconstruct(n, chunks.clone()).unwrap();
});
},
);
}
group.finish();

let mut group = c.benchmark_group("reconstruct_systematic");
for n_validators in N_VALIDATORS {
let all_chunks = chunks(n_validators, &pov);

let chunks = all_chunks
.into_iter()
.take(polkadot_erasure_coding::systematic_recovery_threshold(n_validators).unwrap())
.collect::<Vec<_>>();

group.throughput(Throughput::Bytes(pov.len() as u64));
group.bench_with_input(
BenchmarkId::from_parameter(n_validators),
&n_validators,
|b, &n| {
b.iter(|| {
let _pov: Vec<u8> =
polkadot_erasure_coding::reconstruct_from_systematic(n, chunks.clone())
.unwrap();
});
},
);
93 changes: 93 additions & 0 deletions polkadot/erasure-coding/src/lib.rs
@@ -69,6 +69,9 @@ pub enum Error {
/// Bad payload in reconstructed bytes.
#[error("Reconstructed payload invalid")]
BadPayload,
/// Unable to decode reconstructed bytes.
#[error("Unable to decode reconstructed payload: {0}")]
Decode(#[source] parity_scale_codec::Error),
/// Invalid branch proof.
#[error("Invalid branch proof")]
InvalidBranchProof,
@@ -110,6 +113,14 @@ pub const fn recovery_threshold(n_validators: usize) -> Result<usize, Error> {
Ok(needed + 1)
}

/// Obtain the threshold of systematic chunks that should be enough to recover the data.
///
/// If the regular `recovery_threshold` is a power of two, then it returns the same value.
/// Otherwise, it returns the next lower power of two.
pub fn systematic_recovery_threshold(n_validators: usize) -> Result<usize, Error> {
code_params(n_validators).map(|params| params.k())
}

fn code_params(n_validators: usize) -> Result<CodeParams, Error> {
// we need to be able to reconstruct from 1/3 - eps

@@ -127,6 +138,41 @@ fn code_params(n_validators: usize) -> Result<CodeParams, Error> {
})
}

/// Reconstruct the v1 available data from the set of systematic chunks.
///
/// Provide a vector containing chunk data. If too few chunks are provided, recovery is not
/// possible.
pub fn reconstruct_from_systematic_v1(
n_validators: usize,
chunks: Vec<Vec<u8>>,
) -> Result<AvailableData, Error> {
reconstruct_from_systematic(n_validators, chunks)
}

/// Reconstruct the available data from the set of systematic chunks.
///
/// Provide a vector containing the first k chunks in order. If too few chunks are provided,
/// recovery is not possible.
pub fn reconstruct_from_systematic<T: Decode>(
n_validators: usize,
chunks: Vec<Vec<u8>>,
) -> Result<T, Error> {
let code_params = code_params(n_validators)?;
let k = code_params.k();

for chunk_data in chunks.iter().take(k) {
if chunk_data.len() % 2 != 0 {
return Err(Error::UnevenLength)
}
}

let bytes = code_params.make_encoder().reconstruct_from_systematic(
chunks.into_iter().take(k).map(|data| WrappedShard::new(data)).collect(),
)?;

Decode::decode(&mut &bytes[..]).map_err(|err| Error::Decode(err))
}

/// Obtain erasure-coded chunks for v1 `AvailableData`, one for each validator.
///
/// Works only up to 65536 validators, and `n_validators` must be non-zero.
@@ -285,13 +331,41 @@ pub fn branch_hash(root: &H256, branch_nodes: &Proof, index: usize) -> Result<H2

#[cfg(test)]
mod tests {
use std::sync::Arc;

use super::*;
use polkadot_node_primitives::{AvailableData, BlockData, PoV};
use polkadot_primitives::{HeadData, PersistedValidationData};
use quickcheck::{Arbitrary, Gen, QuickCheck};

// In order to adequately compute the number of entries in the Merkle
// trie, we must account for the fixed 16-ary trie structure.
const KEY_INDEX_NIBBLE_SIZE: usize = 4;

#[derive(Clone, Debug)]
struct ArbitraryAvailableData(AvailableData);

impl Arbitrary for ArbitraryAvailableData {
fn arbitrary(g: &mut Gen) -> Self {
// Limit the POV len to 1 MiB, otherwise the test will take forever
let pov_len = (u32::arbitrary(g) % (1024 * 1024)).max(2);

let pov = (0..pov_len).map(|_| u8::arbitrary(g)).collect();

let pvd = PersistedValidationData {
parent_head: HeadData((0..u16::arbitrary(g)).map(|_| u8::arbitrary(g)).collect()),
relay_parent_number: u32::arbitrary(g),
relay_parent_storage_root: [u8::arbitrary(g); 32].into(),
max_pov_size: u32::arbitrary(g),
};

ArbitraryAvailableData(AvailableData {
pov: Arc::new(PoV { block_data: BlockData(pov) }),
validation_data: pvd,
})
}
}

#[test]
fn field_order_is_right_size() {
assert_eq!(MAX_VALIDATORS, 65536);
@@ -318,6 +392,25 @@ mod tests {
assert_eq!(reconstructed, available_data);
}

#[test]
fn round_trip_systematic_works() {
fn property(available_data: ArbitraryAvailableData, n_validators: u16) {
let n_validators = n_validators.max(2);
let kpow2 = systematic_recovery_threshold(n_validators as usize).unwrap();
let chunks = obtain_chunks(n_validators as usize, &available_data.0).unwrap();
assert_eq!(
reconstruct_from_systematic_v1(
n_validators as usize,
chunks.into_iter().take(kpow2).collect()
)
.unwrap(),
available_data.0
);
}

QuickCheck::new().quickcheck(property as fn(ArbitraryAvailableData, u16))
}

#[test]
fn reconstruct_does_not_panic_on_low_validator_count() {
let reconstructed = reconstruct_v1(1, [].iter().cloned());