Implement spec-compliant share splitting (#246)

* Export block data compute shares.
* Refactor to use ShareSize constant directly.
* Change message splitting to prefix namespace ID.
* Implement chunking for contiguous.
* Add termination condition.
* Rename append contiguous to split contiguous.
* Update test for small tx.
* Add test for two contiguous.
* Make tx and msg adjusted share sizes exported constants.
* Panic on hopefully-unreachable condition instead of silently skipping.
* Update hardcoded response for block format.

Co-authored-by: Ismail Khoffi <Ismail.Khoffi@gmail.com>
celestiaorg · Sep 22, 2021 · 59a9c1a · 59a9c1a
1 parent f9df60e
commit 59a9c1a
Show file tree

Hide file tree

Showing 4 changed files with 142 additions and 44 deletions.
diff --git a/types/block.go b/types/block.go
@@ -1120,26 +1120,26 @@ type IntermediateStateRoots struct {
 	RawRootsList []tmbytes.HexBytes `json:"intermediate_roots"`
 }
 
-func (roots IntermediateStateRoots) splitIntoShares(shareSize int) NamespacedShares {
+func (roots IntermediateStateRoots) splitIntoShares() NamespacedShares {
 	shares := make([]NamespacedShare, 0)
 	for _, root := range roots.RawRootsList {
 		rawData, err := root.MarshalDelimited()
 		if err != nil {
 			panic(fmt.Sprintf("app returned intermediate state root that can not be encoded %#v", root))
 		}
-		shares = appendToShares(shares, consts.IntermediateStateRootsNamespaceID, rawData, shareSize)
+		shares = appendToShares(shares, consts.IntermediateStateRootsNamespaceID, rawData)
 	}
 	return shares
 }
 
-func (msgs Messages) splitIntoShares(shareSize int) NamespacedShares {
+func (msgs Messages) splitIntoShares() NamespacedShares {
 	shares := make([]NamespacedShare, 0)
 	for _, m := range msgs.MessagesList {
 		rawData, err := m.MarshalDelimited()
 		if err != nil {
 			panic(fmt.Sprintf("app accepted a Message that can not be encoded %#v", m))
 		}
-		shares = appendToShares(shares, m.NamespaceID, rawData, shareSize)
+		shares = appendToShares(shares, m.NamespaceID, rawData)
 	}
 	return shares
 }
@@ -1346,7 +1346,7 @@ func (data *EvidenceData) FromProto(eviData *tmproto.EvidenceList) error {
 	return nil
 }
 
-func (data *EvidenceData) splitIntoShares(shareSize int) NamespacedShares {
+func (data *EvidenceData) splitIntoShares() NamespacedShares {
 	shares := make([]NamespacedShare, 0)
 	for _, ev := range data.Evidence {
 		var rawData []byte
@@ -1367,7 +1367,7 @@ func (data *EvidenceData) splitIntoShares(shareSize int) NamespacedShares {
 		if err != nil {
 			panic(fmt.Sprintf("evidence included in evidence pool that can not be encoded %#v, err: %v", ev, err))
 		}
-		shares = appendToShares(shares, consts.EvidenceNamespaceID, rawData, shareSize)
+		shares = appendToShares(shares, consts.EvidenceNamespaceID, rawData)
 	}
 	return shares
 }

diff --git a/types/shares.go b/types/shares.go
@@ -59,35 +59,93 @@ func (m Message) MarshalDelimited() ([]byte, error) {
 	return append(lenBuf[:n], m.Data...), nil
 }
 
-func appendToShares(shares []NamespacedShare, nid namespace.ID, rawData []byte, shareSize int) []NamespacedShare {
-	if len(rawData) < shareSize {
-		rawShare := rawData
-		paddedShare := zeroPadIfNecessary(rawShare, shareSize)
+// appendToShares appends raw data as shares.
+// Used for messages.
+func appendToShares(shares []NamespacedShare, nid namespace.ID, rawData []byte) []NamespacedShare {
+	if len(rawData) < consts.MsgShareSize {
+		rawShare := []byte(append(nid, rawData...))
+		paddedShare := zeroPadIfNecessary(rawShare, consts.ShareSize)
+		share := NamespacedShare{paddedShare, nid}
+		shares = append(shares, share)
+	} else { // len(rawData) >= MsgShareSize
+		shares = append(shares, split(rawData, nid)...)
+	}
+	return shares
+}
+
+// splitContiguous splits multiple raw data contiguously as shares.
+// Used for transactions, intermediate state roots, and evidence.
+func splitContiguous(nid namespace.ID, rawDatas [][]byte) []NamespacedShare {
+	shares := make([]NamespacedShare, 0)
+	// Index into the outer slice of rawDatas
+	outerIndex := 0
+	// Index into the inner slice of rawDatas
+	innerIndex := 0
+	for outerIndex < len(rawDatas) {
+		var rawData []byte
+		startIndex := 0
+		rawData, outerIndex, innerIndex, startIndex = getNextChunk(rawDatas, outerIndex, innerIndex, consts.TxShareSize)
+		rawShare := []byte(append(append(nid, byte(startIndex)), rawData...))
+		paddedShare := zeroPadIfNecessary(rawShare, consts.ShareSize)
 		share := NamespacedShare{paddedShare, nid}
 		shares = append(shares, share)
-	} else { // len(rawData) >= shareSize
-		shares = append(shares, split(rawData, shareSize, nid)...)
 	}
 	return shares
 }
 
 // TODO(ismail): implement corresponding merge method for clients requesting
 // shares for a particular namespace
-func split(rawData []byte, shareSize int, nid namespace.ID) []NamespacedShare {
+func split(rawData []byte, nid namespace.ID) []NamespacedShare {
 	shares := make([]NamespacedShare, 0)
-	firstRawShare := rawData[:shareSize]
+	firstRawShare := []byte(append(nid, rawData[:consts.MsgShareSize]...))
 	shares = append(shares, NamespacedShare{firstRawShare, nid})
-	rawData = rawData[shareSize:]
+	rawData = rawData[consts.MsgShareSize:]
 	for len(rawData) > 0 {
-		shareSizeOrLen := min(shareSize, len(rawData))
-		paddedShare := zeroPadIfNecessary(rawData[:shareSizeOrLen], shareSize)
+		shareSizeOrLen := min(consts.MsgShareSize, len(rawData))
+		rawShare := []byte(append(nid, rawData[:shareSizeOrLen]...))
+		paddedShare := zeroPadIfNecessary(rawShare, consts.ShareSize)
 		share := NamespacedShare{paddedShare, nid}
 		shares = append(shares, share)
 		rawData = rawData[shareSizeOrLen:]
 	}
 	return shares
 }
 
+// getNextChunk gets the next chunk for contiguous shares
+// Precondition: none of the slices in rawDatas is zero-length
+// This precondition should always hold at this point since zero-length txs are simply invalid.
+func getNextChunk(rawDatas [][]byte, outerIndex int, innerIndex int, width int) ([]byte, int, int, int) {
+	rawData := make([]byte, 0, width)
+	startIndex := 0
+	firstBytesToFetch := 0
+
+	curIndex := 0
+	for curIndex < width && outerIndex < len(rawDatas) {
+		bytesToFetch := min(len(rawDatas[outerIndex])-innerIndex, width-curIndex)
+		if bytesToFetch == 0 {
+			panic("zero-length contiguous share data is invalid")
+		}
+		if curIndex == 0 {
+			firstBytesToFetch = bytesToFetch
+		}
+		// If we've already placed some data in this chunk, that means
+		// a new data segment begins
+		if curIndex != 0 {
+			// Offset by the fixed reserved bytes at the beginning of the share
+			startIndex = firstBytesToFetch + consts.NamespaceSize + consts.ShareReservedBytes
+		}
+		rawData = append(rawData, rawDatas[outerIndex][innerIndex:innerIndex+bytesToFetch]...)
+		innerIndex += bytesToFetch
+		if innerIndex >= len(rawDatas[outerIndex]) {
+			innerIndex = 0
+			outerIndex++
+		}
+		curIndex += bytesToFetch
+	}
+
+	return rawData, outerIndex, innerIndex, startIndex
+}
+
 func GenerateTailPaddingShares(n int, shareWidth int) NamespacedShares {
 	shares := make([]NamespacedShare, n)
 	for i := 0; i < n; i++ {

diff --git a/types/shares_test.go b/types/shares_test.go
@@ -11,13 +11,12 @@ import (
 )
 
 type splitter interface {
-	splitIntoShares(shareSize int) NamespacedShares
+	splitIntoShares() NamespacedShares
 }
 
 func TestMakeShares(t *testing.T) {
 	reservedTxNamespaceID := append(bytes.Repeat([]byte{0}, 7), 1)
 	reservedEvidenceNamespaceID := append(bytes.Repeat([]byte{0}, 7), 3)
-	// resveredIntermediateStateRootsNamespaceID := append(bytes.Repeat([]byte{0}, 7), 2)
 	val := NewMockPV()
 	blockID := makeBlockID([]byte("blockhash"), 1000, []byte("partshash"))
 	blockID2 := makeBlockID([]byte("blockhash2"), 1000, []byte("partshash"))
@@ -38,12 +37,11 @@ func TestMakeShares(t *testing.T) {
 	}
 	msg1Marshaled, _ := msg1.MarshalDelimited()
 	if err != nil {
-		t.Fatalf("Could not encode evidence: %v, error: %v", testEvidence, err)
+		t.Fatalf("Could not encode evidence: %v, error: %v\n", testEvidence, err)
 	}
 
 	type args struct {
-		data      splitter
-		shareSize int
+		data splitter
 	}
 	tests := []struct {
 		name string
@@ -55,59 +53,101 @@ func TestMakeShares(t *testing.T) {
 				data: &EvidenceData{
 					Evidence: []Evidence{testEvidence},
 				},
-				shareSize: consts.ShareSize,
 			}, NamespacedShares{NamespacedShare{
-				Share: testEvidenceBytes[:consts.ShareSize],
-				ID:    reservedEvidenceNamespaceID,
+				Share: append(
+					append(reservedEvidenceNamespaceID, byte(0)),
+					testEvidenceBytes[:consts.TxShareSize]...,
+				),
+				ID: reservedEvidenceNamespaceID,
 			}, NamespacedShare{
-				Share: zeroPadIfNecessary(testEvidenceBytes[consts.ShareSize:], consts.ShareSize),
-				ID:    reservedEvidenceNamespaceID,
+				Share: append(
+					append(reservedEvidenceNamespaceID, byte(0)),
+					zeroPadIfNecessary(testEvidenceBytes[consts.TxShareSize:], consts.TxShareSize)...,
+				),
+				ID: reservedEvidenceNamespaceID,
 			}},
 		},
 		{"small LL Tx",
 			args{
-				data:      Txs{smolTx},
-				shareSize: consts.ShareSize,
+				data: Txs{smolTx},
 			},
 			NamespacedShares{
 				NamespacedShare{
-					Share: zeroPadIfNecessary(smolTxLenDelimited, consts.ShareSize),
-					ID:    reservedTxNamespaceID,
+					Share: append(
+						append(reservedTxNamespaceID, byte(0)),
+						zeroPadIfNecessary(smolTxLenDelimited, consts.TxShareSize)...,
+					),
+					ID: reservedTxNamespaceID,
 				},
 			},
 		},
 		{"one large LL Tx",
 			args{
-				data:      Txs{largeTx},
-				shareSize: consts.ShareSize,
+				data: Txs{largeTx},
 			},
 			NamespacedShares{
 				NamespacedShare{
-					Share: Share(largeTxLenDelimited[:consts.ShareSize]),
-					ID:    reservedTxNamespaceID,
+					Share: append(
+						append(reservedTxNamespaceID, byte(0)),
+						largeTxLenDelimited[:consts.TxShareSize]...,
+					),
+					ID: reservedTxNamespaceID,
 				},
 				NamespacedShare{
-					Share: zeroPadIfNecessary(largeTxLenDelimited[consts.ShareSize:], consts.ShareSize),
-					ID:    reservedTxNamespaceID,
+					Share: append(
+						append(reservedTxNamespaceID, byte(0)),
+						zeroPadIfNecessary(largeTxLenDelimited[consts.TxShareSize:], consts.TxShareSize)...,
+					),
+					ID: reservedTxNamespaceID,
+				},
+			},
+		},
+		{"large then small LL Tx",
+			args{
+				data: Txs{largeTx, smolTx},
+			},
+			NamespacedShares{
+				NamespacedShare{
+					Share: append(
+						append(reservedTxNamespaceID, byte(0)),
+						largeTxLenDelimited[:consts.TxShareSize]...,
+					),
+					ID: reservedTxNamespaceID,
+				},
+				NamespacedShare{
+					Share: append(
+						append(reservedTxNamespaceID, byte(len(largeTxLenDelimited)-consts.TxShareSize+consts.NamespaceSize+consts.ShareReservedBytes)),
+						zeroPadIfNecessary(
+							append(largeTxLenDelimited[consts.TxShareSize:], smolTxLenDelimited...),
+							consts.TxShareSize,
+						)...,
+					),
+					ID: reservedTxNamespaceID,
 				},
 			},
 		},
 		{"ll-app message",
 			args{
-				data:      Messages{[]Message{msg1}},
-				shareSize: consts.ShareSize,
+				data: Messages{[]Message{msg1}},
 			},
 			NamespacedShares{
-				NamespacedShare{zeroPadIfNecessary(msg1Marshaled, consts.ShareSize), msg1.NamespaceID},
+				NamespacedShare{
+					Share: append(
+						[]byte(msg1.NamespaceID),
+						zeroPadIfNecessary(msg1Marshaled, consts.MsgShareSize)...,
+					),
+					ID: msg1.NamespaceID,
+				},
 			},
 		},
 	}
 	for i, tt := range tests {
 		tt := tt // stupid scopelint :-/
 		i := i
 		t.Run(tt.name, func(t *testing.T) {
-			if got := tt.args.data.splitIntoShares(tt.args.shareSize); !reflect.DeepEqual(got, tt.want) {
-				t.Errorf("%v: makeShares() = \n%v\nwant\n%v", i, got, tt.want)
+			got := tt.args.data.splitIntoShares()
+			if !reflect.DeepEqual(got, tt.want) {
+				t.Errorf("%v: makeShares() = \n%+v\nwant\n%+v\n", i, got, tt.want)
 			}
 		})
 	}

diff --git a/types/tx.go b/types/tx.go
@@ -80,14 +80,14 @@ func (txs Txs) Proof(i int) TxProof {
 	}
 }
 
-func (txs Txs) splitIntoShares(shareSize int) NamespacedShares {
+func (txs Txs) splitIntoShares() NamespacedShares {
 	shares := make([]NamespacedShare, 0)
 	for _, tx := range txs {
 		rawData, err := tx.MarshalDelimited()
 		if err != nil {
 			panic(fmt.Sprintf("included Tx in mem-pool that can not be encoded %v", tx))
 		}
-		shares = appendToShares(shares, consts.TxNamespaceID, rawData, shareSize)
+		shares = appendToShares(shares, consts.TxNamespaceID, rawData)
 	}
 	return shares
 }