diff --git a/fec.cabal b/fec.cabal index bfbdb63..be110f9 100644 --- a/fec.cabal +++ b/fec.cabal @@ -1,6 +1,6 @@ cabal-version: 3.4 name: fec -version: 0.1.1 +version: 0.2.0 license: GPL-2.0-or-later license-file: README.rst author: Adam Langley @@ -35,7 +35,15 @@ library exposed-modules: Codec.FEC default-extensions: ForeignFunctionInterface hs-source-dirs: haskell - ghc-options: -Wall + + -- Try to keep the warning options here in sync with those in the + -- test-suite. + ghc-options: + -Wall -Weverything -Wno-implicit-prelude + -Wno-prepositive-qualified-module -Wno-all-missed-specialisations + -Wno-missing-safe-haskell-mode -Wno-unsafe + -Wno-missing-deriving-strategies + c-sources: zfec/fec.c cc-options: -std=c99 include-dirs: zfec @@ -45,11 +53,31 @@ test-suite tests main-is: FECTest.hs other-modules: hs-source-dirs: haskell/test - ghc-options: -Wall -threaded -rtsopts -with-rtsopts=-N + + -- Turn on a lot of warnings. Some warnings we turn off: + -- + -- * implicit-prelude: We use the implicit prelude a lot for now. + -- + -- * prepositive-qualified-module: prepositive qualified is really common + -- and I don't want to reformat everything right now. + -- + -- * all-missed-specialisations: I don't really understand the implications + -- of messing with INLINABLE. + -- + -- * missing-safe-haskell-mode: Yea, well, the FEC implementation is + -- actually unsafe, so. + -- + -- * missing-deriving-strategies: The default deriving strategy is + -- unsurprising. + ghc-options: + -Wall -Weverything -threaded -rtsopts -with-rtsopts=-N + -Wno-implicit-prelude -Wno-prepositive-qualified-module + -Wno-all-missed-specialisations -Wno-missing-safe-haskell-mode + -Wno-unsafe -Wno-missing-deriving-strategies + build-depends: , base , bytestring - , data-serializer , fec , hspec , QuickCheck diff --git a/haskell/Codec/FEC.hs b/haskell/Codec/FEC.hs index ff16c26..93f93d6 100644 --- a/haskell/Codec/FEC.hs +++ b/haskell/Codec/FEC.hs @@ -15,7 +15,7 @@ -- numbered 0..(n - 1) and blocks numbered < k are the primary blocks. module Codec.FEC ( - FECParams(paramK, paramN) + FECParams(cfec, paramK, paramN) , fec , encode , decode @@ -29,22 +29,20 @@ module Codec.FEC ( import qualified Data.ByteString as B import qualified Data.ByteString.Unsafe as BU -import qualified Data.ByteString.Internal as BI import Data.Word (Word8) import Data.Bits (xor) import Data.List (sortBy, partition, (\\), nub) -import Foreign.Ptr +import Foreign.Ptr (Ptr, FunPtr, castPtr) import Foreign.Storable (sizeOf, poke) -import Foreign.ForeignPtr -import Foreign.C.Types -import Foreign.Marshal.Alloc +import Foreign.ForeignPtr (ForeignPtr, withForeignPtr, newForeignPtr) +import Foreign.C.Types (CSize(CSize), CUInt(CUInt)) +import Foreign.Marshal.Alloc (allocaBytes) import Foreign.Marshal.Array (withArray, advancePtr) -import System.IO (withFile, IOMode(..)) -import System.IO.Unsafe (unsafePerformIO) +import System.IO (withFile, IOMode(ReadMode)) data CFEC data FECParams = FECParams - { cfec :: (ForeignPtr CFEC) + { cfec :: ForeignPtr CFEC , paramK :: Int , paramN :: Int } @@ -82,40 +80,41 @@ isValidConfig k n -- | Return a FEC with the given parameters. fec :: Int -- ^ the number of primary blocks -> Int -- ^ the total number blocks, must be < 256 - -> FECParams + -> IO FECParams fec k n = if not (isValidConfig k n) then error $ "Invalid FEC parameters: " ++ show k ++ " " ++ show n - else unsafePerformIO (do - cfec <- _new (fromIntegral k) (fromIntegral n) - params <- newForeignPtr _free cfec - return $ FECParams params k n) + else do + cfec' <- _new (fromIntegral k) (fromIntegral n) + params <- newForeignPtr _free cfec' + return $ FECParams params k n -- | Create a C array of unsigned from an input array -uintCArray :: [Int] -> ((Ptr CUInt) -> IO a) -> IO a -uintCArray xs f = withArray (map fromIntegral xs) f +uintCArray :: [Int] -> (Ptr CUInt -> IO a) -> IO a +uintCArray xs = withArray (map fromIntegral xs) -- | Convert a list of ByteStrings to an array of pointers to their data -byteStringsToArray :: [B.ByteString] -> ((Ptr (Ptr Word8)) -> IO a) -> IO a +byteStringsToArray :: [B.ByteString] -> (Ptr (Ptr Word8) -> IO a) -> IO a byteStringsToArray inputs f = do let l = length inputs - allocaBytes (l * sizeOf (undefined :: Ptr Word8)) (\array -> do - let inner _ [] = f array - inner array' (bs : bss) = BU.unsafeUseAsCString bs (\ptr -> do + allocaBytes (l * sizeOf (undefined :: Ptr (Ptr Word8))) (\array -> do + let inner :: (Ptr (Ptr Word8) -> IO a) -> Ptr (Ptr Word8) -> [B.ByteString] -> IO a + inner f' _ [] = f' array + inner f' array' (bs : bss) = BU.unsafeUseAsCString bs (\ptr -> do poke array' $ castPtr ptr - inner (advancePtr array' 1) bss) - inner array inputs) + inner f' (advancePtr array' 1) bss) + inner f array inputs) -- | Return True iff all the given ByteStrings are the same length allByteStringsSameLength :: [B.ByteString] -> Bool allByteStringsSameLength [] = True -allByteStringsSameLength (bs : bss) = all ((==) (B.length bs)) $ map B.length bss +allByteStringsSameLength (bs : bss) = all ((B.length bs ==) . B.length) bss -- | Run the given function with a pointer to an array of @n@ pointers to -- buffers of size @size@. Return these buffers as a list of ByteStrings createByteStringArray :: Int -- ^ the number of buffers requested -> Int -- ^ the size of each buffer - -> ((Ptr (Ptr Word8)) -> IO ()) + -> (Ptr (Ptr Word8) -> IO ()) -> IO [B.ByteString] createByteStringArray n size f = do allocaBytes (n * sizeOf (undefined :: Ptr Word8)) (\array -> do @@ -129,17 +128,17 @@ createByteStringArray n size f = do -- @k@ primary blocks. encode :: FECParams -> [B.ByteString] -- ^ a list of @k@ input blocks - -> [B.ByteString] -- ^ (n - k) output blocks + -> IO [B.ByteString] -- ^ (n - k) output blocks encode (FECParams params k n) inblocks | length inblocks /= k = error "Wrong number of blocks to FEC encode" | not (allByteStringsSameLength inblocks) = error "Not all inputs to FEC encode are the same length" - | otherwise = unsafePerformIO (do + | otherwise = do let sz = B.length $ head inblocks - withForeignPtr params (\cfec -> do + withForeignPtr params (\cfec' -> do byteStringsToArray inblocks (\src -> do createByteStringArray (n - k) sz (\fecs -> do uintCArray [k..(n - 1)] (\block_nums -> do - _encode cfec src fecs block_nums (fromIntegral (n - k)) $ fromIntegral sz))))) + _encode cfec' src fecs block_nums (fromIntegral (n - k)) $ fromIntegral sz)))) -- | A sort function for tagged assoc lists sortTagged :: [(Int, a)] -> [(Int, a)] @@ -148,8 +147,9 @@ sortTagged = sortBy (\a b -> compare (fst a) (fst b)) -- | Reorder the given list so that elements with tag numbers < the first -- argument have an index equal to their tag number (if possible) reorderPrimaryBlocks :: Int -> [(Int, a)] -> [(Int, a)] -reorderPrimaryBlocks n blocks = inner (sortTagged pBlocks) sBlocks [] where - (pBlocks, sBlocks) = partition (\(tag, _) -> tag < n) blocks +reorderPrimaryBlocks n blocks = inner (sortTagged primaryBlocks) secondaryBlocks [] where + (primaryBlocks, secondaryBlocks) = partition (\(tag, _) -> tag < n) blocks + inner :: [(Int, a)] -> [(Int, a)] -> [(Int, a)] -> [(Int, a)] inner [] sBlocks acc = acc ++ sBlocks inner pBlocks [] acc = acc ++ pBlocks inner pBlocks@((tag, a) : ps) sBlocks@(s : ss) acc = @@ -161,25 +161,25 @@ reorderPrimaryBlocks n blocks = inner (sortTagged pBlocks) sBlocks [] where -- tagged with its number (see the module comments about block numbering) decode :: FECParams -> [(Int, B.ByteString)] -- ^ a list of @k@ blocks and their index - -> [B.ByteString] -- ^ a list the @k@ primary blocks + -> IO [B.ByteString] -- ^ a list the @k@ primary blocks decode (FECParams params k n) inblocks - | length (nub $ map fst inblocks) /= length (inblocks) = error "Duplicate input blocks in FEC decode" - | any (\f -> f < 0 || f >= n) $ map fst inblocks = error "Invalid block numbers in FEC decode" + | length (nub $ map fst inblocks) /= length inblocks = error "Duplicate input blocks in FEC decode" + | any ((\f -> f < 0 || f >= n) . fst) inblocks = error "Invalid block numbers in FEC decode" | length inblocks /= k = error "Wrong number of blocks to FEC decode" | not (allByteStringsSameLength $ map snd inblocks) = error "Not all inputs to FEC decode are same length" - | otherwise = unsafePerformIO (do + | otherwise = do let sz = B.length $ snd $ head inblocks inblocks' = reorderPrimaryBlocks k inblocks presentBlocks = map fst inblocks' - withForeignPtr params (\cfec -> do + withForeignPtr params (\cfec' -> do byteStringsToArray (map snd inblocks') (\src -> do b <- createByteStringArray (n - k) sz (\out -> do uintCArray presentBlocks (\block_nums -> do - _decode cfec src out block_nums $ fromIntegral sz)) + _decode cfec' src out block_nums $ fromIntegral sz)) let blocks = [0..(n - 1)] \\ presentBlocks tagged = zip blocks b allBlocks = sortTagged $ tagged ++ inblocks' - return $ take k $ map snd allBlocks))) + return $ take k $ map snd allBlocks)) -- | Break a ByteString into @n@ parts, equal in length to the original, such -- that all @n@ are required to reconstruct the original, but having less @@ -197,11 +197,12 @@ secureDivide :: Int -- ^ the number of parts requested secureDivide n input | n < 0 = error "secureDivide called with negative number of parts" | otherwise = withFile "/dev/urandom" ReadMode (\handle -> do - let inner 1 bs = return [bs] - inner n bs = do + let inner :: Int -> B.ByteString -> IO [B.ByteString] + inner 1 bs = return [bs] + inner n' bs = do mask <- B.hGet handle (B.length bs) let masked = B.pack $ B.zipWith xor bs mask - rest <- inner (n - 1) masked + rest <- inner (n' - 1) masked return (mask : rest) inner n input) @@ -219,32 +220,40 @@ secureCombine (a : rest) = B.pack $ B.zipWith xor a $ secureCombine rest enFEC :: Int -- ^ the number of blocks required to reconstruct -> Int -- ^ the total number of blocks -> B.ByteString -- ^ the data to divide - -> [B.ByteString] -- ^ the resulting blocks -enFEC k n input = taggedPrimaryBlocks ++ taggedSecondaryBlocks where - taggedPrimaryBlocks = map (uncurry B.cons) $ zip [0..] primaryBlocks - taggedSecondaryBlocks = map (uncurry B.cons) $ zip [(fromIntegral k)..] secondaryBlocks - remainder = B.length input `mod` k - paddingLength = if remainder >= 1 then (k - remainder) else k - paddingBytes = (B.replicate (paddingLength - 1) 0) `B.append` (B.singleton $ fromIntegral paddingLength) - divide a bs - | B.null bs = [] - | otherwise = (B.take a bs) : (divide a $ B.drop a bs) - input' = input `B.append` paddingBytes - blockSize = B.length input' `div` k - primaryBlocks = divide blockSize input' - secondaryBlocks = encode params primaryBlocks - params = fec k n + -> IO [B.ByteString] -- ^ the resulting blocks +enFEC k n input = + let + taggedPrimaryBlocks = zipWith B.cons [0..] primaryBlocks + taggedSecondaryBlocks = zipWith B.cons [(fromIntegral k)..] + remainder = B.length input `mod` k + paddingLength = if remainder >= 1 then k - remainder else k + paddingBytes = B.replicate (paddingLength - 1) 0 `B.append` (B.singleton . fromIntegral $ paddingLength) + divide a bs + | B.null bs = [] + | otherwise = B.take a bs : (divide a . B.drop a $ bs) + input' = input `B.append` paddingBytes + blockSize = B.length input' `div` k + primaryBlocks = divide blockSize input' + in do + params <- fec k n + secondaryBlocks <- encode params primaryBlocks + pure $ taggedPrimaryBlocks ++ taggedSecondaryBlocks secondaryBlocks -- | Reverses the operation of @enFEC@. deFEC :: Int -- ^ the number of blocks required (matches call to @enFEC@) -> Int -- ^ the total number of blocks (matches call to @enFEC@) -> [B.ByteString] -- ^ a list of k, or more, blocks from @enFEC@ - -> B.ByteString + -> IO B.ByteString deFEC k n inputs | length inputs < k = error "Too few inputs to deFEC" - | otherwise = B.take (B.length fecOutput - paddingLength) fecOutput where - paddingLength = fromIntegral $ B.last fecOutput + | otherwise = + let + paddingLength :: B.ByteString -> Int + paddingLength output = fromIntegral $ B.last output inputs' = take k inputs + taggedInputs :: [(Int, B.ByteString)] taggedInputs = map (\bs -> (fromIntegral $ B.head bs, B.tail bs)) inputs' - fecOutput = B.concat $ decode params taggedInputs - params = fec k n + in do + params <- fec k n + fecOutput <- B.concat <$> decode params taggedInputs + pure $ B.take (B.length fecOutput - paddingLength fecOutput) fecOutput diff --git a/haskell/test/FECTest.hs b/haskell/test/FECTest.hs index a1b3da2..da61d7e 100644 --- a/haskell/test/FECTest.hs +++ b/haskell/test/FECTest.hs @@ -1,29 +1,26 @@ {-# LANGUAGE DerivingStrategies #-} -module Main where +module Main (main) where -import Test.Hspec +import Test.Hspec (Expectation, it, describe, hspec, shouldBe, parallel) import qualified Codec.FEC as FEC import qualified Data.ByteString as B -import qualified Data.ByteString.Lazy as BL -import Data.Int -import Data.List (sortOn) -import Data.Serializer -import Data.Word -import System.IO (IOMode (..), withFile) -import System.Random -import Test.QuickCheck -import Test.QuickCheck.Monadic + +import Data.List (sortOn) +import Data.Word (Word8, Word16) +import System.Random (randoms, mkStdGen) +import Test.QuickCheck (withMaxSuccess, once, choose, Testable(property), Arbitrary(arbitrary), Property) +import Test.QuickCheck.Monadic (monadicIO, run, assert) -- Imported for the orphan Arbitrary ByteString instance. import Test.QuickCheck.Instances.ByteString () -- | Valid ZFEC parameters. data Params = Params - { required :: Int -- aka k - , total :: Int -- aka n + { paramsRequired :: Int -- aka k + , paramsTotal :: Int -- aka n } deriving (Show, Ord, Eq) @@ -34,11 +31,6 @@ instance Arbitrary Params where total <- choose (required, 255) return $ Params required total -instance Arbitrary FEC.FECParams where - arbitrary = do - (Params required total) <- arbitrary :: Gen Params - return $ FEC.fec required total - randomTake :: Int -> Int -> [a] -> [a] randomTake seed n values = map snd $ take n sortedValues where @@ -48,71 +40,87 @@ randomTake seed n values = map snd $ take n sortedValues rnds = randoms gen gen = mkStdGen seed --- | Any combination of the inputs blocks and the output blocks from --- @FEC.encode@, as long as there are at least @k@ of them, can be recombined --- using @FEC.decode@ to produce the original input blocks. --- -testFEC - :: FEC.FECParams - -- ^ The FEC parameters to exercise. - -> Word16 - -- ^ The length of the blocks to exercise. - -> Int - -- ^ A random seed to use to be able to vary the choice of which blocks to - -- try to decode. - -> Bool - -- ^ True if the encoded input was reconstructed by decoding, False - -- otherwise. -testFEC fec len seed = FEC.decode fec someTaggedBlocks == origBlocks - where - -- Construct some blocks. Each will just be the byte corresponding to the - -- block number repeated to satisfy the requested length. - origBlocks = B.replicate (fromIntegral len) . fromIntegral <$> [0 .. (FEC.paramK fec - 1)] - - -- Encode the data to produce the "secondary" blocks which (might) add - -- redundancy to the original blocks. - secondaryBlocks = FEC.encode fec origBlocks - - -- Tag each block with its block number because the decode API requires - -- this information. - taggedBlocks = zip [0 ..] (origBlocks ++ secondaryBlocks) - - -- Choose enough of the tagged blocks (some combination of original and - -- secondary) to try to use for decoding. - someTaggedBlocks = randomTake seed (FEC.paramK fec) taggedBlocks +{- | Any combination of the inputs blocks and the output blocks from + @FEC.encode@, as long as there are at least @k@ of them, can be recombined + using @FEC.decode@ to produce the original input blocks. +-} +testFEC :: + -- | The FEC parameters to exercise. + FEC.FECParams -> + -- | The length of the blocks to exercise. + Word16 -> + -- | A random seed to use to be able to vary the choice of which blocks to + -- try to decode. + Int -> + -- | True if the encoded input was reconstructed by decoding, False + -- otherwise. + Expectation +testFEC fec len seed = + let -- Construct some blocks. Each will just be the byte corresponding to the + -- block number repeated to satisfy the requested length. + origBlocks = B.replicate (fromIntegral len) . fromIntegral <$> [0 .. (FEC.paramK fec - 1)] + in do + -- Encode the data to produce the "secondary" blocks which (might) add + -- redundancy to the original blocks. + secondaryBlocks <- FEC.encode fec origBlocks + + let -- Tag each block with its block number because the decode API requires + -- this information. + taggedBlocks :: [(Int, B.ByteString)] + taggedBlocks = zip [0 ..] (origBlocks ++ secondaryBlocks) + + -- Choose enough of the tagged blocks (some combination of original and + -- secondary) to try to use for decoding. + someTaggedBlocks = randomTake seed (FEC.paramK fec) taggedBlocks + + decoded <- FEC.decode fec someTaggedBlocks + decoded `shouldBe` origBlocks -- | @FEC.secureDivide@ is the inverse of @FEC.secureCombine@. prop_divide :: Word16 -> Word8 -> Word8 -> Property prop_divide size byte divisor = monadicIO $ do - let input = B.replicate (fromIntegral size + 1) byte - parts <- run $ FEC.secureDivide (fromIntegral divisor) input - assert (FEC.secureCombine parts == input) + let input = B.replicate (fromIntegral size + 1) byte + parts <- run $ FEC.secureDivide (fromIntegral divisor) input + assert (FEC.secureCombine parts == input) -- | @FEC.encode@ is the inverse of @FEC.decode@. -prop_decode :: FEC.FECParams -> Word16 -> Int -> Property -prop_decode fec len seed = property $ testFEC fec len seed +prop_decode :: Params -> Word16 -> Int -> Property +prop_decode (Params required total) len seed = + monadicIO . run $ do + fec <- FEC.fec required total + testFEC fec len seed + +prop_primary_copies :: Params -> B.ByteString -> Property +prop_primary_copies (Params _ total) primary = monadicIO $ do + fec <- run $ FEC.fec 1 total + secondary <- run $ FEC.encode fec [primary] + assert $ all (primary ==) secondary -- | @FEC.enFEC@ is the inverse of @FEC.deFEC@. prop_deFEC :: Params -> B.ByteString -> Property -prop_deFEC (Params required total) testdata = - FEC.deFEC required total minimalShares === testdata - where - allShares = FEC.enFEC required total testdata - minimalShares = take required allShares +prop_deFEC (Params required total) testdata = monadicIO $ do + encoded <- run $ FEC.enFEC required total testdata + decoded <- run $ FEC.deFEC required total (take required encoded) + assert $ testdata == decoded main :: IO () -main = hspec $ do - describe "secureCombine" $ do - -- secureDivide is insanely slow and memory hungry for large inputs, - -- like QuickCheck will find with it as currently defined. Just pass - -- some small inputs. It's not clear it's worth fixing (or even - -- keeping) thesefunctions. They don't seem to be used by anything. - -- Why are they here? - it "is the inverse of secureDivide n" $ once $ prop_divide 1024 65 3 - - describe "deFEC" $ do - it "is the inverse of enFEC" $ (withMaxSuccess 2000 prop_deFEC) - - describe "decode" $ do - it "is (nearly) the inverse of encode" $ (withMaxSuccess 2000 prop_decode) - it "works with required=255" $ property $ prop_decode (FEC.fec 255 255) +main = hspec $ + parallel $ do + describe "secureCombine" $ do + -- secureDivide is insanely slow and memory hungry for large inputs, + -- like QuickCheck will find with it as currently defined. Just pass + -- some small inputs. It's not clear it's worth fixing (or even + -- keeping) thesefunctions. They don't seem to be used by anything. + -- Why are they here? + it "is the inverse of secureDivide n" $ once $ prop_divide 1024 65 3 + + describe "deFEC" $ do + it "is the inverse of enFEC" $ withMaxSuccess 2000 prop_deFEC + + describe "decode" $ do + it "is (nearly) the inverse of encode" $ withMaxSuccess 2000 prop_decode + it "works with total=255" $ property $ prop_decode (Params 1 255) + it "works with required=255" $ property $ prop_decode (Params 255 255) + + describe "encode" $ do + it "returns copies of the primary block for all 1 of N encodings" $ property $ withMaxSuccess 10000 prop_primary_copies