{-# LANGUAGE BangPatterns, OverloadedStrings #-}
module LazyByteString where
import Data.Char
import Data.Word
import Data.Monoid
import Data.List
import Control.Monad
import Control.Arrow (second)
import Criterion.Main
import Foreign
import qualified Data.ByteString as S
import qualified Data.ByteString.Unsafe as S
import qualified Data.ByteString.Internal as S
import qualified Data.ByteString.Lazy as L
import qualified Data.ByteString.Lazy.Internal as L
import Data.ByteString.Base64
import Blaze.ByteString.Builder.Internal
import Blaze.ByteString.Builder.Word
import Blaze.ByteString.Builder.ByteString
main :: IO ()
main = do
let (chunkInfos, benchmarks) = unzip
[ lazyVsBlaze
( "base64"
, L.fromChunks . return . encode
, toLazyByteString . encodeBase64
, (\i -> S.drop 13 $ S.pack $ take i $ cycle [0..])
, n)
]
sequence_ (intersperse (putStrLn "") chunkInfos)
putStrLn ""
defaultMain benchmarks
where
n :: Int
n = 100000
lazyVsBlaze :: (String, a -> L.ByteString, a -> L.ByteString, Int -> a, Int)
-> (IO (), Benchmark)
lazyVsBlaze (cmpName, lazy, blaze, prep, n) =
( do putStrLn $ cmpName ++ ": " ++ checkResults
showChunksize implLazy lazy
showChunksize implBlaze blaze
, bgroup cmpName
[ mkBench implBlaze blaze
, mkBench implLazy lazy
]
)
where
implLazy = "bytestring"
implBlaze = "blaze-builder"
x = prep n
nInfo = "for n = " ++ show n
checkResults
| lazy x == blaze x = "implementations agree " ++ nInfo
| otherwise = unlines [ "ERROR: IMPLEMENTATIONS DISAGREE " ++ nInfo
, implLazy ++ ": " ++ show (lazy x)
, implBlaze ++ ": " ++ show (blaze x)
]
showChunksize implName impl = do
let bs = impl x
cs = map S.length $ L.toChunks bs
putStrLn $ " " ++ implName ++ ": "
putStrLn $ " chunks sizes: " ++ show cs
putStrLn $ " avg. chunk size: " ++
show ((fromIntegral (sum cs) :: Double) / fromIntegral (length cs))
mkBench implName impl = bench implName $ whnf (L.length . impl) x
countToZero :: Int -> Maybe (Word8, Int)
countToZero 0 = Nothing
countToZero i = Just (fromIntegral i, i - 1)
unfoldrBlaze :: (a -> Maybe (Word8, a)) -> a -> L.ByteString
unfoldrBlaze f x = toLazyByteString $ fromWriteUnfoldr writeWord8 f x
fromWriteUnfoldr :: (b -> Write) -> (a -> Maybe (b, a)) -> a -> Builder
fromWriteUnfoldr write =
makeBuilder
where
makeBuilder f x0 = fromBuildStepCont $ step x0
where
step x1 !k = fill x1
where
fill x !(BufRange pf0 pe0) = go (f x) pf0
where
go !Nothing !pf = do
let !br' = BufRange pf pe0
k br'
go !(Just (y, x')) !pf
| pf `plusPtr` bound <= pe0 = do
!pf' <- runWrite (write y) pf
go (f x') pf'
| otherwise = return $ bufferFull bound pf $
\(BufRange pfNew peNew) -> do
!pfNew' <- runWrite (write y) pfNew
fill x' (BufRange pfNew' peNew)
where
bound = getBound $ write y
{-# INLINE fromWriteUnfoldr #-}
test :: Int -> (L.ByteString, L.ByteString)
test i =
((L.filter ((==0) . (`mod` 3)) $ x) ,
(filterBlaze ((==0) . (`mod` 3)) $ x))
where
x = L.pack $ take i $ cycle [0..]
filterBlaze :: (Word8 -> Bool) -> L.ByteString -> L.ByteString
filterBlaze f = toLazyByteString . filterLazyByteString f
{-# INLINE filterBlaze #-}
mapBlaze :: (Word8 -> Word8) -> L.ByteString -> L.ByteString
mapBlaze f = toLazyByteString . mapLazyByteString f
{-# INLINE mapBlaze #-}
filterByteString :: (Word8 -> Bool) -> S.ByteString -> Builder
filterByteString p = mapFilterMapByteString id p id
{-# INLINE filterByteString #-}
filterLazyByteString :: (Word8 -> Bool) -> L.ByteString -> Builder
filterLazyByteString p = mapFilterMapLazyByteString id p id
{-# INLINE filterLazyByteString #-}
mapByteString :: (Word8 -> Word8) -> S.ByteString -> Builder
mapByteString f = mapFilterMapByteString f (const True) id
{-# INLINE mapByteString #-}
mapLazyByteString :: (Word8 -> Word8) -> L.ByteString -> Builder
mapLazyByteString f = mapFilterMapLazyByteString f (const True) id
{-# INLINE mapLazyByteString #-}
mapFilterMapByteString :: (Word8 -> Word8) -> (Word8 -> Bool) -> (Word8 -> Word8)
-> S.ByteString -> Builder
mapFilterMapByteString f p g =
\bs -> fromBuildStepCont $ step bs
where
step (S.PS ifp ioff isize) !k =
goBS (unsafeForeignPtrToPtr ifp `plusPtr` ioff)
where
!ipe = unsafeForeignPtrToPtr ifp `plusPtr` (ioff + isize)
goBS !ip0 !br@(BufRange op0 ope)
| ip0 >= ipe = do touchForeignPtr ifp
k br
| op0 < ope = goPartial (ip0 `plusPtr` min outRemaining inpRemaining)
| otherwise = return $ bufferFull 1 op0 (goBS ip0)
where
outRemaining = ope `minusPtr` op0
inpRemaining = ipe `minusPtr` ip0
goPartial !ipeTmp = go ip0 op0
where
go !ip !op
| ip < ipeTmp = do
w <- peek ip
let w' = g w
if p w'
then poke op (f w') >> go (ip `plusPtr` 1) (op `plusPtr` 1)
else go (ip `plusPtr` 1) op
| otherwise =
goBS ip (BufRange op ope)
{-# INLINE mapFilterMapByteString #-}
mapFilterMapLazyByteString :: (Word8 -> Word8) -> (Word8 -> Bool) -> (Word8 -> Word8)
-> L.ByteString -> Builder
mapFilterMapLazyByteString f p g =
L.foldrChunks (\c b -> mapFilterMapByteString f p g c `mappend` b) mempty
{-# INLINE mapFilterMapLazyByteString #-}
fromWriteReplicated :: (a -> Write) -> Int -> a -> Builder
fromWriteReplicated write =
makeBuilder
where
makeBuilder !n0 x = fromBuildStepCont $ step
where
bound = getBound $ write x
step !k = fill n0
where
fill !n1 !(BufRange pf0 pe0) = go n1 pf0
where
go 0 !pf = do
let !br' = BufRange pf pe0
k br'
go n !pf
| pf `plusPtr` bound <= pe0 = do
pf' <- runWrite (write x) pf
go (n-1) pf'
| otherwise = return $ bufferFull bound pf $
\(BufRange pfNew peNew) -> do
pfNew' <- runWrite (write x) pfNew
fill (n-1) (BufRange pfNew' peNew)
{-# INLINE fromWriteReplicated #-}
fromReplicateWord8 :: Int -> Word8 -> Builder
fromReplicateWord8 !n0 x =
fromBuildStepCont $ step
where
step !k = fill n0
where
fill !n !br@(BufRange pf pe)
| n <= 0 = k br
| pf' <= pe = do
_ <- S.memset pf x (fromIntegral n)
let !br' = BufRange pf' pe
k br'
| otherwise = do
let !l = pe `minusPtr` pf
_ <- S.memset pf x (fromIntegral l)
return $ bufferFull 1 pe $ fill (n - l)
where
pf' = pf `plusPtr` n
{-# INLINE fromReplicateWord8 #-}
{-# RULES "fromWriteReplicated/writeWord8"
fromWriteReplicated writeWord8 = fromReplicateWord8
#-}
concatMapBuilder :: (Word8 -> Builder) -> L.ByteString -> Builder
concatMapBuilder f = L.foldr (\w b -> f w `mappend` b) mempty
{-# INLINE concatMapBuilder #-}
concatMapBlaze :: (Word8 -> L.ByteString) -> L.ByteString -> L.ByteString
concatMapBlaze f = toLazyByteString . concatMapBuilder (fromLazyByteString . f)
packBlaze :: [Word8] -> L.ByteString
packBlaze = toLazyByteString . fromWriteList writeWord8
copyBlaze :: L.ByteString -> L.ByteString
copyBlaze = toLazyByteString . copyLazyByteString
intersperseBlocks :: Int -> S.ByteString -> S.ByteString -> Builder
intersperseBlocks blockSize sep (S.PS ifp ioff isize) =
fromPut $ do
lastBS <- go (ip0 `plusPtr` ioff)
unless (S.null lastBS) (putBuilder $ fromByteString lastBS)
where
ip0 = unsafeForeignPtrToPtr ifp
ipe = ip0 `plusPtr` (ioff + isize)
go !ip
| ip `plusPtr` blockSize >= ipe =
return $ S.PS ifp (ip `minusPtr` ip0) (ipe `minusPtr` ip)
| otherwise = do
putBuilder $ fromByteString (S.PS ifp (ip `minusPtr` ip0) blockSize)
`mappend` fromByteString sep
go (ip `plusPtr` blockSize)
intersperseLazyBlocks :: Int -> Builder -> L.ByteString -> Builder
intersperseLazyBlocks blockSize sep bs =
go (splitLazyAt blockSize bs)
where
go (pre, suf)
| L.null suf = fromLazyByteString pre
| otherwise = fromLazyByteString pre `mappend` sep `mappend`
go (splitLazyAt blockSize suf)
encodeBase64MIME :: S.ByteString -> Builder
encodeBase64MIME =
intersperseLazyBlocks 76 (fromByteString "\r\n") . toLazyByteString . encodeBase64
encodeBase64 :: S.ByteString -> Builder
encodeBase64 = encodeLazyBase64 . L.fromChunks . return
encodeLazyBase64 :: L.ByteString -> Builder
encodeLazyBase64 =
mkBuilder
where
mkBuilder bs = fromPut $ do
remainder <- putWriteLazyBlocks 3 writeBase64 bs
putBuilder $ complete remainder
{-# INLINE writeBase64 #-}
writeBase64 ip =
exactWrite 4 $ \op -> do
b0 <- peekByte 0
b1 <- peekByte 1
b2 <- peekByte 2
let w = (b0 `shiftL` 16) .|. (b1 `shiftL` 8) .|. b2
poke (castPtr $ op ) =<< enc (w `shiftR` 12)
poke (castPtr $ op `plusPtr` 2) =<< enc (w .&. 0xfff)
where
peekByte :: Int -> IO Word32
peekByte off = fmap fromIntegral (peekByteOff ip off :: IO Word8)
enc = peekElemOff (unsafeForeignPtrToPtr encodeTable) . fromIntegral
{-# INLINE complete #-}
complete bs
| S.null bs = mempty
| otherwise = fromWrite $
exactWrite 4 $ \op -> do
let poke6Base64 off sh = pokeByteOff op off
(alphabet `S.unsafeIndex` fromIntegral (w `shiftR` sh .&. 63))
pad off = pokeByteOff op off (fromIntegral $ ord '=' :: Word8)
poke6Base64 0 18
poke6Base64 1 12
if S.length bs == 1 then pad 2
else poke6Base64 2 8
pad 3
where
getByte :: Int -> Int -> Word32
getByte i sh = fromIntegral (bs `S.unsafeIndex` i) `shiftL` sh
w = getByte 0 16 .|. (if S.length bs == 1 then 0 else getByte 1 8)
{-# NOINLINE alphabet #-}
alphabet :: S.ByteString
alphabet = S.pack $ [65..90] ++ [97..122] ++ [48..57] ++ [43,47]
{-# NOINLINE encodeTable #-}
encodeTable :: ForeignPtr Word16
encodeTable = unsafePerformIO $ do
fp <- mallocForeignPtrArray 4096
let ix = fromIntegral . S.index alphabet
withForeignPtr fp $ \p ->
sequence_ [ pokeElemOff p (j*64+k) ((ix k `shiftL` 8) .|. ix j)
| j <- [0..63], k <- [0..63] ]
return fp
{-# INLINE putWriteBlocks #-}
putWriteBlocks :: Int
-> (Ptr Word8 -> Write)
-> S.ByteString
-> Put S.ByteString
putWriteBlocks blockSize write =
\bs -> putBuildStepCont $ step bs
where
step (S.PS ifp ioff isize) !k =
goBS (unsafeForeignPtrToPtr ifp `plusPtr` ioff)
where
!ipe = unsafeForeignPtrToPtr ifp `plusPtr` (ioff + isize)
goBS !ip0 !br@(BufRange op0 ope)
| ip0 `plusPtr` blockSize > ipe = do
touchForeignPtr ifp
let !bs' = S.PS ifp (ip0 `minusPtr` unsafeForeignPtrToPtr ifp)
(ipe `minusPtr` ip0)
k bs' br
| op0 `plusPtr` writeBound < ope =
goPartial (ip0 `plusPtr` (blockSize * min outRemaining inpRemaining))
| otherwise = return $ bufferFull writeBound op0 (goBS ip0)
where
writeBound = getBound' "putWriteBlocks" write
outRemaining = (ope `minusPtr` op0) `div` writeBound
inpRemaining = (ipe `minusPtr` ip0) `div` blockSize
goPartial !ipeTmp = go ip0 op0
where
go !ip !op
| ip < ipeTmp = do
op' <- runWrite (write ip) op
go (ip `plusPtr` blockSize) op'
| otherwise =
goBS ip (BufRange op ope)
{-# INLINE putWriteLazyBlocks #-}
putWriteLazyBlocks :: Int
-> (Ptr Word8 -> Write)
-> L.ByteString
-> Put S.ByteString
putWriteLazyBlocks blockSize write =
go
where
go L.Empty = return S.empty
go (L.Chunk bs lbs) = do
bsRem <- putWriteBlocks blockSize write bs
case S.length bsRem of
lRem
| lRem <= 0 -> go lbs
| otherwise -> do
let (lbsPre, lbsSuf) =
L.splitAt (fromIntegral $ blockSize - lRem) lbs
case S.concat $ bsRem : L.toChunks lbsPre of
block@(S.PS bfp boff bsize)
| bsize < blockSize -> return block
| otherwise -> do
putBuilder $ fromWrite $
write (unsafeForeignPtrToPtr bfp `plusPtr` boff)
putLiftIO $ touchForeignPtr bfp
go lbsSuf
chunks3 :: [Word8] -> [Word32]
chunks3 (b0 : b1 : b2 : bs) =
((fromIntegral b0 `shiftL` 16) .|.
(fromIntegral b1 `shiftL` 8) .|.
(fromIntegral b2 )
) : chunks3 bs
chunks3 _ = []
cmpWriteToLib :: [Word8] -> (L.ByteString, L.ByteString)
cmpWriteToLib bs =
( toLazyByteString $ encodeBase64 $ S.pack bs
, (`L.Chunk` L.empty) $ encode $ S.pack bs )
test3 :: Bool
test3 = uncurry (==) $ cmpWriteToLib $ [0..]
test2 :: L.ByteString
test2 = toLazyByteString $ encodeBase64 $ S.pack [0..]
splitLazyAt :: Int -> L.ByteString -> (L.ByteString, L.ByteString)
splitLazyAt n cs0
| n <= 0 = (L.Empty, cs0)
| otherwise = split cs0
where
split L.Empty = (L.Empty, L.Empty)
split (L.Chunk c cs)
| n < len = case S.splitAt n c of
(pre, suf) -> (L.Chunk pre L.Empty, L.Chunk suf cs)
| otherwise = case splitLazyAt (n - len) cs of
(pre, suf) -> (L.Chunk c pre , suf )
where
len = S.length c
{-# INLINE partitionStrict #-}
partitionStrict :: (Word8 -> Bool) -> S.ByteString -> (S.ByteString, S.ByteString)
partitionStrict f (S.PS ifp ioff ilen) =
second S.reverse $ S.inlinePerformIO $ do
ofp <- S.mallocByteString ilen
withForeignPtr ifp $ wrapper ofp
where
wrapper !ofp !ip0 =
go (ip0 `plusPtr` ioff) op0 (op0 `plusPtr` ilen)
where
op0 = unsafeForeignPtrToPtr ofp
go !ip !opl !oph
| oph == opl = return (S.PS ofp 0 olen, S.PS ofp olen (ilen - olen))
| otherwise = do
x <- peek ip
if f x
then do poke opl x
go (ip `plusPtr` 1) (opl `plusPtr` 1) oph
else do let oph' = oph `plusPtr` (-1)
poke oph' x
go (ip `plusPtr` 1) opl oph'
where
olen = opl `minusPtr` op0
{-# INLINE partitionLazy #-}
partitionLazy :: (Word8 -> Bool) -> L.ByteString -> (L.ByteString, L.ByteString)
partitionLazy f =
L.foldrChunks partitionOne (L.empty, L.empty)
where
partitionOne bs (ls, rs) =
(L.Chunk l ls, L.Chunk r rs)
where
(l, r) = partitionStrict f bs