{-# LANGUAGE BangPatterns, ForeignFunctionInterface, CPP #-}
module ByteStringUtils (
unsafeWithInternals,
unpackPSfromUTF8,
gzReadFilePS,
mmapFilePS,
gzWriteFilePS,
gzWriteFilePSs,
ifHeadThenTail,
dropSpace,
breakSpace,
linesPS,
unlinesPS,
hashPS,
breakFirstPS,
breakLastPS,
substrPS,
readIntPS,
is_funky,
fromHex2PS,
fromPS2Hex,
betweenLinesPS,
break_after_nth_newline,
break_before_nth_newline,
intercalate
) where
import Prelude hiding ( catch )
import qualified Data.ByteString as B
import qualified Data.ByteString.Char8 as BC
import qualified Data.ByteString.Internal as BI
import Data.ByteString (intercalate, uncons)
import Data.ByteString.Internal (fromForeignPtr)
import Control.Exception ( catch )
import System.IO
import System.IO.Unsafe ( unsafePerformIO )
import Foreign.Storable ( peekElemOff, peek )
import Foreign.Marshal.Alloc ( free )
import Foreign.Marshal.Array ( mallocArray, peekArray, advancePtr )
import Foreign.C.Types ( CInt )
import Data.Bits ( rotateL )
import Data.Char ( chr, ord, isSpace )
import Data.Word ( Word8 )
import Data.Int ( Int32 )
import Control.Monad ( when )
import Foreign.Ptr ( nullPtr )
import Foreign.ForeignPtr ( ForeignPtr )
import Foreign.Ptr ( plusPtr, Ptr )
import Foreign.ForeignPtr ( withForeignPtr )
import Foreign.ForeignPtr ( addForeignPtrFinalizer )
import Foreign.Ptr ( FunPtr )
import qualified Data.ByteString.Lazy as BL
import qualified Codec.Compression.GZip as GZ
import Foreign.C.String ( CString, withCString )
import System.IO.MMap( mmapFileByteString )
import System.Mem( performGC )
import System.Posix.Files( fileSize, getSymbolicLinkStatus )
debugForeignPtr :: ForeignPtr a -> String -> IO ()
foreign import ccall unsafe "static fpstring.h debug_alloc" debug_alloc
:: Ptr a -> CString -> IO ()
foreign import ccall unsafe "static fpstring.h & debug_free" debug_free
:: FunPtr (Ptr a -> IO ())
debugForeignPtr fp n =
withCString n $ \cname-> withForeignPtr fp $ \p->
do debug_alloc p cname
addForeignPtrFinalizer debug_free fp
debugForeignPtr _ _ = return ()
unsafeWithInternals :: B.ByteString -> (Ptr Word8 -> Int -> IO a) -> IO a
unsafeWithInternals ps f
= case BI.toForeignPtr ps of
(fp,s,l) -> withForeignPtr fp $ \p -> f (p `plusPtr` s) l
readIntPS :: B.ByteString -> Maybe (Int, B.ByteString)
readIntPS = BC.readInt . BC.dropWhile isSpace
unpackPSfromUTF8 :: B.ByteString -> String
unpackPSfromUTF8 ps =
case BI.toForeignPtr ps of
(_,_, 0) -> ""
(x,s,l) ->
unsafePerformIO $ withForeignPtr x $ \p->
do outbuf <- mallocArray l
lout <- fromIntegral `fmap`
utf8_to_ints outbuf (p `plusPtr` s) (fromIntegral l)
when (lout < 0) $ error "Bad UTF8!"
str <- (map (chr . fromIntegral)) `fmap` peekArray lout outbuf
free outbuf
return str
foreign import ccall unsafe "static fpstring.h utf8_to_ints" utf8_to_ints
:: Ptr Int -> Ptr Word8 -> CInt -> IO CInt
{-# INLINE ifHeadThenTail #-}
ifHeadThenTail :: Word8 -> B.ByteString -> Maybe B.ByteString
ifHeadThenTail c s = case uncons s of
Just (w, t) | w == c -> Just t
_ -> Nothing
isSpaceWord8 :: Word8 -> Bool
isSpaceWord8 w =
w == 0x20 ||
w == 0x09 ||
w == 0x0A ||
w == 0x0D
{-# INLINE isSpaceWord8 #-}
firstnonspace :: Ptr Word8 -> Int -> Int -> IO Int
firstnonspace !ptr !n !m
| n >= m = return n
| otherwise = do w <- peekElemOff ptr n
if isSpaceWord8 w then firstnonspace ptr (n+1) m else return n
firstspace :: Ptr Word8 -> Int -> Int -> IO Int
firstspace !ptr !n !m
| n >= m = return n
| otherwise = do w <- peekElemOff ptr n
if (not . isSpaceWord8) w then firstspace ptr (n+1) m else return n
dropSpace :: B.ByteString -> B.ByteString
dropSpace (BI.PS x s l) = BI.inlinePerformIO $ withForeignPtr x $ \p -> do
i <- firstnonspace (p `plusPtr` s) 0 l
return $! if i == l then B.empty else BI.PS x (s+i) (l-i)
{-# INLINE dropSpace #-}
breakSpace :: B.ByteString -> (B.ByteString,B.ByteString)
breakSpace (BI.PS x s l) = BI.inlinePerformIO $ withForeignPtr x $ \p -> do
i <- firstspace (p `plusPtr` s) 0 l
return $! case () of {_
| i == 0 -> (B.empty, BI.PS x s l)
| i == l -> (BI.PS x s l, B.empty)
| otherwise -> (BI.PS x s i, BI.PS x (s+i) (l-i))
}
{-# INLINE breakSpace #-}
{-# INLINE is_funky #-}
is_funky :: B.ByteString -> Bool
is_funky ps = case BI.toForeignPtr ps of
(x,s,l) ->
unsafePerformIO $ withForeignPtr x $ \p->
(/=0) `fmap` has_funky_char (p `plusPtr` s) (fromIntegral l)
foreign import ccall unsafe "fpstring.h has_funky_char" has_funky_char
:: Ptr Word8 -> CInt -> IO CInt
{-# INLINE hashPS #-}
hashPS :: B.ByteString -> Int32
hashPS ps =
case BI.toForeignPtr ps of
(x,s,l) ->
unsafePerformIO $ withForeignPtr x $ \p->
do hash (p `plusPtr` s) l
hash :: Ptr Word8 -> Int -> IO Int32
hash ptr len = f (0 :: Int32) ptr len
where f h _ 0 = return h
f h p n = do x <- peek p
let !h' = (fromIntegral x) + (rotateL h 8)
f h' (p `advancePtr` 1) (n-1)
{-# INLINE substrPS #-}
substrPS :: B.ByteString -> B.ByteString -> Maybe Int
substrPS tok str
| B.null tok = Just 0
| B.length tok > B.length str = Nothing
| otherwise = do n <- BC.elemIndex (BC.head tok) str
let ttok = B.tail tok
reststr = B.drop (n+1) str
if ttok == B.take (B.length ttok) reststr
then Just n
else ((n+1)+) `fmap` substrPS tok reststr
{-# INLINE breakFirstPS #-}
breakFirstPS :: Char -> B.ByteString -> Maybe (B.ByteString,B.ByteString)
breakFirstPS c p = case BC.elemIndex c p of
Nothing -> Nothing
Just n -> Just (B.take n p, B.drop (n+1) p)
{-# INLINE breakLastPS #-}
breakLastPS :: Char -> B.ByteString -> Maybe (B.ByteString,B.ByteString)
breakLastPS c p = case BC.elemIndexEnd c p of
Nothing -> Nothing
Just n -> Just (B.take n p, B.drop (n+1) p)
{-# INLINE linesPS #-}
linesPS :: B.ByteString -> [B.ByteString]
linesPS ps
| B.null ps = [B.empty]
| otherwise = BC.split '\n' ps
unlinesPS :: [B.ByteString] -> B.ByteString
unlinesPS [] = BC.empty
unlinesPS x = BC.init $ BC.unlines x
{-# INLINE unlinesPS #-}
foreign import ccall unsafe "static zlib.h gzopen" c_gzopen
:: CString -> CString -> IO (Ptr ())
foreign import ccall unsafe "static zlib.h gzclose" c_gzclose
:: Ptr () -> IO ()
foreign import ccall unsafe "static zlib.h gzread" c_gzread
:: Ptr () -> Ptr Word8 -> CInt -> IO CInt
foreign import ccall unsafe "static zlib.h gzwrite" c_gzwrite
:: Ptr () -> Ptr Word8 -> CInt -> IO CInt
gzReadFilePS :: FilePath -> IO B.ByteString
gzReadFilePS f = do
h <- openBinaryFile f ReadMode
header <- B.hGet h 2
if header /= BC.pack "\31\139"
then do hClose h
mmapFilePS f
else do hSeek h SeekFromEnd (-4)
len <- hGetLittleEndInt h
hClose h
let decompress = GZ.decompressWith GZ.defaultDecompressParams {
GZ.decompressBufferSize = len
}
fmap (B.concat . BL.toChunks . decompress) $
fmap (BL.fromChunks . (:[])) $
B.readFile f
BL.readFile f
withCString f $ \fstr-> withCString "rb" $ \rb-> do
gzf <- c_gzopen fstr rb
when (gzf == nullPtr) $ fail $ "problem opening file "++f
fp <- BI.mallocByteString len
debugForeignPtr fp $ "gzReadFilePS "++f
lread <- withForeignPtr fp $ \p ->
c_gzread gzf p (fromIntegral len)
c_gzclose gzf
when (fromIntegral lread /= len) $
fail $ "problem gzreading file "++f
return $ fromForeignPtr fp 0 len
hGetLittleEndInt :: Handle -> IO Int
hGetLittleEndInt h = do
b1 <- ord `fmap` hGetChar h
b2 <- ord `fmap` hGetChar h
b3 <- ord `fmap` hGetChar h
b4 <- ord `fmap` hGetChar h
return $ b1 + 256*b2 + 65536*b3 + 16777216*b4
gzWriteFilePS :: FilePath -> B.ByteString -> IO ()
gzWriteFilePS f ps = gzWriteFilePSs f [ps]
gzWriteFilePSs :: FilePath -> [B.ByteString] -> IO ()
gzWriteFilePSs f pss =
BL.writeFile f $ GZ.compress $ BL.fromChunks pss
withCString f $ \fstr -> withCString "wb" $ \wb -> do
gzf <- c_gzopen fstr wb
when (gzf == nullPtr) $ fail $ "problem gzopening file for write: "++f
mapM_ (gzWriteToGzf gzf) pss `catch`
\_ -> fail $ "problem gzwriting file: "++f
c_gzclose gzf
gzWriteToGzf :: Ptr () -> B.ByteString -> IO ()
gzWriteToGzf gzf ps = case BI.toForeignPtr ps of
(_,_,0) -> return ()
(x,s,l) -> do
lw <- withForeignPtr x $ \p -> c_gzwrite gzf (p `plusPtr` s)
(fromIntegral l)
when (fromIntegral lw /= l) $ fail $ "problem in gzWriteToGzf"
mmapFilePS :: FilePath -> IO B.ByteString
mmapFilePS f = do
x <- mmapFileByteString f Nothing
`catch` (\_ -> do
size <- fileSize `fmap` getSymbolicLinkStatus f
if size == 0
then return B.empty
else performGC >> mmapFileByteString f Nothing)
return x
mmapFilePS = B.readFile
foreign import ccall unsafe "static fpstring.h conv_to_hex" conv_to_hex
:: Ptr Word8 -> Ptr Word8 -> CInt -> IO ()
fromPS2Hex :: B.ByteString -> B.ByteString
fromPS2Hex ps = case BI.toForeignPtr ps of
(x,s,l) ->
BI.unsafeCreate (2*l) $ \p -> withForeignPtr x $ \f ->
conv_to_hex p (f `plusPtr` s) $ fromIntegral l
foreign import ccall unsafe "static fpstring.h conv_from_hex" conv_from_hex
:: Ptr Word8 -> Ptr Word8 -> CInt -> IO ()
fromHex2PS :: B.ByteString -> B.ByteString
fromHex2PS ps = case BI.toForeignPtr ps of
(x,s,l) ->
BI.unsafeCreate (l `div` 2) $ \p -> withForeignPtr x $ \f ->
conv_from_hex p (f `plusPtr` s) (fromIntegral $ l `div` 2)
betweenLinesPS :: B.ByteString -> B.ByteString -> B.ByteString
-> Maybe (B.ByteString)
betweenLinesPS start end ps
= case break (start ==) (linesPS ps) of
(_, _:rest@(bs1:_)) ->
case BI.toForeignPtr bs1 of
(ps1,s1,_) ->
case break (end ==) rest of
(_, bs2:_) -> case BI.toForeignPtr bs2 of (_,s2,_) -> Just $ fromForeignPtr ps1 s1 (s2 - s1)
_ -> Nothing
_ -> Nothing
break_after_nth_newline :: Int -> B.ByteString
-> Maybe (B.ByteString, B.ByteString)
break_after_nth_newline 0 the_ps | B.null the_ps = Just (B.empty, B.empty)
break_after_nth_newline n the_ps =
case BI.toForeignPtr the_ps of
(fp,the_s,l) ->
unsafePerformIO $ withForeignPtr fp $ \p ->
do let findit 0 s | s == end = return $ Just (the_ps, B.empty)
findit _ s | s == end = return Nothing
findit 0 s = let left_l = s - the_s
in return $ Just (fromForeignPtr fp the_s left_l,
fromForeignPtr fp s (l - left_l))
findit i s = do w <- peekElemOff p s
if w == nl then findit (i-1) (s+1)
else findit i (s+1)
nl = BI.c2w '\n'
end = the_s + l
findit n the_s
break_before_nth_newline :: Int -> B.ByteString -> (B.ByteString, B.ByteString)
break_before_nth_newline 0 the_ps
| B.null the_ps = (B.empty, B.empty)
break_before_nth_newline n the_ps =
case BI.toForeignPtr the_ps of
(fp,the_s,l) ->
unsafePerformIO $ withForeignPtr fp $ \p ->
do let findit _ s | s == end = return (the_ps, B.empty)
findit i s = do w <- peekElemOff p s
if w == nl
then if i == 0
then let left_l = s - the_s
in return (fromForeignPtr fp the_s left_l,
fromForeignPtr fp s (l - left_l))
else findit (i-1) (s+1)
else findit i (s+1)
nl = BI.c2w '\n'
end = the_s + l
findit n the_s