{-# LANGUAGE BangPatterns #-}
{-# OPTIONS_HADDOCK hide, prune #-}
module Data.ByteString.Search.Internal.Utils ( kmpBorders
, automaton
, occurs
, suffShifts
, ldrop
, ltake
, lsplit
, release
, keep
, strictify
) where
import qualified Data.ByteString as S
import qualified Data.ByteString.Lazy as L
import Data.ByteString.Unsafe (unsafeIndex)
import Data.Array.Base (unsafeRead, unsafeWrite, unsafeAt)
import Data.Array.ST
import Data.Array.Unboxed
import Control.Monad (when)
import Data.Bits
import Data.Word (Word8)
{-# INLINE automaton #-}
automaton :: S.ByteString -> UArray Int Int
automaton !pat = runSTUArray (do
let !patLen = S.length pat
{-# INLINE patAt #-}
patAt !i = fromIntegral (unsafeIndex pat i)
!bord = kmpBorders pat
aut <- newArray (0, (patLen + 1)*256 - 1) 0
unsafeWrite aut (patAt 0) 1
let loop !state = do
let !base = state `shiftL` 8
inner j
| j < 0 = if state == patLen
then return aut
else loop (state+1)
| otherwise = do
let !i = base + patAt j
s <- unsafeRead aut i
when (s == 0) (unsafeWrite aut i (j+1))
inner (unsafeAt bord j)
if state == patLen
then inner (unsafeAt bord state)
else inner state
loop 1)
{-# INLINE kmpBorders #-}
kmpBorders :: S.ByteString -> UArray Int Int
kmpBorders pat = runSTUArray (do
let !patLen = S.length pat
{-# INLINE patAt #-}
patAt :: Int -> Word8
patAt i = unsafeIndex pat i
ar <- newArray_ (0, patLen)
unsafeWrite ar 0 (-1)
let dec w j
| j < 0 || w == patAt j = return $! j+1
| otherwise = unsafeRead ar j >>= dec w
bordLoop !i !j
| patLen < i = return ar
| otherwise = do
let !w = patAt (i-1)
j' <- dec w j
if i < patLen && patAt j' == patAt i
then unsafeRead ar j' >>= unsafeWrite ar i
else unsafeWrite ar i j'
bordLoop (i+1) j'
bordLoop 1 (-1))
{-# INLINE occurs #-}
occurs :: S.ByteString -> UArray Int Int
occurs pat = runSTUArray (do
let !patEnd = S.length pat - 1
{-# INLINE patAt #-}
patAt :: Int -> Int
patAt i = fromIntegral (unsafeIndex pat i)
ar <- newArray (0, 255) 1
let loop !i
| i == patEnd = return ar
| otherwise = do
unsafeWrite ar (patAt i) (-i)
loop (i + 1)
loop 0)
{-# INLINE suffShifts #-}
suffShifts :: S.ByteString -> UArray Int Int
suffShifts pat = runSTUArray (do
let !patLen = S.length pat
!patEnd = patLen - 1
!suff = suffLengths pat
ar <- newArray (0,patEnd) patLen
let preShift !idx !j
| idx < 0 = return ()
| suff `unsafeAt` idx == idx + 1 = do
let !shf = patEnd - idx
fillToShf !i
| i == shf = return ()
| otherwise = do
unsafeWrite ar i shf
fillToShf (i + 1)
fillToShf j
preShift (idx - 1) shf
| otherwise = preShift (idx - 1) j
sufShift !idx
| idx == patEnd = return ar
| otherwise = do
unsafeWrite ar (patEnd - unsafeAt suff idx) (patEnd - idx)
sufShift (idx + 1)
preShift (patEnd - 1) 0
sufShift 0)
{-# INLINE suffLengths #-}
suffLengths :: S.ByteString -> UArray Int Int
suffLengths pat = runSTUArray (do
let !patLen = S.length pat
!patEnd = patLen - 1
!preEnd = patEnd - 1
{-# INLINE patAt #-}
patAt i = unsafeIndex pat i
!pe = patAt patEnd
dec !diff !j
| j < 0 || patAt j /= patAt (j + diff) = j
| otherwise = dec diff (j - 1)
ar <- newArray_ (0, patEnd)
unsafeWrite ar patEnd patLen
let noSuff !i
| i < 0 = return ar
| patAt i == pe = do
let !diff = patEnd - i
!nextI = i - 1
!prevI = dec diff nextI
if prevI == nextI
then unsafeWrite ar i 1 >> noSuff nextI
else do unsafeWrite ar i (i - prevI)
suffLoop prevI preEnd nextI
| otherwise = do
unsafeWrite ar i 0
noSuff (i - 1)
suffLoop !pre !end !idx
| idx < 0 = return ar
| pre < idx =
if patAt idx /= pe
then unsafeWrite ar idx 0 >> suffLoop pre (end - 1) (idx - 1)
else do
prevS <- unsafeRead ar end
if pre + prevS < idx
then do unsafeWrite ar idx prevS
suffLoop pre (end - 1) (idx - 1)
else do let !prI = dec (patEnd - idx) pre
unsafeWrite ar idx (idx - prI)
suffLoop prI preEnd (idx - 1)
| otherwise = noSuff idx
noSuff preEnd)
{-# INLINE strictify #-}
strictify :: L.ByteString -> S.ByteString
strictify = S.concat . L.toChunks
{-# INLINE ldrop #-}
ldrop :: Int -> [S.ByteString] -> [S.ByteString]
ldrop _ [] = []
ldrop k (!h : t)
| k < l = S.drop k h : t
| otherwise = ldrop (k - l) t
where
!l = S.length h
{-# INLINE ltake #-}
ltake :: Int -> [S.ByteString] -> [S.ByteString]
ltake _ [] = []
ltake !k (!h : t)
| l < k = h : ltake (k - l) t
| otherwise = [S.take k h]
where
!l = S.length h
{-# INLINE lsplit #-}
lsplit :: Int -> [S.ByteString] -> ([S.ByteString], [S.ByteString])
lsplit _ [] = ([],[])
lsplit !k (!h : t)
= case compare k l of
LT -> ([S.take k h], S.drop k h : t)
EQ -> ([h], t)
GT -> let (u, v) = lsplit (k - l) t in (h : u, v)
where
!l = S.length h
{-# INLINE release #-}
release :: Int -> [S.ByteString] -> [S.ByteString]
release !deep _
| deep <= 0 = []
release !deep (!x:xs) = let !rest = release (deep-S.length x) xs in x : rest
release _ [] = error "stringsearch.release could not find enough past!"
{-# INLINE keep #-}
keep :: Int -> [S.ByteString] -> ([S.ByteString],[S.ByteString])
keep !deep xs
| deep < 1 = ([],xs)
keep deep (!x:xs) = let (!p,d) = keep (deep - S.length x) xs in (x:p,d)
keep _ [] = error "Forgot too much"