{-# LANGUAGE BangPatterns #-}
{-# OPTIONS_HADDOCK hide, prune #-}
module Data.ByteString.Search.Internal.BoyerMoore (
matchLS
, matchSS
, matchNOS
, replaceAllS
, breakSubstringS
, breakAfterS
, splitKeepEndS
, splitKeepFrontS
, splitDropS
) where
import Data.ByteString.Search.Internal.Utils
(occurs, suffShifts, strictify)
import Data.ByteString.Search.Substitution
import qualified Data.ByteString as S
import qualified Data.ByteString.Lazy as L
import qualified Data.ByteString.Lazy.Internal as LI
import Data.ByteString.Unsafe (unsafeIndex)
import Data.Array.Base (unsafeAt)
import Data.Word (Word8)
{-# INLINE matchLS #-}
matchLS :: L.ByteString
-> S.ByteString
-> [Int]
matchLS pat = search
where
search = strictSearcher True (strictify pat)
{-# INLINE matchSS #-}
matchSS :: S.ByteString
-> S.ByteString
-> [Int]
matchSS pat = search
where
search = strictSearcher True pat
{-# INLINE matchNOS #-}
matchNOS :: S.ByteString
-> S.ByteString
-> [Int]
matchNOS pat = search
where
search = strictSearcher False pat
{-# INLINE replaceAllS #-}
replaceAllS :: Substitution rep
=> S.ByteString
-> rep
-> S.ByteString
-> L.ByteString
replaceAllS pat
| S.null pat = \sub -> prependCycle sub . flip LI.chunk LI.Empty
| otherwise =
let repl = strictRepl pat
in \sub -> L.fromChunks . repl (substitution sub)
{-# INLINE breakSubstringS #-}
breakSubstringS :: S.ByteString
-> S.ByteString
-> (S.ByteString, S.ByteString)
breakSubstringS = strictBreak
breakAfterS :: S.ByteString
-> S.ByteString
-> (S.ByteString, S.ByteString)
breakAfterS pat
| S.null pat = \str -> (S.empty, str)
breakAfterS pat = breaker
where
!patLen = S.length pat
searcher = strictSearcher False pat
breaker str = case searcher str of
[] -> (str, S.empty)
(i:_) -> S.splitAt (i + patLen) str
{-# INLINE splitKeepEndS #-}
splitKeepEndS :: S.ByteString
-> S.ByteString
-> [S.ByteString]
splitKeepEndS = strictSplitKeepEnd
{-# INLINE splitKeepFrontS #-}
splitKeepFrontS :: S.ByteString
-> S.ByteString
-> [S.ByteString]
splitKeepFrontS = strictSplitKeepFront
{-# INLINE splitDropS #-}
splitDropS :: S.ByteString
-> S.ByteString
-> [S.ByteString]
splitDropS = strictSplitDrop
strictSearcher :: Bool -> S.ByteString -> S.ByteString -> [Int]
strictSearcher _ !pat
| S.null pat = enumFromTo 0 . S.length
| S.length pat == 1 = let !w = S.head pat in S.elemIndices w
strictSearcher !overlap pat = searcher
where
{-# INLINE patAt #-}
patAt :: Int -> Word8
patAt !i = unsafeIndex pat i
!patLen = S.length pat
!patEnd = patLen - 1
!maxLen = maxBound - patLen
!occT = occurs pat
!suffT = suffShifts pat
!skip = if overlap then unsafeAt suffT 0 else patLen
!kept = patLen - skip
!pe = patAt patEnd
{-# INLINE occ #-}
occ !w = unsafeAt occT (fromIntegral w)
{-# INLINE suff #-}
suff !i = unsafeAt suffT i
searcher str
| maxLen < strLen
= error "Overflow in BoyerMoore.strictSearcher"
| maxDiff < 0 = []
| otherwise = checkEnd patEnd
where
!strLen = S.length str
!strEnd = strLen - 1
!maxDiff = strLen - patLen
{-# INLINE strAt #-}
strAt !i = unsafeIndex str i
afterMatch !diff !patI =
case strAt (diff + patI) of
!c | c == patAt patI ->
if patI == kept
then diff : let !diff' = diff + skip
in if maxDiff < diff'
then []
else afterMatch diff' patEnd
else afterMatch diff (patI - 1)
| patI == patEnd ->
checkEnd (diff + 2*patEnd + occ c)
| otherwise ->
let {-# INLINE badShift #-}
badShift = patI + occ c
{-# INLINE goodShift #-}
goodShift = suff patI
!diff' = diff + max badShift goodShift
in if maxDiff < diff'
then []
else checkEnd (diff + patEnd)
checkEnd !sI
| strEnd < sI = []
| otherwise =
case strAt sI of
!c | c == pe -> findMatch (sI - patEnd) (patEnd - 1)
| otherwise -> checkEnd (sI + patEnd + occ c)
findMatch !diff !patI =
case strAt (diff + patI) of
!c | c == patAt patI ->
if patI == 0
then diff : let !diff' = diff + skip
in if maxDiff < diff'
then []
else
if skip == patLen
then
checkEnd (diff' + patEnd)
else
afterMatch diff' patEnd
else findMatch diff (patI - 1)
| otherwise ->
let !diff' = diff + max (patI + occ c) (suff patI)
in if maxDiff < diff'
then []
else checkEnd (diff' + patEnd)
strictBreak :: S.ByteString -> S.ByteString -> (S.ByteString, S.ByteString)
strictBreak pat
| S.null pat = \str -> (S.empty, str)
| otherwise = breaker
where
searcher = strictSearcher False pat
breaker str = case searcher str of
[] -> (str, S.empty)
(i:_) -> S.splitAt i str
strictSplitKeepFront :: S.ByteString -> S.ByteString -> [S.ByteString]
strictSplitKeepFront pat
| S.null pat = const (repeat S.empty)
strictSplitKeepFront pat = splitter
where
!patLen = S.length pat
searcher = strictSearcher False pat
splitter str
| S.null str = []
| otherwise =
case searcher str of
[] -> [str]
(i:_)
| i == 0 -> psplitter str
| otherwise -> S.take i str : psplitter (S.drop i str)
psplitter !str
| S.null str = []
| otherwise =
case searcher (S.drop patLen str) of
[] -> [str]
(i:_) -> S.take (i + patLen) str :
psplitter (S.drop (i + patLen) str)
strictSplitKeepEnd :: S.ByteString -> S.ByteString -> [S.ByteString]
strictSplitKeepEnd pat
| S.null pat = const (repeat S.empty)
strictSplitKeepEnd pat = splitter
where
!patLen = S.length pat
searcher = strictSearcher False pat
splitter str
| S.null str = []
| otherwise =
case searcher str of
[] -> [str]
(i:_) -> S.take (i + patLen) str :
splitter (S.drop (i + patLen) str)
strictSplitDrop :: S.ByteString -> S.ByteString -> [S.ByteString]
strictSplitDrop pat
| S.null pat = const (repeat S.empty)
strictSplitDrop pat = splitter'
where
!patLen = S.length pat
searcher = strictSearcher False pat
splitter' str
| S.null str = []
| otherwise = splitter str
splitter str
| S.null str = [S.empty]
| otherwise =
case searcher str of
[] -> [str]
(i:_) -> S.take i str : splitter (S.drop (i + patLen) str)
strictRepl :: S.ByteString -> ([S.ByteString] -> [S.ByteString])
-> S.ByteString -> [S.ByteString]
strictRepl pat = repl
where
!patLen = S.length pat
searcher = strictSearcher False pat
repl sub = replacer
where
replacer str
| S.null str = []
| otherwise =
case searcher str of
[] -> [str]
(i:_)
| i == 0 -> sub $ replacer (S.drop patLen str)
| otherwise ->
S.take i str : sub (replacer (S.drop (i + patLen) str))