{-# LANGUAGE BangPatterns #-}
module Data.ByteString.Search.DFA (
indices
, nonOverlappingIndices
, breakOn
, breakAfter
, replace
, split
, splitKeepEnd
, splitKeepFront
) where
import Data.ByteString.Search.Internal.Utils (automaton)
import Data.ByteString.Search.Substitution
import qualified Data.ByteString as S
import qualified Data.ByteString.Lazy as L
import qualified Data.ByteString.Lazy.Internal as LI
import Data.ByteString.Unsafe (unsafeIndex)
import Data.Array.Base (unsafeAt)
import Data.Bits
{-# INLINE indices #-}
indices :: S.ByteString
-> S.ByteString
-> [Int]
indices = strictSearcher True
{-# INLINE nonOverlappingIndices #-}
nonOverlappingIndices :: S.ByteString
-> S.ByteString
-> [Int]
nonOverlappingIndices = strictSearcher False
breakOn :: S.ByteString
-> S.ByteString
-> (S.ByteString, S.ByteString)
breakOn pat = breaker
where
searcher = strictSearcher False pat
breaker str = case searcher str of
[] -> (str, S.empty)
(i:_) -> S.splitAt i str
breakAfter :: S.ByteString
-> S.ByteString
-> (S.ByteString, S.ByteString)
breakAfter pat = breaker
where
!patLen = S.length pat
searcher = strictSearcher False pat
breaker str = case searcher str of
[] -> (str, S.empty)
(i:_) -> S.splitAt (i + patLen) str
replace :: Substitution rep
=> S.ByteString
-> rep
-> S.ByteString
-> L.ByteString
replace pat
| S.null pat = \sub -> prependCycle sub . flip LI.chunk LI.Empty
| otherwise =
let !patLen = S.length pat
searcher = strictSearcher False pat
repl sub =
let {-# NOINLINE subst #-}
!subst = substitution sub
replacer str
| S.null str = []
| otherwise =
case searcher str of
[] -> [str]
(i:_)
| i == 0 -> subst $ replacer (S.drop patLen str)
| otherwise -> S.take i str : subst
(replacer (S.drop (i + patLen) str))
in replacer
in \sub -> L.fromChunks . repl sub
split :: S.ByteString
-> S.ByteString
-> [S.ByteString]
split pat
| S.null pat = const (repeat S.empty)
split pat = splitter
where
!patLen = S.length pat
searcher = strictSearcher False pat
splitter str
| S.null str = []
| otherwise = splitter' str
splitter' str
| S.null str = [S.empty]
| otherwise =
case searcher str of
[] -> [str]
(i:_) -> S.take i str : splitter' (S.drop (i + patLen) str)
splitKeepEnd :: S.ByteString
-> S.ByteString
-> [S.ByteString]
splitKeepEnd pat
| S.null pat = const (repeat S.empty)
splitKeepEnd pat = splitter
where
!patLen = S.length pat
searcher = strictSearcher False pat
splitter str
| S.null str = []
| otherwise =
case searcher str of
[] -> [str]
(i:_) -> S.take (i + patLen) str :
splitter (S.drop (i + patLen) str)
splitKeepFront :: S.ByteString
-> S.ByteString
-> [S.ByteString]
splitKeepFront pat
| S.null pat = const (repeat S.empty)
splitKeepFront pat = splitter
where
!patLen = S.length pat
searcher = strictSearcher False pat
splitter str
| S.null str = []
| otherwise =
case searcher str of
[] -> [str]
(i:rst)
| i == 0 -> case rst of
[] -> [str]
(j:_) -> S.take j str : splitter' (S.drop j str)
| otherwise -> S.take i str : splitter' (S.drop i str)
splitter' str
| S.null str = []
| otherwise =
case searcher (S.drop patLen str) of
[] -> [str]
(i:_) -> S.take (i + patLen) str :
splitter' (S.drop (i + patLen) str)
strictSearcher :: Bool -> S.ByteString -> S.ByteString -> [Int]
strictSearcher _ !pat
| S.null pat = enumFromTo 0 . S.length
| S.length pat == 1 = let !w = S.head pat in S.elemIndices w
strictSearcher !overlap pat = search
where
!patLen = S.length pat
!auto = automaton pat
!p0 = unsafeIndex pat 0
!ams = if overlap then patLen else 0
search str = match 0 0
where
!strLen = S.length str
{-# INLINE strAt #-}
strAt :: Int -> Int
strAt !i = fromIntegral (unsafeIndex str i)
match 0 idx
| idx == strLen = []
| unsafeIndex str idx == p0 = match 1 (idx + 1)
| otherwise = match 0 (idx + 1)
match state idx
| idx == strLen = []
| otherwise =
let !nstate = unsafeAt auto ((state `shiftL` 8) + strAt idx)
!nxtIdx = idx + 1
in if nstate == patLen
then (nxtIdx - patLen) : match ams nxtIdx
else match nstate nxtIdx