{-# LANGUAGE BangPatterns, FlexibleInstances, GADTs, OverloadedStrings,
Rank2Types, RecordWildCards, TypeFamilies, TypeSynonymInstances #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
module Data.Attoparsec.Text.Internal
(
Parser
, Result
, parse
, parseOnly
, module Data.Attoparsec.Combinator
, satisfy
, satisfyWith
, anyChar
, skip
, char
, notChar
, peekChar
, peekChar'
, inClass
, notInClass
, skipWhile
, string
, stringCI
, asciiCI
, take
, scan
, runScanner
, takeWhile
, takeWhile1
, takeTill
, takeText
, takeLazyText
, endOfLine
, endOfInput
, match
, atEnd
) where
import Control.Applicative ((<|>), (<$>))
import Control.Monad (when)
import Data.Attoparsec.Combinator ((<?>))
import Data.Attoparsec.Internal
import Data.Attoparsec.Internal.Types hiding (Parser, Failure, Success)
import qualified Data.Attoparsec.Text.Buffer as Buf
import Data.Attoparsec.Text.Buffer (Buffer, buffer)
import Data.Char (chr, ord)
import Data.String (IsString(..))
import Data.Text.Internal (Text(..))
import Prelude hiding (getChar, length, succ, take, takeWhile)
import qualified Data.Attoparsec.Internal.Types as T
import qualified Data.Attoparsec.Text.FastSet as Set
import qualified Data.Text as T
import qualified Data.Text.Lazy as L
import qualified Data.Text.Unsafe as T
type Parser = T.Parser Text
type Result = IResult Text
type Failure r = T.Failure Text Buffer r
type Success a r = T.Success Text Buffer a r
instance (a ~ Text) => IsString (Parser a) where
fromString = string . T.pack
satisfy :: (Char -> Bool) -> Parser Char
satisfy p = do
(k,c) <- ensure 1
let !h = T.unsafeHead c
if p h
then advance k >> return h
else fail "satisfy"
{-# INLINE satisfy #-}
skip :: (Char -> Bool) -> Parser ()
skip p = do
(k,s) <- ensure 1
if p (T.unsafeHead s)
then advance k
else fail "skip"
satisfyWith :: (Char -> a) -> (a -> Bool) -> Parser a
satisfyWith f p = do
(k,s) <- ensure 1
let c = f $! T.unsafeHead s
if p c
then advance k >> return c
else fail "satisfyWith"
{-# INLINE satisfyWith #-}
takeWith :: Int -> (Text -> Bool) -> Parser Text
takeWith n p = do
(k,s) <- ensure n
if p s
then advance k >> return s
else fail "takeWith"
take :: Int -> Parser Text
take n = takeWith (max n 0) (const True)
{-# INLINE take #-}
string :: Text -> Parser Text
string s = takeWith (T.length s) (==s)
{-# INLINE string #-}
stringCI :: Text -> Parser Text
stringCI s = go 0
where
go !n
| n > T.length fs = fail "stringCI"
| otherwise = do
(k,t) <- ensure n
if T.toCaseFold t == fs
then advance k >> return t
else go (n+1)
fs = T.toCaseFold s
{-# INLINE stringCI #-}
{-# DEPRECATED stringCI "this is very inefficient, use asciiCI instead" #-}
asciiCI :: Text -> Parser Text
asciiCI input = do
(k,t) <- ensure n
if asciiToLower t == s
then advance k >> return t
else fail "asciiCI"
where
n = T.length input
s = asciiToLower input
asciiToLower = T.map f
where
offset = ord 'a' - ord 'A'
f c | 'A' <= c && c <= 'Z' = chr (ord c + offset)
| otherwise = c
{-# INLINE asciiCI #-}
skipWhile :: (Char -> Bool) -> Parser ()
skipWhile p = go
where
go = do
t <- T.takeWhile p <$> get
continue <- inputSpansChunks (length t)
when continue go
{-# INLINE skipWhile #-}
takeTill :: (Char -> Bool) -> Parser Text
takeTill p = takeWhile (not . p)
{-# INLINE takeTill #-}
takeWhile :: (Char -> Bool) -> Parser Text
takeWhile p = (T.concat . reverse) `fmap` go []
where
go acc = do
h <- T.takeWhile p <$> get
continue <- inputSpansChunks (length h)
if continue
then go (h:acc)
else return (h:acc)
takeRest :: Parser [Text]
takeRest = go []
where
go acc = do
input <- wantInput
if input
then do
s <- get
advance (length s)
go (s:acc)
else return (reverse acc)
takeText :: Parser Text
takeText = T.concat `fmap` takeRest
takeLazyText :: Parser L.Text
takeLazyText = L.fromChunks `fmap` takeRest
data Scan s = Continue s
| Finished {-# UNPACK #-} !Int Text
scan_ :: (s -> [Text] -> Parser r) -> s -> (s -> Char -> Maybe s) -> Parser r
scan_ f s0 p = go [] s0
where
scanner s !n t =
case T.uncons t of
Just (c,t') -> case p s c of
Just s' -> scanner s' (n+1) t'
Nothing -> Finished n t
Nothing -> Continue s
go acc s = do
input <- get
case scanner s 0 input of
Continue s' -> do continue <- inputSpansChunks (length input)
if continue
then go (input : acc) s'
else f s' (input : acc)
Finished n t -> do advance (length input - length t)
f s (T.take n input : acc)
{-# INLINE scan_ #-}
scan :: s -> (s -> Char -> Maybe s) -> Parser Text
scan = scan_ $ \_ chunks ->
case chunks of
[x] -> return x
xs -> return . T.concat . reverse $ xs
{-# INLINE scan #-}
runScanner :: s -> (s -> Char -> Maybe s) -> Parser (Text, s)
runScanner = scan_ $ \s xs -> return (T.concat (reverse xs), s)
{-# INLINE runScanner #-}
takeWhile1 :: (Char -> Bool) -> Parser Text
takeWhile1 p = do
(`when` demandInput) =<< endOfChunk
h <- T.takeWhile p <$> get
let len = length h
when (len == 0) $ fail "takeWhile1"
advance len
eoc <- endOfChunk
if eoc
then (h<>) `fmap` takeWhile p
else return h
inClass :: String -> Char -> Bool
inClass s = (`Set.member` mySet)
where mySet = Set.charClass s
{-# NOINLINE mySet #-}
{-# INLINE inClass #-}
notInClass :: String -> Char -> Bool
notInClass s = not . inClass s
{-# INLINE notInClass #-}
anyChar :: Parser Char
anyChar = satisfy $ const True
{-# INLINE anyChar #-}
char :: Char -> Parser Char
char c = satisfy (== c) <?> show c
{-# INLINE char #-}
notChar :: Char -> Parser Char
notChar c = satisfy (/= c) <?> "not " ++ show c
{-# INLINE notChar #-}
peekChar :: Parser (Maybe Char)
peekChar = T.Parser $ \t pos more _lose succ ->
case () of
_| pos < lengthOf t ->
let T.Iter !c _ = Buf.iter t (fromPos pos)
in succ t pos more (Just c)
| more == Complete ->
succ t pos more Nothing
| otherwise ->
let succ' t' pos' more' =
let T.Iter !c _ = Buf.iter t' (fromPos pos')
in succ t' pos' more' (Just c)
lose' t' pos' more' = succ t' pos' more' Nothing
in prompt t pos more lose' succ'
{-# INLINE peekChar #-}
peekChar' :: Parser Char
peekChar' = do
(_,s) <- ensure 1
return $! T.unsafeHead s
{-# INLINE peekChar' #-}
endOfLine :: Parser ()
endOfLine = (char '\n' >> return ()) <|> (string "\r\n" >> return ())
failK :: Failure a
failK t (Pos pos) _more stack msg = Fail (Buf.dropWord16 pos t) stack msg
{-# INLINE failK #-}
successK :: Success a a
successK t (Pos pos) _more a = Done (Buf.dropWord16 pos t) a
{-# INLINE successK #-}
parse :: Parser a -> Text -> Result a
parse m s = runParser m (buffer s) 0 Incomplete failK successK
{-# INLINE parse #-}
parseOnly :: Parser a -> Text -> Either String a
parseOnly m s = case runParser m (buffer s) 0 Complete failK successK of
Fail _ _ err -> Left err
Done _ a -> Right a
_ -> error "parseOnly: impossible error!"
{-# INLINE parseOnly #-}
get :: Parser Text
get = T.Parser $ \t pos more _lose succ ->
succ t pos more (Buf.dropWord16 (fromPos pos) t)
{-# INLINE get #-}
endOfChunk :: Parser Bool
endOfChunk = T.Parser $ \t pos more _lose succ ->
succ t pos more (pos == lengthOf t)
{-# INLINE endOfChunk #-}
inputSpansChunks :: Pos -> Parser Bool
inputSpansChunks i = T.Parser $ \t pos_ more _lose succ ->
let pos = pos_ + i
in if pos < lengthOf t || more == Complete
then succ t pos more False
else let lose' t' pos' more' = succ t' pos' more' False
succ' t' pos' more' = succ t' pos' more' True
in prompt t pos more lose' succ'
{-# INLINE inputSpansChunks #-}
advance :: Pos -> Parser ()
advance n = T.Parser $ \t pos more _lose succ -> succ t (pos+n) more ()
{-# INLINE advance #-}
ensureSuspended :: Int -> Buffer -> Pos -> More
-> Failure r -> Success (Pos, Text) r
-> Result r
ensureSuspended n t pos more lose succ =
runParser (demandInput >> go) t pos more lose succ
where go = T.Parser $ \t' pos' more' lose' succ' ->
case lengthAtLeast pos' n t' of
Just n' -> succ' t' pos' more' (n', substring pos n' t')
Nothing -> runParser (demandInput >> go) t' pos' more' lose' succ'
ensure :: Int -> Parser (Pos, Text)
ensure n = T.Parser $ \t pos more lose succ ->
case lengthAtLeast pos n t of
Just n' -> succ t pos more (n', substring pos n' t)
Nothing -> ensureSuspended n t pos more lose succ
{-# INLINE ensure #-}
match :: Parser a -> Parser (Text, a)
match p = T.Parser $ \t pos more lose succ ->
let succ' t' pos' more' a = succ t' pos' more'
(substring pos (pos'-pos) t', a)
in runParser p t pos more lose succ'
lengthAtLeast :: Pos -> Int -> Buffer -> Maybe Pos
lengthAtLeast pos n t = go 0 (fromPos pos)
where go i !p
| i == n = Just (Pos p - pos)
| p == len = Nothing
| otherwise = go (i+1) (p + Buf.iter_ t p)
Pos len = lengthOf t
{-# INLINE lengthAtLeast #-}
substring :: Pos -> Pos -> Buffer -> Text
substring (Pos pos) (Pos n) = Buf.substring pos n
{-# INLINE substring #-}
lengthOf :: Buffer -> Pos
lengthOf = Pos . Buf.length
length :: Text -> Pos
length = Pos . T.length