{-# LANGUAGE BangPatterns,CPP #-}
#if __GLASGOW_HASKELL__ >= 702
{-# LANGUAGE Trustworthy #-}
#endif
module Data.Text.Lazy.Encoding
(
decodeASCII
, decodeLatin1
, decodeUtf8
, decodeUtf16LE
, decodeUtf16BE
, decodeUtf32LE
, decodeUtf32BE
, decodeUtf8'
, decodeUtf8With
, decodeUtf16LEWith
, decodeUtf16BEWith
, decodeUtf32LEWith
, decodeUtf32BEWith
, encodeUtf8
, encodeUtf16LE
, encodeUtf16BE
, encodeUtf32LE
, encodeUtf32BE
#if MIN_VERSION_bytestring(0,10,4)
, encodeUtf8Builder
, encodeUtf8BuilderEscaped
#endif
) where
import Control.Exception (evaluate, try)
import Data.Text.Encoding.Error (OnDecodeError, UnicodeException, strictDecode)
import Data.Text.Internal.Lazy (Text(..), chunk, empty, foldrChunks)
import qualified Data.ByteString as S
import qualified Data.ByteString.Lazy as B
import qualified Data.ByteString.Lazy.Internal as B
import qualified Data.ByteString.Unsafe as B
#if MIN_VERSION_bytestring(0,10,4)
import Data.Word (Word8)
import Data.Monoid (mempty, (<>))
import qualified Data.ByteString.Builder as B
import qualified Data.ByteString.Builder.Extra as B (safeStrategy, toLazyByteStringWith)
import qualified Data.ByteString.Builder.Prim as BP
import qualified Data.Text as T
#endif
import qualified Data.Text.Encoding as TE
import qualified Data.Text.Lazy as L
import qualified Data.Text.Internal.Lazy.Encoding.Fusion as E
import qualified Data.Text.Internal.Lazy.Fusion as F
import Data.Text.Unsafe (unsafeDupablePerformIO)
decodeASCII :: B.ByteString -> Text
decodeASCII = decodeUtf8
{-# DEPRECATED decodeASCII "Use decodeUtf8 instead" #-}
decodeLatin1 :: B.ByteString -> Text
decodeLatin1 = foldr (chunk . TE.decodeLatin1) empty . B.toChunks
decodeUtf8With :: OnDecodeError -> B.ByteString -> Text
decodeUtf8With onErr (B.Chunk b0 bs0) =
case TE.streamDecodeUtf8With onErr b0 of
TE.Some t l f -> chunk t (go f l bs0)
where
go f0 _ (B.Chunk b bs) =
case f0 b of
TE.Some t l f -> chunk t (go f l bs)
go _ l _
| S.null l = empty
| otherwise = case onErr desc (Just (B.unsafeHead l)) of
Nothing -> empty
Just c -> L.singleton c
desc = "Data.Text.Lazy.Encoding.decodeUtf8With: Invalid UTF-8 stream"
decodeUtf8With _ _ = empty
decodeUtf8 :: B.ByteString -> Text
decodeUtf8 = decodeUtf8With strictDecode
{-# INLINE[0] decodeUtf8 #-}
decodeUtf8' :: B.ByteString -> Either UnicodeException Text
decodeUtf8' bs = unsafeDupablePerformIO $ do
let t = decodeUtf8 bs
try (evaluate (rnf t `seq` t))
where
rnf Empty = ()
rnf (Chunk _ ts) = rnf ts
{-# INLINE decodeUtf8' #-}
encodeUtf8 :: Text -> B.ByteString
#if MIN_VERSION_bytestring(0,10,4)
encodeUtf8 Empty = B.empty
encodeUtf8 lt@(Chunk t _) =
B.toLazyByteStringWith strategy B.empty $ encodeUtf8Builder lt
where
firstChunkSize = min B.smallChunkSize (4 * (T.length t + 1))
strategy = B.safeStrategy firstChunkSize B.defaultChunkSize
encodeUtf8Builder :: Text -> B.Builder
encodeUtf8Builder = foldrChunks (\c b -> TE.encodeUtf8Builder c <> b) mempty
{-# INLINE encodeUtf8BuilderEscaped #-}
encodeUtf8BuilderEscaped :: BP.BoundedPrim Word8 -> Text -> B.Builder
encodeUtf8BuilderEscaped prim =
foldrChunks (\c b -> TE.encodeUtf8BuilderEscaped prim c <> b) mempty
#else
encodeUtf8 (Chunk c cs) = B.Chunk (TE.encodeUtf8 c) (encodeUtf8 cs)
encodeUtf8 Empty = B.Empty
#endif
decodeUtf16LEWith :: OnDecodeError -> B.ByteString -> Text
decodeUtf16LEWith onErr bs = F.unstream (E.streamUtf16LE onErr bs)
{-# INLINE decodeUtf16LEWith #-}
decodeUtf16LE :: B.ByteString -> Text
decodeUtf16LE = decodeUtf16LEWith strictDecode
{-# INLINE decodeUtf16LE #-}
decodeUtf16BEWith :: OnDecodeError -> B.ByteString -> Text
decodeUtf16BEWith onErr bs = F.unstream (E.streamUtf16BE onErr bs)
{-# INLINE decodeUtf16BEWith #-}
decodeUtf16BE :: B.ByteString -> Text
decodeUtf16BE = decodeUtf16BEWith strictDecode
{-# INLINE decodeUtf16BE #-}
encodeUtf16LE :: Text -> B.ByteString
encodeUtf16LE txt = B.fromChunks (foldrChunks ((:) . TE.encodeUtf16LE) [] txt)
{-# INLINE encodeUtf16LE #-}
encodeUtf16BE :: Text -> B.ByteString
encodeUtf16BE txt = B.fromChunks (foldrChunks ((:) . TE.encodeUtf16BE) [] txt)
{-# INLINE encodeUtf16BE #-}
decodeUtf32LEWith :: OnDecodeError -> B.ByteString -> Text
decodeUtf32LEWith onErr bs = F.unstream (E.streamUtf32LE onErr bs)
{-# INLINE decodeUtf32LEWith #-}
decodeUtf32LE :: B.ByteString -> Text
decodeUtf32LE = decodeUtf32LEWith strictDecode
{-# INLINE decodeUtf32LE #-}
decodeUtf32BEWith :: OnDecodeError -> B.ByteString -> Text
decodeUtf32BEWith onErr bs = F.unstream (E.streamUtf32BE onErr bs)
{-# INLINE decodeUtf32BEWith #-}
decodeUtf32BE :: B.ByteString -> Text
decodeUtf32BE = decodeUtf32BEWith strictDecode
{-# INLINE decodeUtf32BE #-}
encodeUtf32LE :: Text -> B.ByteString
encodeUtf32LE txt = B.fromChunks (foldrChunks ((:) . TE.encodeUtf32LE) [] txt)
{-# INLINE encodeUtf32LE #-}
encodeUtf32BE :: Text -> B.ByteString
encodeUtf32BE txt = B.fromChunks (foldrChunks ((:) . TE.encodeUtf32BE) [] txt)
{-# INLINE encodeUtf32BE #-}