{-# LANGUAGE Trustworthy #-}
{-# LANGUAGE NoImplicitPrelude #-}
module GHC.IO.Encoding.Failure (
CodingFailureMode(..), codingFailureModeSuffix,
isSurrogate,
recoverDecode, recoverEncode
) where
import GHC.IO
import GHC.IO.Buffer
import GHC.IO.Exception
import GHC.Base
import GHC.Char
import GHC.Word
import GHC.Show
import GHC.Num
import GHC.Real ( fromIntegral )
import Data.Maybe
data CodingFailureMode
= ErrorOnCodingFailure
| IgnoreCodingFailure
| TransliterateCodingFailure
| RoundtripFailure
deriving (Show)
codingFailureModeSuffix :: CodingFailureMode -> String
codingFailureModeSuffix ErrorOnCodingFailure = ""
codingFailureModeSuffix IgnoreCodingFailure = "//IGNORE"
codingFailureModeSuffix TransliterateCodingFailure = "//TRANSLIT"
codingFailureModeSuffix RoundtripFailure = "//ROUNDTRIP"
unrepresentableChar :: Char
unrepresentableChar = '\xFFFD'
{-# INLINE isSurrogate #-}
isSurrogate :: Char -> Bool
isSurrogate c = (0xD800 <= x && x <= 0xDBFF)
|| (0xDC00 <= x && x <= 0xDFFF)
where x = ord c
{-# INLINE escapeToRoundtripCharacterSurrogate #-}
escapeToRoundtripCharacterSurrogate :: Word8 -> Char
escapeToRoundtripCharacterSurrogate b
| b < 128 = chr (fromIntegral b)
| otherwise = chr (0xDC00 + fromIntegral b)
{-# INLINE unescapeRoundtripCharacterSurrogate #-}
unescapeRoundtripCharacterSurrogate :: Char -> Maybe Word8
unescapeRoundtripCharacterSurrogate c
| 0xDC80 <= x && x < 0xDD00 = Just (fromIntegral x)
| otherwise = Nothing
where x = ord c
recoverDecode :: CodingFailureMode -> Buffer Word8 -> Buffer Char
-> IO (Buffer Word8, Buffer Char)
recoverDecode cfm input@Buffer{ bufRaw=iraw, bufL=ir, bufR=_ }
output@Buffer{ bufRaw=oraw, bufL=_, bufR=ow } = do
case cfm of
ErrorOnCodingFailure -> ioe_decodingError
IgnoreCodingFailure -> return (input { bufL=ir+1 }, output)
TransliterateCodingFailure -> do
ow' <- writeCharBuf oraw ow unrepresentableChar
return (input { bufL=ir+1 }, output { bufR=ow' })
RoundtripFailure -> do
b <- readWord8Buf iraw ir
ow' <- writeCharBuf oraw ow (escapeToRoundtripCharacterSurrogate b)
return (input { bufL=ir+1 }, output { bufR=ow' })
recoverEncode :: CodingFailureMode -> Buffer Char -> Buffer Word8
-> IO (Buffer Char, Buffer Word8)
recoverEncode cfm input@Buffer{ bufRaw=iraw, bufL=ir, bufR=_ }
output@Buffer{ bufRaw=oraw, bufL=_, bufR=ow } = do
(c,ir') <- readCharBuf iraw ir
case cfm of
IgnoreCodingFailure -> return (input { bufL=ir' }, output)
TransliterateCodingFailure -> do
if c == '?'
then return (input { bufL=ir' }, output)
else do
_ir' <- writeCharBuf iraw ir '?'
return (input, output)
RoundtripFailure | Just x <- unescapeRoundtripCharacterSurrogate c -> do
writeWord8Buf oraw ow x
return (input { bufL=ir' }, output { bufR=ow+1 })
_ -> ioe_encodingError
ioe_decodingError :: IO a
ioe_decodingError = ioException
(IOError Nothing InvalidArgument "recoverDecode"
"invalid byte sequence" Nothing Nothing)
ioe_encodingError :: IO a
ioe_encodingError = ioException
(IOError Nothing InvalidArgument "recoverEncode"
"invalid character" Nothing Nothing)