{-# OPTIONS_GHC -fno-warn-unused-imports #-}
-- ignore warning from 'import Data.Text.Encoding'

-- |
-- Module      : Blaze.ByteString.Builder.Char.Utf8
-- Copyright   : (c) 2010 Jasper Van der Jeugt & Simon Meier
-- License     : BSD3-style (see LICENSE)
--
-- Maintainer  : Simon Meier <iridcode@gmail.com>
-- Stability   : experimental
-- Portability : tested on GHC only
--
-- 'Write's and 'Builder's for serializing Unicode characters using the UTF-8
-- encoding.
--
module Blaze.ByteString.Builder.Char.Utf8
(
-- * Writing UTF-8 encoded characters to a buffer
writeChar
-- * Creating Builders from UTF-8 encoded characters
, fromChar
, fromString
, fromShow
, fromText
, fromLazyText
) where
import Foreign
import Data.Char (ord)
import qualified Data.Text as TS
import qualified Data.Text.Encoding as TS -- imported for documentation links
import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.Encoding as TS -- imported for documentation links
import Blaze.ByteString.Builder.Internal
-- | Write a single Unicode character to a buffer in its UTF-8 encoding.
--
-- The character is dispatched through 'encodeCharUtf8'. Each of the four
-- continuations below pokes the bytes it is handed at consecutive byte
-- offsets, starting at offset 0. The bound passed to 'boundedWrite' is 4,
-- which is the length of the longest sequence 'encodeCharUtf8' produces.
--
writeChar :: Char -> Write
writeChar ch = boundedWrite 4 (encodeCharUtf8 poke1 poke2 poke3 poke4 ch)
  where
    poke1 b0 = pokeN 1 $ \ptr ->
        pokeByteOff ptr 0 b0

    poke2 b0 b1 = pokeN 2 $ \ptr ->
        pokeByteOff ptr 0 b0 >>
        pokeByteOff ptr 1 b1

    poke3 b0 b1 b2 = pokeN 3 $ \ptr ->
        pokeByteOff ptr 0 b0 >>
        pokeByteOff ptr 1 b1 >>
        pokeByteOff ptr 2 b2

    poke4 b0 b1 b2 b3 = pokeN 4 $ \ptr ->
        pokeByteOff ptr 0 b0 >>
        pokeByteOff ptr 1 b1 >>
        pokeByteOff ptr 2 b2 >>
        pokeByteOff ptr 3 b3
{-# INLINE writeChar #-}

-- | Encode a Unicode character as UTF-8 and hand the resulting bytes to a
-- caller-supplied continuation.
--
-- This function only computes the bytes; it does not know what should happen
-- to them. Exactly one of the four continuations is invoked, selected by the
-- code point of the input:
--
-- * up to @0x7F@: the 1-byte continuation;
--
-- * up to @0x07FF@: the 2-byte continuation;
--
-- * up to @0xFFFF@: the 3-byte continuation;
--
-- * anything larger: the 4-byte continuation.
--
encodeCharUtf8 :: (Word8 -> a)                             -- ^ 1-byte UTF-8
               -> (Word8 -> Word8 -> a)                    -- ^ 2-byte UTF-8
               -> (Word8 -> Word8 -> Word8 -> a)           -- ^ 3-byte UTF-8
               -> (Word8 -> Word8 -> Word8 -> Word8 -> a)  -- ^ 4-byte UTF-8
               -> Char                                     -- ^ Input 'Char'
               -> a                                        -- ^ Result
encodeCharUtf8 one two three four ch
    | cp <= 0x7F   = one   (byte cp)
    | cp <= 0x07FF = two   (lead 0xC0 6)  (cont 0)
    | cp <= 0xFFFF = three (lead 0xE0 12) (cont 6)  (cont 0)
    | otherwise    = four  (lead 0xF0 18) (cont 12) (cont 6) (cont 0)
  where
    -- Code point of the input character.
    cp = ord ch

    -- Narrow an 'Int' to a single byte.
    byte :: Int -> Word8
    byte = fromIntegral

    -- Leading byte: the code point shifted right by @k@ bits, plus the
    -- length marker @tag@.
    lead tag k = byte ((cp `shiftR` k) + tag)

    -- Continuation byte: six bits of the code point starting at bit @k@,
    -- plus the @0x80@ marker.
    cont k = byte (((cp `shiftR` k) .&. 0x3F) + 0x80)
{-# INLINE encodeCharUtf8 #-}

-- | /O(1)/. Serialize a Unicode character using the UTF-8 encoding.
--
-- Built from 'writeChar' via 'fromWriteSingleton'.
--
fromChar :: Char -> Builder
fromChar = fromWriteSingleton writeChar

-- | /O(n)/. Serialize a Unicode 'String' using the UTF-8 encoding.
--
-- Built from 'writeChar' via 'fromWriteList'.
--
fromString :: String -> Builder
fromString = fromWriteList writeChar
-- Performance note: ^^^
--
-- fromWrite2List made things slightly worse for the blaze-html benchmarks
-- despite being better when serializing only a list. Probably, the cache is
-- already occupied enough with dealing with the data from Html rendering.
--

-- | /O(n)/. Serialize a value by 'Show'ing it and UTF-8 encoding the resulting
-- 'String' with 'fromString'.
--
fromShow :: Show a => a -> Builder
fromShow = fromString . show

-- | /O(n)/. Serialize a strict Unicode 'TS.Text' value using the UTF-8
-- encoding. The text is unpacked to a 'String' and passed to 'fromString'.
--
fromText :: TS.Text -> Builder
fromText = fromString . TS.unpack
{-# INLINE fromText #-}

-- | /O(n)/. Serialize a lazy Unicode 'TL.Text' value using the UTF-8
-- encoding. The text is unpacked to a 'String' and passed to 'fromString'.
--
fromLazyText :: TL.Text -> Builder
fromLazyText = fromString . TL.unpack
{-# INLINE fromLazyText #-}