-- | Program to replace HTML tags by whitespace
--
-- This program was originally contributed by Petr Prokhorenkov.
--
-- Tested in this benchmark:
--
-- * Reading the file
--
-- * Replacing text between HTML tags (<>) with whitespace
--
-- * Writing back to a handle
--
{-# OPTIONS_GHC -fspec-constr-count=5 #-}
module Benchmarks.Programs.StripTags
( benchmark
) where
import Criterion (Benchmark, bgroup, bench)
import Data.List (mapAccumL)
import System.IO (Handle, hPutStr)
import qualified Data.ByteString as B
import qualified Data.ByteString.Char8 as BC
import qualified Data.Text as T
import qualified Data.Text.Encoding as T
import qualified Data.Text.IO as T
-- | Build the @StripTags@ benchmark group.
--
-- Every case reads the file at the given path, runs the tag-stripping
-- transformation for one string representation, and writes the result to
-- the given handle:
--
-- * @String@: 'String' I/O from the Prelude and "System.IO".
--
-- * @ByteString@: strict 'B.ByteString' I/O.
--
-- * @Text@: "Data.Text.IO".
--
-- * @TextByteString@: 'B.ByteString' I/O with UTF-8 decoding and encoding
--   around the 'T.Text' transformation.
benchmark :: FilePath -> Handle -> IO Benchmark
benchmark input sink = return (bgroup "StripTags" cases)
  where
    cases =
        [ bench "String" viaString
        , bench "ByteString" viaByteString
        , bench "Text" viaText
        , bench "TextByteString" viaTextByteString
        ]

    viaString, viaByteString, viaText, viaTextByteString :: IO ()

    viaString = do
        contents <- readFile input
        hPutStr sink (string contents)

    viaByteString = do
        contents <- B.readFile input
        B.hPutStr sink (byteString contents)

    viaText = do
        contents <- T.readFile input
        T.hPutStr sink (text contents)

    -- Raw bytes are read and written; the text transformation sits between
    -- a UTF-8 decode and a UTF-8 encode.
    viaTextByteString = do
        raw <- B.readFile input
        B.hPutStr sink (T.encodeUtf8 (text (T.decodeUtf8 raw)))
-- | Replace every HTML tag in a 'String' (the angle brackets and everything
-- between them) with spaces, keeping the length of the input.
string :: String -> String
string = snd . mapAccumL step 0

-- | Same transformation as 'string', for strict 'T.Text'.
text :: T.Text -> T.Text
text = snd . T.mapAccumL step 0

-- | Same transformation as 'string', for strict 'B.ByteString', with each
-- byte handled as a 'Char' through "Data.ByteString.Char8".
byteString :: B.ByteString -> B.ByteString
byteString = snd . BC.mapAccumL step 0

-- | One step of the tag-stripping scan.
--
-- The accumulator is the current tag nesting depth (0 means "outside any
-- tag").  The result is the new depth together with the character to emit:
-- a space when the character belongs to a tag, the character itself
-- otherwise.
--
-- A character belongs to a tag when the depth is positive before it (inside
-- a tag, which includes the closing @>@) or positive after it (the opening
-- @<@).
step :: Int -> Char -> (Int, Char)
step depth c
    | depth > 0 || depth' > 0 = (depth', ' ')
    | otherwise               = (depth', c)
  where
    -- Clamp at zero so that a stray '>' in running text is kept as a literal
    -- and cannot push the depth negative, which would stop later tags from
    -- being recognised.
    depth' = max 0 (depth + delta c)

    -- '<' opens a tag and '>' closes it.  With the signs the other way
    -- round the depth never becomes positive on well-formed input, and no
    -- tag is ever blanked.
    delta :: Char -> Int
    delta '<' = 1
    delta '>' = -1
    delta _   = 0