Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Integrate UTF-8 hPutStr to standard hPutStr #589

Merged
merged 5 commits into from
Jun 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 1 addition & 10 deletions src/Data/Text/IO.hs
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,7 @@ import qualified Control.Exception as E
import Control.Monad (liftM2, when)
import Data.IORef (readIORef)
import qualified Data.Text as T
import Data.Text.Internal.Fusion (stream, streamLn)
import Data.Text.Internal.IO (hGetLineWith, readChunk, hPutStream)
import Data.Text.Internal.IO (hGetLineWith, readChunk, hPutStr, hPutStrLn)
import GHC.IO.Buffer (CharBuffer, isEmptyBuffer)
import GHC.IO.Exception (IOException(ioe_type), IOErrorType(InappropriateType))
import GHC.IO.Handle.Internals (augmentIOError, hClose_help, wantReadableHandle)
Expand Down Expand Up @@ -166,14 +165,6 @@ chooseGoodBuffering h = do
hGetLine :: Handle -> IO Text
hGetLine = hGetLineWith T.concat

-- | Write a string to a handle.
hPutStr :: Handle -> Text -> IO ()
hPutStr h = hPutStream h . stream

-- | Write a string to a handle, followed by a newline.
hPutStrLn :: Handle -> Text -> IO ()
hPutStrLn h = hPutStream h . streamLn

-- | The 'interact' function takes a function of type @Text -> Text@
-- as its argument. The entire input from the standard input device is
-- passed to this function as its argument, and the resulting string
Expand Down
47 changes: 41 additions & 6 deletions src/Data/Text/Internal/IO.hs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
{-# LANGUAGE BangPatterns, RecordWildCards #-}
{-# LANGUAGE MagicHash #-}
-- |
-- Module : Data.Text.Internal.IO
-- Copyright : (c) 2009, 2010 Bryan O'Sullivan,
Expand All @@ -19,16 +20,22 @@ module Data.Text.Internal.IO
hGetLineWith
, readChunk
, hPutStream
, hPutStr
, hPutStrLn
) where

import qualified Control.Exception as E
import qualified Data.ByteString as B
import Data.ByteString.Builder (hPutBuilder, charUtf8)
import Data.IORef (readIORef, writeIORef)
import Data.Text (Text)
import Data.Text.Internal.Fusion (unstream)
import Data.Text.Encoding (encodeUtf8, encodeUtf8Builder)
import Data.Text.Internal.Fusion (stream, streamLn, unstream)
import Data.Text.Internal.Fusion.Types (Step(..), Stream(..))
import Data.Text.Internal.Fusion.Size (exactSize, maxSize)
import Data.Text.Unsafe (inlinePerformIO)
import Foreign.Storable (peekElemOff)
import GHC.Exts (reallyUnsafePtrEquality#, isTrue#)
import GHC.IO.Buffer (Buffer(..), BufferState(..), CharBuffer, RawCharBuffer,
bufferAdjustL, bufferElems, charSize, emptyBuffer,
isEmptyBuffer, newCharBuffer, readCharBuf, withRawBuffer,
Expand All @@ -37,7 +44,7 @@ import GHC.IO.Handle.Internals (ioe_EOF, readTextDevice, wantReadableHandle_,
wantWritableHandle)
import GHC.IO.Handle.Text (commitBuffer')
import GHC.IO.Handle.Types (BufferList(..), BufferMode(..), Handle__(..), Newline(..))
import System.IO (Handle, hPutChar)
import System.IO (Handle, hPutChar, utf8)
import System.IO.Error (isEOFError)
import qualified Data.Text as T

Expand Down Expand Up @@ -168,17 +175,45 @@ readChunk hh@Handle__{..} buf = do

-- | Print a @Stream Char@.
hPutStream :: Handle -> Stream Char -> IO ()
-- This function is lifted almost verbatim from GHC.IO.Handle.Text.
hPutStream h str = do
(buffer_mode, nl) <-
hPutStream h str = hPutStreamOrUtf8 h str Nothing

-- | Write a string to a handle.
hPutStr :: Handle -> Text -> IO ()
hPutStr h t = hPutStreamOrUtf8 h (stream t) (Just putUtf8)
where
putUtf8 = B.hPutStr h (encodeUtf8 t)

-- | Write a string to a handle, followed by a newline.
hPutStrLn :: Handle -> Text -> IO ()
hPutStrLn h t = hPutStreamOrUtf8 h (streamLn t) (Just putUtf8)
where
-- Not using B.hPutStrLn because it's not necessarily atomic:
-- https://github.com/haskell/bytestring/issues/200
putUtf8 = hPutBuilder h (encodeUtf8Builder t <> charUtf8 '\n')

-- | 'hPutStream' with an optional special case when the output encoding is
-- UTF-8 and without newline conversion.
hPutStreamOrUtf8 :: Handle -> Stream Char -> Maybe (IO ()) -> IO ()
-- This function is modified from GHC.IO.Handle.Text.
hPutStreamOrUtf8 h str mPutUtf8 = do
(buffer_mode, nl, isUtf8) <-
wantWritableHandle "hPutStr" h $ \h_ -> do
bmode <- getSpareBuffer h_
return (bmode, haOutputNL h_)
return (bmode, haOutputNL h_, eqUTF8 h_)
case buffer_mode of
_ | Just putUtf8 <- mPutUtf8, nl == LF && isUtf8 -> putUtf8
(NoBuffering, _) -> hPutChars h str
(LineBuffering, buf) -> writeLines h nl buf str
(BlockBuffering _, buf) -> writeBlocks (nl == CRLF) h buf str

where
-- If the encoding is UTF-8, it's most likely pointer-equal to
-- 'System.IO.utf8', letting us avoid a String comparison.
-- If it is somehow UTF-8 but not pointer-equal to 'utf8',
-- we will just take a slower branch, but the result is still correct.
eqUTF8 = maybe False (\enc -> isTrue# (reallyUnsafePtrEquality# utf8 enc)) . haCodec
{-# INLINE hPutStreamOrUtf8 #-}

hPutChars :: Handle -> Stream Char -> IO ()
hPutChars h (Stream next0 s0 _len) = loop s0
where
Expand Down
Loading