Haskell: Real World Code Sample

,

Here's a example of a real world haskell source code. It is from David Roundy's darcs project at http://darcs.net/.

{-# OPTIONS -fffi #-}
-----------------------------------------------------------------------------
-- |
-- Module      :  FastPackedString
-- Copyright   :  (c) The University of Glasgow 2001, David Roundy 2003
-- License : GPL (I'm happy to also license this file BSD style but don't
--           want to bother distributing two license files with darcs.
--
-- Maintainer  :  droundy@abridgegame.org
-- Stability   :  experimental
-- Portability :  portable
--
-- An efficient implementation of strings.
--
-----------------------------------------------------------------------------

-- Original GHC implementation by Bryan O\'Sullivan,
-- rewritten to use UArray by Simon Marlow.
-- rewritten to support slices and use ForeignPtr by David Roundy

module FastPackedString (
        -- * The @PackedString@ type
        PackedString,      -- abstract, instances: Eq, Ord, Show, Typeable
        unsafeWithInternals, -- :: PackedString -> (Ptr Word8 -> Int -> IO a) -> IO a

         -- * Converting to and from @PackedString@s
    generatePS,  -- :: Int -> (Ptr Word8 -> Int -> IO Int) -> IO PackedString
        packString,  -- :: String -> PackedString
        packWords,   -- :: [Word8] -> PackedString
        unpackPS,    -- :: PackedString -> String
        unpackWords, -- :: PackedString -> [Word8]
        unpackPSfromUTF8, -- :: PackedString -> String

        -- * I\/O with @PackedString@s
        hPutPS,      -- :: Handle -> PackedString -> IO ()
        hGetPS,      -- :: Handle -> Int -> IO PackedString
    hGetContentsPS, -- :: Handle -> IO PackedString
        readFilePS,  -- :: FilePath -> IO PackedString
        writeFilePS, -- :: FilePath -> PackedString -> IO ()
        gzReadFilePS,-- :: FilePath -> IO PackedString
        mmapFilePS,  -- :: FilePath -> IO PackedString

        -- * List-like manipulation functions
        nilPS,       -- :: PackedString
        consPS,      -- :: Char -> PackedString -> PackedString
        headPS,      -- :: PackedString -> Char
        tailPS,      -- :: PackedString -> PackedString
        lastPS,      -- :: PackedString -> Char
        nullPS,      -- :: PackedString -> Bool
        appendPS,    -- :: PackedString -> PackedString -> PackedString
        lengthPS,    -- :: PackedString -> Int
        indexPS,     -- :: PackedString -> Int -> Char
        indexPSW,    -- :: PackedString -> Int -> Word8
        mapPS,       -- :: (Char -> Char) -> PackedString -> PackedString
        --filterPS,    -- :: (Char -> Bool) -> PackedString -> PackedString
        reversePS,   -- :: PackedString -> PackedString
        concatPS,    -- :: [PackedString] -> PackedString
        concatLenPS, -- :: Int -> [PackedString] -> PackedString
        elemPS,      -- :: Char -> PackedString -> Bool
        takePS,      -- :: Int -> PackedString -> PackedString
        dropPS,      -- :: Int -> PackedString -> PackedString
        splitAtPS,   -- :: Int -> PackedString -> (PackedString, PackedString)

        foldlPS,     -- :: (a -> Char -> a) -> a -> PackedString -> a
        foldrPS,     -- :: (Char -> a -> a) -> a -> PackedString -> a
        anyPS,
        takeWhilePS, -- :: (Char -> Bool) -> PackedString -> PackedString
        dropWhilePS, -- :: (Char -> Bool) -> PackedString -> PackedString
        dropWhitePS, -- :: PackedString -> PackedString
        breakWhitePS,-- :: PackedString -> Maybe (PackedString,PackedString)
        spanPS,      -- :: (Char -> Bool) -> PackedString -> (PackedString, PackedString)
        spanEndPS,   -- :: (Char -> Bool) -> PackedString -> (PackedString, PackedString)
        breakPS,     -- :: (Char -> Bool) -> PackedString -> (PackedString, PackedString)
        breakOnPS,   -- :: Char -> PackedString -> (PackedString, PackedString)
        linesPS,     -- :: PackedString -> [PackedString]
        unlinesPS,     -- :: [PackedString] -> PackedString
        findPS,

        wordsPS,     -- :: PackedString -> [PackedString]
        splitPS,     -- :: Char -> PackedString -> [PackedString]
        splitWithPS, -- :: (Char -> Bool) -> PackedString -> [PackedString]

--      joinPS,      -- :: PackedString -> [PackedString] -> PackedString

        breakFirstPS,-- :: Char -> PackedString -> Maybe (PackedString,PackedString)
        breakLastPS, -- :: Char -> PackedString -> Maybe (PackedString,PackedString)
        readIntPS,   -- :: PackedString -> Maybe (Int, PackedString)
        mylexPS,     -- :: PackedString -> Maybe (PackedString, PackedString)
        is_funky,    -- :: PackedString -> Bool
        fromHex2PS,  -- :: PackedString -> PackedString
        fromPS2Hex,  -- :: PackedString -> PackedString
        betweenLinesPS,--  :: PackedString -> PackedString -> PackedString -> Maybe (PackedString)
    ) where

import IO ( Handle, hClose, hFileSize, IOMode(ReadMode,WriteMode),
            hSeek, SeekMode(SeekFromEnd), hGetChar )

import Autoconf ( use_mmap, use_hideous_malloc_hack )

import System.Mem ( performGC )
import Foreign.Storable ( peekElemOff, peek, poke )
import Ptr ( nullPtr, plusPtr, minusPtr, Ptr )
import Foreign.Marshal.Alloc ( free )
import Foreign.Marshal.Array ( pokeArray, mallocArray, reallocArray,
                               peekArray, advancePtr )
import Foreign.Marshal.Utils ( with )
import Foreign.C.String
import Foreign.C.Types ( CLong, CInt, CSize )
import Data.Char
import Data.Word
import Monad ( liftM, when )

import System.IO.Unsafe ( unsafePerformIO )
import System.IO ( hPutBuf, hGetBuf )

import System.IO ( openBinaryFile )

import Foreign.ForeignPtr( ForeignPtr, withForeignPtr, mallocForeignPtrArray,
                           newForeignPtr )
#if defined(__GLASGOW_HASKELL__)
import qualified Foreign.Concurrent as FC ( newForeignPtr )
import System.Posix ( handleToFd )
#endif

#ifdef DEBUG_PS
import Foreign.ForeignPtr ( addForeignPtrFinalizer )
#endif
import Foreign.Ptr ( FunPtr )

debugForeignPtr :: ForeignPtr a -> String -> IO ()
#ifdef DEBUG_PS
foreign import ccall unsafe "static fpstring.h debug_alloc" debug_alloc
    :: Ptr a -> CString -> IO ()
foreign import ccall unsafe "static fpstring.h & debug_free" debug_free
    :: FunPtr (Ptr a -> IO ())
debugForeignPtr fp n =
    withCString n $ \cname-> withForeignPtr fp $ \p->
    do debug_alloc p cname
       addForeignPtrFinalizer debug_free fp
#else
debugForeignPtr _ _ = return ()
#endif

foreign import ccall unsafe "static fpstring.h dumb_malloc" dumb_malloc
    :: Int -> IO (Ptr Word8)
foreign import ccall unsafe "static stdio.h &free" c_free
    :: FunPtr (Ptr Word8 -> IO ())
mallocForeignPtr :: Int -> IO (ForeignPtr Word8)
mallocForeignPtr l
    = do when (l > 1000000) performGC
         if use_hideous_malloc_hack
          then do p <- dumb_malloc $ max 1 l
                  when (p == nullPtr) $
                       fail $ "Memory allocation error! "++show l
                  newForeignPtr c_free p
          else mallocForeignPtrArray l

----------------------------------------------------------------------------
--A way of creating ForeignPtrs outside the IO monad (althogh it still
--isn't entirely "safe", but at least it's convenient.

createPS :: Int -> (Ptr Word8 -> IO ()) -> PackedString
createPS l write_ptr =
    unsafePerformIO $ do fp <- mallocForeignPtr l
                         debugForeignPtr fp "createPS"
                         withForeignPtr fp $ \p -> write_ptr p
                         return $ PS fp 0 l

-- -----------------------------------------------------------------------------
-- PackedString type declaration

-- | A space-efficient representation of a 'String', which supports various
-- efficient operations.  A 'PackedString' contains full Unicode 'Char's.
data PackedString = PS !(ForeignPtr Word8) !Int !Int

-- -----------------------------------------------------------------------------
-- unsafeWithInternals

-- | Do something with the internals of a PackedString. Beware of
-- altering the contents!
unsafeWithInternals :: PackedString -> (Ptr Word8 -> Int -> IO a) -> IO a
unsafeWithInternals (PS fp s l) f
 = withForeignPtr fp $ \p -> f (p `plusPtr` s) l

{-# INLINE (!) #-}
(!) :: PackedString -> Int -> Word8
(PS x s _l) ! i
    = unsafePerformIO $ withForeignPtr x $ \p -> peekElemOff p (s+i)
  -- | i < 0 = error "Can't access negative element in PackedString."
  -- | i >= l = error "Out of range element in PackedString."
  -- | otherwise = unsafePerformIO $ withForeignPtr x $ \p -> peekElemOff p (s+i)

-- -----------------------------------------------------------------------------
-- generatePS

-- | Given the maximum size needed and a function to make the contents
-- of a PackedString, generatePS makes the PackedString. The generating
-- function is required to return the actual size (<= the maximum size).

generatePS :: Int -> (Ptr Word8 -> IO Int) -> IO PackedString
generatePS i f
 = do p <- mallocArray i
      i' <- f p
      p' <- reallocArray p i'
      fp <- newForeignPtr c_free p'
      return $ PS fp 0 i'

instance Eq PackedString where
   (==) = psniceq

foreign import ccall unsafe "static string.h memcmp" c_memcmp
    :: Ptr Word8 -> Ptr Word8 -> Int -> IO Int

{-# INLINE psniceq #-}
psniceq :: PackedString -> PackedString -> Bool
psniceq a b | nullPS a && nullPS b = True
psniceq (PS x1 s1 l1) (PS x2 s2 l2) =
    ((l1 == l2) &&) $ unsafePerformIO $ withForeignPtr x1 $ \p1->
    withForeignPtr x2 $ \p2 ->
        if p1 == p2 && s1 == s2
        then return True
        else liftM (==0) $ c_memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) l1

instance Ord PackedString where
    compare = pscmp

pscmp :: PackedString -> PackedString -> Ordering
pscmp (PS x1 s1 l1) (PS x2 s2 l2) =
    unsafePerformIO $ withForeignPtr x1 $ \p1->
        withForeignPtr x2 $ \p2 ->
    let doc :: Ptr Word8 -> Ptr Word8 -> IO Ordering
        st1 = p1 `plusPtr` s1 `plusPtr` l1
        st2 = p2 `plusPtr` s2 `plusPtr` l2
        doc w1 w2 =
            if w1 == st1 && w2 == st2 then return EQ
            else if w1 == st1 then return LT
                 else if w2 == st2 then return GT
                      else do h1 <- peek w1
                              h2 <- peek w2
                              if h1 < h2
                                 then return LT
                                 else if h1 == h2
                                      then doc (w1 `plusPtr` 1) (w2 `plusPtr` 1)
                                      else return GT
        in
        doc (p1 `plusPtr` s1) (p2 `plusPtr` s2)

--instance Read PackedString: ToDo

instance Show PackedString where
    showsPrec p ps r = showsPrec p (unpackPS ps) r

-- -----------------------------------------------------------------------------
-- Constructor functions

nilPS :: PackedString
nilPS = unsafePerformIO $ do fp <- mallocForeignPtr 1
                             debugForeignPtr fp "nilPS"
                             return $ PS fp 0 0

consPS :: Char -> PackedString -> PackedString
consPS c cs = packString (c : (unpackPS cs)) -- ToDo:better

-- | Convert a 'String' into a 'PackedString'
packString :: String -> PackedString
packString str = createPS (length str) $ \p -> pokeArray p $ map c2w str
packWords :: [Word8] -> PackedString
packWords s = createPS (length s) $ \p -> pokeArray p s

{-# INLINE w2c #-}
w2c :: Word8 -> Char
w2c = chr . fromIntegral
{-# INLINE c2w #-}
c2w :: Char -> Word8
c2w = fromIntegral . ord

-- -----------------------------------------------------------------------------
-- Destructor functions (taking PackedStrings apart)

-- | Convert a 'PackedString' into a 'String'
unpackPS :: PackedString -> String
unpackPS (PS ps s l)
 = map w2c $ unsafePerformIO
           $ withForeignPtr ps $ \p -> peekArray l (p `plusPtr` s)
{-
unpackPS :: PackedString -> String
unpackPS theps@(PS ps s l)
 | l >= 1024 = map w2c (unsafePerformIO (withForeignPtr ps $
                                        \p -> peekArray 1024 (p `plusPtr` s)))
            ++ unpackPS (PS ps (s + 1024) (l - 1024))
 | l >= 128 = map w2c (unsafePerformIO (withForeignPtr ps $
                                       \p -> peekArray 128 (p `plusPtr` s)))
           ++ unpackPS (PS ps (s + 128) (l - 128))
 | l > 0 = unsafeHeadPS theps : unpackPS (unsafeTailPS theps)
 | otherwise = ""
-}
{-
unpackPS :: PackedString -> String
unpackPS theps = if nullPS theps then []
                 else unsafeHeadPS theps : unpackPS (unsafeTailPS theps)
-}
unpackWords :: PackedString -> [Word8]
unpackWords ps@(PS x s _) =
    if nullPS ps then []
    else (unsafePerformIO $ withForeignPtr x $ \p -> peekElemOff p s)
             : unpackWords (unsafeTailPS ps)

unpackPSfromUTF8 :: PackedString -> String
unpackPSfromUTF8 (PS _ _ 0) = ""
unpackPSfromUTF8 (PS x s l) =
    unsafePerformIO $ withForeignPtr x $ \p->
    do outbuf <- mallocArray l
       lout <- utf8_to_ints outbuf (p `plusPtr` s) l
       when (lout < 0) $ error "Bad UTF8!"
       str <- (map chr) `liftM` peekArray lout outbuf
       free outbuf
       return str

foreign import ccall unsafe "static fpstring.h utf8_to_ints" utf8_to_ints
    :: Ptr Int -> Ptr Word8 -> Int -> IO Int

-- -----------------------------------------------------------------------------
-- List-mimicking functions for PackedStrings

{-# INLINE lengthPS #-}
lengthPS :: PackedString -> Int
lengthPS (PS _ _ l) = l

{-# INLINE indexPSW #-}
indexPSW :: PackedString -> Int -> Word8
indexPSW theps i | i < 0 = error "Negative index in indexPS"
                 | i >= lengthPS theps = error "Out of bounds in indexPS"
                 | otherwise = theps ! i

{-# INLINE indexPS #-}
indexPS :: PackedString -> Int -> Char
indexPS theps i | i < 0 = error "Negative index in indexPS"
                | i >= lengthPS theps = error "Out of bounds in indexPS"
                | otherwise = w2c $ theps ! i

{-# INLINE lastPS #-}
lastPS :: PackedString -> Char
lastPS ps@(PS x s l) -- ps ! 0 is inlined manually to eliminate a (+0)
  | nullPS ps = error "FastPackedString.lastPS: last []"
  | otherwise  = w2c $ unsafePerformIO $ withForeignPtr x $
                 \p -> peekElemOff p (s+l-1)

{-# INLINE headPS #-}
headPS :: PackedString -> Char
headPS ps@(PS x s _) -- ps ! 0 is inlined manually to eliminate a (+0)
  | nullPS ps = error "FastPackedString.headPS: head []"
  | otherwise  = w2c $ unsafePerformIO $ withForeignPtr x $ \p -> peekElemOff p s

{-# INLINE unsafeHeadPS #-}
unsafeHeadPS :: PackedString -> Char
unsafeHeadPS (PS x s _) -- ps ! 0 is inlined manually to eliminate a (+0)
  = w2c $ unsafePerformIO $ withForeignPtr x $ \p -> peekElemOff p s

{-# INLINE tailPS #-}
tailPS :: PackedString -> PackedString
tailPS ps
  | len <= 0 = error "FastPackedString.tailPS: tail []"
  | len == 1 = nilPS
  | otherwise  = substrPS ps 1 (len - 1)
  where
    len = lengthPS ps

{-# INLINE unsafeTailPS #-}
unsafeTailPS :: PackedString -> PackedString
unsafeTailPS (PS ps s l)
  | l == 1 = nilPS
  | otherwise  = PS ps (s+1) (l-1)

{-# INLINE nullPS #-}
nullPS :: PackedString -> Bool
nullPS (PS _ _ l) = l == 0

appendPS :: PackedString -> PackedString -> PackedString
appendPS xs ys
  | nullPS xs = ys
  | nullPS ys = xs
  | otherwise  = concatPS [xs,ys]

mapPS :: (Char -> Char) -> PackedString -> PackedString
mapPS func (PS ps s l) = createPS l $ \p-> withForeignPtr ps $
                         \f-> mint (f `plusPtr` s) p l
    where mint :: Ptr Word8 -> Ptr Word8 -> Int -> IO ()
          mint _ _ 0 = return ()
          mint f t len = do val <- peek f
                            poke t $ c2w $ func $ w2c val
                            mint (f `plusPtr` 1) (t `plusPtr` 1) (len - 1)

--filterPS :: (Char -> Bool) -> PackedString -> PackedString {-or String?-}
--filterPS pred ps = packString (filter pred (unpackPS ps))

foldlPS :: (a -> Char -> a) -> a -> PackedString -> a
foldlPS f b ps = foldl f b (unpackPS ps)

foldrPS :: (Char -> a -> a) -> a -> PackedString -> a
foldrPS f v ps = foldr f v (unpackPS ps)

{-# INLINE takePS #-}
takePS :: Int -> PackedString -> PackedString
takePS n ps@(PS x s _) = if n >= lengthPS ps then ps
                         else PS x s n -- substrPS ps 0 (n - 1)

{-# INLINE dropPS #-}
dropPS  :: Int -> PackedString -> PackedString
dropPS n ps@(PS x s l)
    | n >= lengthPS ps = nilPS
    | otherwise = PS x (s+n) (l-n) -- substrPS ps n (lengthPS ps - 1)

{-# INLINE splitAtPS #-}
splitAtPS :: Int -> PackedString -> (PackedString, PackedString)
splitAtPS  n ps  = (takePS n ps, dropPS n ps)

-- This must be fast, it's used heavily in Printer. -- jch
anyPS :: (Char -> Bool) -> PackedString -> Bool
anyPS f (PS x s l) =
    unsafePerformIO $ withForeignPtr x $ \ptr ->
        lookat (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
    where lookat :: Ptr Word8 -> Ptr Word8 -> IO Bool
          lookat p st | p == st = return False
                      | otherwise = do w <- peek p
                                       if f $ w2c w
                                          then return True
                                          else lookat (p `plusPtr` 1) st

findWhenPS :: (Char -> Bool) -> PackedString -> Int
findWhenPS f ps = seq f $
    if nullPS ps then 0
    else if f $ unsafeHeadPS ps then 0
         else 1 + findWhenPS f (unsafeTailPS ps)

findFromEndUntilPS :: (Char -> Bool) -> PackedString -> Int
findFromEndUntilPS f ps@(PS x s l) = seq f $
    if nullPS ps then 0
    else if f $ lastPS ps then l
         else findFromEndUntilPS f (PS x s (l-1))

{-# INLINE takeWhilePS #-}
takeWhilePS :: (Char -> Bool) -> PackedString -> PackedString
takeWhilePS f ps = seq f $ takePS (findWhenPS (not . f) ps) ps

{-# INLINE dropWhilePS #-}
dropWhilePS :: (Char -> Bool) -> PackedString -> PackedString
dropWhilePS f ps = seq f $ dropPS (findWhenPS (not . f) ps) ps

{-# INLINE dropWhitePS #-}
dropWhitePS :: PackedString -> PackedString
dropWhitePS (PS x s l) =
    unsafePerformIO $ withForeignPtr x $ \p->
    do i <- first_nonwhite (p `plusPtr` s) l
       return $ if i == l then nilPS
                else PS x (s+i) (l-i)

foreign import ccall unsafe "fpstring.h first_nonwhite" first_nonwhite
    :: Ptr Word8 -> Int -> IO Int
foreign import ccall unsafe "fpstring.h first_white" first_white
    :: Ptr Word8 -> Int -> IO Int

{-# INLINE is_funky #-}
is_funky :: PackedString -> Bool
is_funky (PS x s l) = unsafePerformIO $ withForeignPtr x $ \p->
                      (/=0) `liftM` has_funky_char (p `plusPtr` s) l

foreign import ccall unsafe "fpstring.h has_funky_char" has_funky_char
    :: Ptr Word8 -> Int -> IO Int


elemPS :: Char -> PackedString -> Bool
elemPS c ps = c `elem` unpackPS ps

spanPS :: (Char -> Bool) -> PackedString -> (PackedString, PackedString)
spanPS  p ps = breakPS (not . p) ps

spanEndPS :: (Char -> Bool) -> PackedString -> (PackedString, PackedString)
spanEndPS  p ps = splitAtPS (findFromEndUntilPS (not.p) ps) ps

breakPS :: (Char -> Bool) -> PackedString -> (PackedString, PackedString)
breakPS p ps = case findWhenPS p ps of
               n -> (takePS n ps, dropPS n ps)

{-# INLINE breakOnPS #-}
breakOnPS :: Char -> PackedString -> (PackedString, PackedString)
breakOnPS c p = case findPS c p of
                Nothing -> (p,nilPS)
                Just n -> (takePS n p, dropPS n p)

{-# INLINE breakWhitePS #-}
breakWhitePS :: PackedString -> (PackedString,PackedString)
breakWhitePS (PS x s l) =
    unsafePerformIO $ withForeignPtr x $ \p->
    do i <- first_white (p `plusPtr` s) l
       if i == 0 then return (nilPS, PS x s l)
                 else if i == l
                      then return (PS x s l, nilPS)
                      else return (PS x s i, PS x (s+i) (l-i))

{-# INLINE breakFirstPS #-}
breakFirstPS :: Char -> PackedString -> Maybe (PackedString,PackedString)
breakFirstPS c p = case findPS c p of
                   Nothing -> Nothing
                   Just n -> Just (takePS n p, dropPS (n+1) p)

{-# INLINE breakLastPS #-}
breakLastPS :: Char -> PackedString -> Maybe (PackedString,PackedString)
breakLastPS c p = case findLastPS c p of
                  Nothing -> Nothing
                  Just n -> Just (takePS n p, dropPS (n+1) p)

{-# INLINE linesPS #-}
linesPS :: PackedString -> [PackedString]
linesPS ps = case wfindPS (c2w '\n') ps of
             Nothing -> [ps]
             Just n -> takePS n ps : linesPS (dropPS (n+1) ps)

unlinesPS :: [PackedString] -> PackedString
unlinesPS ss = concatPS $ intersperse_newlines ss
    where intersperse_newlines (a:b:s) = a:newline: intersperse_newlines (b:s)
          intersperse_newlines s = s
          newline = packString "\n"

wordsPS :: PackedString -> [PackedString]
wordsPS ps = splitWithPS isSpace ps

reversePS :: PackedString -> PackedString
reversePS ps = packString (reverse (unpackPS ps))

foreign import ccall unsafe "static string.h memcpy" c_memcpy
    :: Ptr Word8 -> Ptr Word8 -> Int -> IO ()

concatPS :: [PackedString] -> PackedString
concatPS [] = nilPS
concatPS [ps] = ps
concatPS xs
 = unsafePerformIO $
   do let start_size = 1024
      p <- mallocArray start_size
      f p 0 1024 xs
    where f ptr len _ [] = do ptr' <- reallocArray ptr len
                              fp <- newForeignPtr c_free ptr'
                              return $ PS fp 0 len
          f ptr len to_go pss@(PS p s l:pss')
           | l <= to_go = do withForeignPtr p $ \pf ->
                                 c_memcpy (ptr `advancePtr` len)
                                          (pf `advancePtr` s) l
                             f ptr (len + l) (to_go - l) pss'
           | otherwise = do let new_total = ((len + to_go) * 2) `max` (len + l)
                            ptr' <- reallocArray ptr new_total
                            f ptr' len (new_total - len) pss

-- -----------------------------------------------------------------------------
-- concatLenPS

-- | Camse as concatPS only you tell it how big the result will be.
-- If you lie thenBad Things will happen.

concatLenPS :: Int -> [PackedString] -> PackedString
concatLenPS n [] = n `seq` nilPS
concatLenPS _ [ps] = ps
concatLenPS total_length pss = createPS total_length $ \p-> cpPSs p pss
    where cpPSs :: Ptr Word8 -> [PackedString] -> IO ()
          cpPSs p (PS x s l:rest) = do withForeignPtr x $ \pf ->
                                          c_memcpy p (pf `plusPtr` s) l
                                       cpPSs (p `plusPtr` l) rest
          cpPSs _ [] = return ()

{-# INLINE findPS #-}
findPS :: Char -> PackedString -> Maybe Int
findPS c ps = wfindPS (c2w c) ps

{-# INLINE wfindPS #-}
wfindPS :: Word8 -> PackedString -> Maybe Int
wfindPS c (PS x s l) =
    unsafePerformIO $ withForeignPtr x $ \p->
    let p' = p `plusPtr` s
        q = memchr p' (fromIntegral c) (fromIntegral l)
    in return $ if q == nullPtr then Nothing
                                else Just (q `minusPtr` p')

foreign import ccall unsafe "string.h memchr" memchr
    :: Ptr Word8 -> CInt -> CSize -> Ptr Word8

{-# INLINE findLastPS #-}
findLastPS :: Char -> PackedString -> Maybe Int
findLastPS c ps = wfindLastPS (c2w c) ps

{-# INLINE wfindLastPS #-}
wfindLastPS :: Word8 -> PackedString -> Maybe Int
wfindLastPS c (PS x s l) =
    unsafePerformIO $ withForeignPtr x $ \p->
                    findit (-1) (p `plusPtr` s) 0
    where findit h p i = if i >= l
                         then if h < 0
                              then return Nothing
                              else return $ Just h
                         else do here <- peekElemOff p i
                                 if c == here
                                    then findit i p (i+1)
                                    else findit h p (i+1)

------------------------------------------------------------

{-# INLINE splitPS #-}
splitPS :: Char -> PackedString -> [PackedString]
splitPS c = wsplitPS (c2w c)
{-# INLINE wsplitPS #-}
wsplitPS :: Word8 -> PackedString -> [PackedString]
wsplitPS c ps = case wfindPS c ps of
                Nothing -> if nullPS ps then [] else [ps]
                Just n -> takePS n ps : wsplitPS c (dropPS (n+1) ps)

splitWithPS :: (Char -> Bool) -> PackedString -> [PackedString]
splitWithPS f ps =
    case [ m | m <- [0..lengthPS ps-1], f (w2c (ps ! m)) ] of
    [] -> if nullPS ps then [] else [ps]
    (n:_) -> takePS n ps : splitWithPS f (dropPS (n+1) ps)

-- -----------------------------------------------------------------------------
-- Local utility functions

-- The definition of @_substrPS@ is essentially:
-- @take (end - begin + 1) (drop begin str)@.

substrPS :: PackedString -> Int -> Int -> PackedString
substrPS (PS ps s _) begin end = PS ps (s+begin) (1+end-begin)
--substrPS (PS ps s l) begin end
--    | end <= l && begin <= end && begin >= 0 = PS ps (s+begin) (1+end-begin)
--    | otherwise = bug "substrPS out of bounds"

-- -----------------------------------------------------------------------------
-- hPutPS

-- | Outputs a 'PackedString' to the specified 'Handle'.
--
-- NOTE: the representation of the 'PackedString' in the file is assumed to
-- be in the ISO-8859-1 encoding.  In other words, only the least signficant
-- byte is taken from each character in the 'PackedString'.
hPutPS :: Handle -> PackedString -> IO ()
hPutPS _ (PS _ _ 0) = return ()
hPutPS h (PS ps 0 l) = withForeignPtr ps $ \p-> hPutBuf h p l
hPutPS h (PS ps s l) = withForeignPtr ps $ \p-> hPutBuf h (p `plusPtr` s) l

-- -----------------------------------------------------------------------------
-- hGetPS

-- | Read a 'PackedString' directly from the specified 'Handle'.  This
-- is far more efficient than reading the characters into a 'String'
-- and then using 'packString'.
--
-- NOTE: as with 'hPutPS', the string representation in the file is
-- assumed to be ISO-8859-1.
hGetPS :: Handle -> Int -> IO PackedString
hGetPS _ 0 = return nilPS
hGetPS h i = do fp <- mallocForeignPtr i
                debugForeignPtr fp $ "hGetPS "++show h
                l <- withForeignPtr fp $ \p-> hGetBuf h p i
                return $ PS fp 0 l

-- -----------------------------------------------------------------------------
-- hGetContentsPS

-- | Read entire handle contents into a 'PackedString'.
--
-- NOTE: as with 'hGetPS', the string representation in the file is
-- assumed to be ISO-8859-1.

hGetContentsPS :: Handle -> IO PackedString
hGetContentsPS h
 = do let start_size = 1024
      p <- mallocArray start_size
      i <- hGetBuf h p start_size
      if i < start_size
       then do p' <- reallocArray p i
               fp <- newForeignPtr c_free p'
               return $ PS fp 0 i
       else f p start_size
    where f p s = do let s' = 2 * s
                     p' <- reallocArray p s'
                     i <- hGetBuf h (p' `plusPtr` s) s
                     if i < s then do let i' = s + i
                                      p'' <- reallocArray p' i'
                                      fp <- newForeignPtr c_free p''
                                      return $ PS fp 0 i'
                              else f p' s'

-- -----------------------------------------------------------------------------
-- readFilePS

-- | Read an entire file directly into a 'PackedString'.  This is far more
-- efficient than reading the characters into a 'String' and then using
-- 'packString'.  It also may be more efficient than opening the file and
-- reading it using hGetPS.
--
-- NOTE: as with 'hGetPS', the string representation in the file is
-- assumed to be ISO-8859-1.

readFilePS :: FilePath -> IO PackedString
readFilePS f = do h <- openBinaryFile f ReadMode
                  l <- hFileSize h
                  s <- hGetPS h $ fromIntegral l
                  hClose h
                  return s

-- -----------------------------------------------------------------------------
-- writeFilePS

-- | Write a 'PackedString' to a file.

writeFilePS :: FilePath -> PackedString -> IO ()
writeFilePS f ps = do h <- openBinaryFile f WriteMode
                      hPutPS h ps
                      hClose h

-- -----------------------------------------------------------------------------
-- gzReadFilePS

-- | Read an entire file, which may or may not be gzip compressed, directly
-- into a 'PackedString'.

foreign import ccall unsafe "static zlib.h gzopen" c_gzopen
    :: CString -> CString -> IO (Ptr ())
foreign import ccall unsafe "static zlib.h gzclose" c_gzclose
    :: Ptr () -> IO ()
foreign import ccall unsafe "static zlib.h gzread" c_gzread
    :: Ptr () -> Ptr Word8 -> Int -> IO Int

gzReadFilePS :: FilePath -> IO PackedString
gzReadFilePS f = do
    h <- openBinaryFile f ReadMode
    header <- hGetPS h 2
    if header /= packString "\31\139"
       then do hClose h
               mmapFilePS f
       else do hSeek h SeekFromEnd (-4)
               len <- hGetLittleEndInt h
               hClose h
               withCString f $ \fstr-> withCString "rb" $ \rb-> do
                 gzf <- c_gzopen fstr rb
                 when (gzf == nullPtr) $ fail $ "problem opening file "++f
                 fp <- mallocForeignPtr len
                 debugForeignPtr fp $ "gzReadFilePS "++f
                 lread <- withForeignPtr fp $ \p -> c_gzread gzf p len
                 c_gzclose gzf
                 when (lread /= len) $ fail $ "problem gzreading file "++f
                 return $ PS fp 0 len

hGetLittleEndInt :: Handle -> IO Int
hGetLittleEndInt h = do
    b1 <- ord `liftM` hGetChar h
    b2 <- ord `liftM` hGetChar h
    b3 <- ord `liftM` hGetChar h
    b4 <- ord `liftM` hGetChar h
    return $ b1 + 256*b2 + 65536*b3 + 16777216*b4

-- -----------------------------------------------------------------------------
-- mmapFilePS

-- | Like readFilePS, this reads an entire file directly into a
-- 'PackedString', but it is even more efficient.  It involves directly
-- mapping the file to memory.  This has the advantage that the contents of
-- the file never need to be copied.  Also, under memory pressure the page
-- may simply be discarded, wile in the case of readFilePS it would need to
-- be written to swap.  If you read many small files, mmapFilePS will be
-- less memory-efficient than readFilePS, since each mmapFilePS takes up a
-- separate page of memory.  Also, you can run into bus errors if the file
-- is modified.  NOTE: as with 'readFilePS', the string representation in
-- the file is assumed to be ISO-8859-1.

mmapFilePS :: FilePath -> IO PackedString
mmapFilePS f = if use_mmap
               then do (fp,l) <- mmap f
                       return $ PS fp 0 l
               else readFilePS f

#if defined(__GLASGOW_HASKELL__)
foreign import ccall unsafe "static fpstring.h my_mmap" my_mmap
    :: Int -> Int -> IO (Ptr Word8)
foreign import ccall unsafe "static sys/mman.h munmap" c_munmap
    :: Ptr Word8 -> Int -> IO Int
foreign import ccall unsafe "static unistd.h close" c_close
    :: Int -> IO Int
#endif

mmap :: FilePath -> IO (ForeignPtr Word8, Int)
mmap f = do
    h <- openBinaryFile f ReadMode
    l <- fromIntegral `liftM` hFileSize h
    -- Don't bother mmaping small files because each mmapped file takes up
    -- at least one full VM block.
    if l < mmap_limit
       then do thefp <- mallocForeignPtr l
               debugForeignPtr thefp $ "mmap short file "++f
               withForeignPtr thefp $ \p-> hGetBuf h p l
               hClose h
               return (thefp, l)
       else do
#if defined(__GLASGOW_HASKELL__)
               fd <- fromIntegral `liftM` handleToFd h
               p <- my_mmap l fd
               fp <- if p == nullPtr
                     then
#else
               fp <-
#endif
                          do thefp <- mallocForeignPtr l
                             debugForeignPtr thefp $ "mmap short file "++f
                             withForeignPtr thefp $ \p' -> hGetBuf h p' l
                             return thefp
#if defined(__GLASGOW_HASKELL__)
                     else do
                             fp <- FC.newForeignPtr p (do {c_munmap p l; return (); })
                             debugForeignPtr fp $ "mmap "++f
                             return fp
               c_close fd
#endif
               hClose h
               return (fp, l)
    where mmap_limit = 16*1024


-- -------------------------------------------------------------------------
-- readIntPS

-- | readIntPS skips any whitespace at the beginning of its argument, and
-- reads an Int from the beginning of the PackedString.  If there is no
-- integer at the beginning of the string, it returns Nothing, otherwise it
-- just returns the int read, along with a PackedString containing the
-- remainder of its input.  The actual parsing is done by the standard C
-- library function strtol.

foreign import ccall unsafe "static stdlib.h strtol" c_strtol
    :: Ptr Word8 -> Ptr (Ptr Word8) -> Int -> IO CLong

readIntPS :: PackedString -> Maybe (Int, PackedString)
readIntPS (PS x s l) =
    unsafePerformIO $ withForeignPtr x $ \p-> with p $ \endpp ->
    do val <- c_strtol (p `plusPtr` s) endpp 0
       skipped <- (`minusPtr` (p `plusPtr` s)) `liftM` peek endpp
       if skipped == 0
          then return Nothing
          else return $ Just (fromIntegral val,
                              PS x (s+skipped) (l-skipped))

-- -------------------------------------------------------------------------
-- fromPS2Hex

foreign import ccall unsafe "static fpstring.h conv_to_hex" conv_to_hex
    :: Ptr Word8 -> Ptr Word8 -> Int -> IO ()

fromPS2Hex :: PackedString -> PackedString
fromPS2Hex (PS x s l) = createPS (2*l) $ \p -> withForeignPtr x $ \f ->
           conv_to_hex p (f `plusPtr` s) l

-- -------------------------------------------------------------------------
-- fromHex2PS

foreign import ccall unsafe "static fpstring.h conv_from_hex" conv_from_hex
    :: Ptr Word8 -> Ptr Word8 -> Int -> IO ()

fromHex2PS :: PackedString -> PackedString
fromHex2PS (PS x s l) = createPS (l `div` 2) $ \p -> withForeignPtr x $ \f ->
           conv_from_hex p (f `plusPtr` s) (l `div` 2)

-- -------------------------------------------------------------------------
-- mylexPS

-- | mylexPS reads a single token from the PackedString, where a token is
-- defined as a sequence of non-whitespace characters which are delimited
-- by whitespace.

mylexPS :: PackedString -> Maybe (PackedString,PackedString)
mylexPS s = if nullPS (dropWhitePS s)
            then Nothing
            else Just $ breakWhitePS $ dropWhitePS s

-- -------------------------------------------------------------------------
-- betweenLinesPS

-- | betweenLinesPS returns the PackedString between the two lines given,
-- or Nothing if they do not appear.

betweenLinesPS :: PackedString -> PackedString -> PackedString
               -> Maybe (PackedString)
betweenLinesPS start end ps
 = case break (start ==) (linesPS ps) of
       (_, _:rest@(PS ps1 s1 _:_)) ->
           case break (end ==) rest of
               (_, PS _ s2 _:_) -> Just $ PS ps1 s1 (s2 - s1)
               _ -> Nothing
       _ -> Nothing

Text file: sample_haskell_darcs.hs

blog comments powered by Disqus