-----------------------------------------------------------------------------
module System.IO.LineIndexedCursor (
- LineIndexedCursor(..), mkLineIndexedCursor
+ LineIndexedCursor(..), mkLineIndexedCursor, mkLineIndexedCursorWithCapacity
) where
+import qualified Data.Array as A
import Data.ByteString (ByteString, hGetLine)
import Control.Concurrent.MVar
import System.IO (Handle, hTell, hSeek, SeekMode(..), hIsEOF)
+defaultListCapacity :: Integer
+defaultListCapacity = 16384
+
-- | ADT with methods, hiding the internal state.
data LineIndexedCursor = LineIndexedCursor
{
-- | Returns current line number.
, getCurrentLineNumber :: IO Integer
+ -- | Reads from the latest line from index until EOF to build the full index.
+ , doFullScan :: IO ()
+
-- | Rewinds to the requsted line number. Stops at EOF if it's too big.
-- Returns the reached line number.
, goToLine :: Integer -> IO Integer
data CursorHandle = CursorHandle
{ fileHandle :: Handle
- , linesIdx :: MVar ([Integer], Integer, Integer)
+ , cursorState :: MVar CursorState
+ , listCapacity :: Integer
+ }
+
+data CursorState = CursorState
+ { cursorLinesIdx :: ![Integer]
+ , cursorLinesArrIdx :: !(Maybe (A.Array Integer Integer))
+ , cursorIdxSize :: !Integer
+ , cursorCurrentLineNumber :: !Integer
}
+mElems :: (Maybe (A.Array Integer Integer)) -> [Integer]
+mElems = maybe [] A.elems
+
{- |
Builds 'LineIndexedCursor'.
-}
mkLineIndexedCursor :: Handle -> IO LineIndexedCursor
-mkLineIndexedCursor fileHandle = do
+mkLineIndexedCursor = flip mkLineIndexedCursorWithCapacity defaultListCapacity
+
+mkLineIndexedCursorWithCapacity :: Handle -> Integer -> IO LineIndexedCursor
+mkLineIndexedCursorWithCapacity fileHandle listCapacity = do
-- reset the handle's offset to the beginning
hSeek fileHandle AbsoluteSeek 0
- linesIdx <- newMVar ([0], 0, 0)
+ cursorState <- newMVar $ CursorState [0] Nothing 0 0
- let cursorHandle = CursorHandle fileHandle linesIdx
+ let cursorHandle = CursorHandle fileHandle cursorState listCapacity
pure $ LineIndexedCursor
{ getCurrentLine = getCurrentLine' cursorHandle
, getCurrentLineNumber = getCurrentLineNumber' cursorHandle
+ , doFullScan = doFullScan' cursorHandle
, goToLine = goToLine' cursorHandle
, getHandle = fileHandle
}
hIsEOF fileHandle >>= \isEOF -> if isEOF then pure Nothing else do
line <- hGetLine fileHandle
offset <- hTell fileHandle
- modifyMVar_ linesIdx $ \(idx, size, cln) -> pure $
+
+ modifyMVar_ cursorState $ \(CursorState idx arr size cln) -> pure $
if (not $ offset `elem` idx)
- then (offset : idx, size + 1, cln + 1)
- else (idx, size, cln + 1)
+ then
+ let
+ (newIdx, newArr) =
+ if length (offset : idx) > fromIntegral listCapacity
+ then
+ let res = (offset : idx) ++ mElems arr
+ in ([], Just $ A.listArray (0, toInteger $ length res - 1) res)
+ else (offset : idx, arr)
+ in CursorState
+ { cursorLinesIdx = newIdx
+ , cursorLinesArrIdx = newArr
+ , cursorIdxSize = size + 1
+ , cursorCurrentLineNumber = cln + 1
+ }
+ else CursorState
+ { cursorLinesIdx = idx
+ , cursorLinesArrIdx = arr
+ , cursorIdxSize = size
+ , cursorCurrentLineNumber = cln + 1
+ }
pure $ Just line
+doFullScan' :: CursorHandle -> IO ()
+doFullScan' CursorHandle{..} = do
+ modifyMVar_ cursorState $ \cs@(CursorState idx arr size _) -> do
+ -- go to the end of the index
+ hSeek fileHandle AbsoluteSeek (getFirst cs)
+ -- try to read until the EOF
+ idxTail <- readUntilEOF []
+ let
+ newSize = size + (fromIntegral $ length idxTail)
+ newState = CursorState
+ { cursorLinesIdx = idxTail ++ idx
+ , cursorLinesArrIdx = arr
+ , cursorIdxSize = newSize
+ , cursorCurrentLineNumber = newSize
+ }
+ pure newState
+ where
+ readUntilEOF idx =
+ hIsEOF fileHandle >>= \isEOF -> if isEOF then pure idx else do
+ _ <- hGetLine fileHandle
+ offset <- hTell fileHandle
+ readUntilEOF (fromInteger offset : idx)
+
getCurrentLineNumber' :: CursorHandle -> IO Integer
getCurrentLineNumber' CursorHandle{..} = do
- (_, _, cln) <- readMVar linesIdx
+ CursorState _ _ _ cln <- readMVar cursorState
pure cln
goToLine' :: CursorHandle -> Integer -> IO Integer
goToLine' ch@CursorHandle{..} ln =
if (ln < 0) then getCurrentLineNumber' ch
- else modifyMVar linesIdx $ \(idx, size, _) -> do
+ else modifyMVar cursorState $ \cs@(CursorState idx arr size _) -> do
if ln > size then do
- hSeek fileHandle AbsoluteSeek (idx !! 0)
+ hSeek fileHandle AbsoluteSeek (getFirst cs)
-- try to read until the requested line number
idxTail <- readUntil (ln - size) []
- let newSize = size + (fromIntegral $ length idxTail)
- pure ((idxTail ++ idx, newSize, newSize), newSize)
+ let
+ newSize = size + (fromIntegral $ length idxTail)
+ (newIdx, newArr) =
+ if newSize > listCapacity
+ then
+ let res = (idxTail ++ idx) ++ mElems arr
+ in ([], Just $ A.listArray (0, toInteger $ length res - 1) res)
+ else (idxTail ++ idx, arr)
+ newState = CursorState
+ { cursorLinesIdx = newIdx
+ , cursorLinesArrIdx = newArr
+ , cursorIdxSize = newSize
+ , cursorCurrentLineNumber = newSize
+ }
+ pure (newState, newSize)
else do
- let nextSeekIndex = fromIntegral $ size - ln
- hSeek fileHandle AbsoluteSeek (idx !! nextSeekIndex)
- pure ((idx, size, ln), ln)
+ let nextSeekIndex = size - ln
+
+ if nextSeekIndex >= fromIntegral (length idx)
+ then case arr of
+ Just a -> hSeek fileHandle AbsoluteSeek (a A.! (nextSeekIndex - fromIntegral (length idx)))
+ Nothing -> error "goToLine: there is no array"
+ else hSeek fileHandle AbsoluteSeek (idx !! fromIntegral nextSeekIndex)
+
+ let
+ newState = CursorState
+ { cursorLinesIdx = idx
+ , cursorLinesArrIdx = arr
+ , cursorIdxSize = size
+ , cursorCurrentLineNumber = ln
+ }
+ pure (newState, ln)
where
readUntil 0 idx = pure idx
readUntil counter idx =
hIsEOF fileHandle >>= \isEOF -> if isEOF then pure idx else do
_ <- hGetLine fileHandle
offset <- hTell fileHandle
- readUntil (counter - 1) (fromInteger offset : idx)
\ No newline at end of file
+ readUntil (counter - 1) (fromInteger offset : idx)
+
+getFirst :: CursorState -> Integer
+getFirst (CursorState idx (Just arr) _ _) = if null idx then arr A.! 0 else idx !! 0
+getFirst (CursorState idx Nothing _ _) = idx !! 0
\ No newline at end of file
import System.IO
import System.IO.LineIndexedCursor
+import Data.Foldable
import Test.Hspec
main :: IO ()
main = hspec $ do
let
- mkCursor = do
+ mkCursor capacity = do
h <- openFile "test/testdata" ReadMode
- c <- mkLineIndexedCursor h
+ c <- mkLineIndexedCursorWithCapacity h capacity
pure (h, c)
- before mkCursor . after (\(h, _) -> hClose h)
- $ describe "System.IO.LineIndexedCursor" $ do
+ forM_ [0 :: Integer ..20] $ \capacity -> do
+ before (mkCursor capacity) . after (\(h, _) -> hClose h)
+ $ describe ("System.IO.LineIndexedCursor with list capacity " ++ show capacity) $ do
- it "getCurrentLine works" $ \(_, c) -> do
- l <- getCurrentLine c
- l `shouldBe` Just "Lorem ipsum dolor sit amet, consectetur adipiscing elit."
+ it "getCurrentLine works" $ \(_, c) -> do
+ l <- getCurrentLine c
+ l `shouldBe` Just "Lorem ipsum dolor sit amet, consectetur adipiscing elit."
- it "goToLine works" $ \(_, c) -> do
- ln <- goToLine c 10
- ln `shouldBe` 10
+ ln <- goToLine c 3
+ ln `shouldBe` 3
- l <- getCurrentLine c
- l `shouldBe` Just "Sed elementum velit sit amet orci mollis tincidunt."
+ l' <- getCurrentLine c
+ l' `shouldBe` Just "Curabitur nec mi sit amet justo condimentum gravida."
- it "goToLine is negative" $ \(_, c) -> do
- ln <- goToLine c (-10)
- ln `shouldBe` 0
+ l'' <- getCurrentLine c
+ l'' `shouldBe` Just "Pellentesque accumsan dolor at nisl pulvinar, ut bibendum diam egestas."
- l <- getCurrentLine c
- l `shouldBe` Just "Lorem ipsum dolor sit amet, consectetur adipiscing elit."
+ ln' <- goToLine c 3
+ ln' `shouldBe` 3
- it "goToLine is too big" $ \(_, c) -> do
- l <- getCurrentLine c
- l `shouldBe` Just "Lorem ipsum dolor sit amet, consectetur adipiscing elit."
+ ln'' <- goToLine c 2
+ ln'' `shouldBe` 2
- ln <- goToLine c 30
- ln `shouldBe` 20
+ ln''' <- goToLine c 1
+ ln''' `shouldBe` 1
- l' <- getCurrentLine c
- l' `shouldBe` Nothing
+ ln'''' <- goToLine c 0
+ ln'''' `shouldBe` 0
- it "read line, then go to beginning and forth" $ \(_, c) -> do
- cln <- getCurrentLineNumber c
- cln `shouldBe` 0
+ it "goToLine works" $ \(_, c) -> do
+ ln <- goToLine c 10
+ ln `shouldBe` 10
- l <- getCurrentLine c
- l `shouldBe` Just "Lorem ipsum dolor sit amet, consectetur adipiscing elit."
+ l <- getCurrentLine c
+ l `shouldBe` Just "Sed elementum velit sit amet orci mollis tincidunt."
- cln' <- getCurrentLineNumber c
- cln' `shouldBe` 1
+ it "goToLine is negative" $ \(_, c) -> do
+ ln <- goToLine c (-10)
+ ln `shouldBe` 0
- _ <- getCurrentLine c
- _ <- getCurrentLine c
- _ <- getCurrentLine c
- _ <- getCurrentLine c
- _ <- getCurrentLine c
+ l <- getCurrentLine c
+ l `shouldBe` Just "Lorem ipsum dolor sit amet, consectetur adipiscing elit."
- cln'' <- getCurrentLineNumber c
- cln'' `shouldBe` 6
+ it "goToLine is too big" $ \(_, c) -> do
+ l <- getCurrentLine c
+ l `shouldBe` Just "Lorem ipsum dolor sit amet, consectetur adipiscing elit."
- ln <- goToLine c 0
- ln `shouldBe` 0
+ ln <- goToLine c 30
+ ln `shouldBe` 20
- cln''' <- getCurrentLineNumber c
- cln''' `shouldBe` 0
+ l' <- getCurrentLine c
+ l' `shouldBe` Nothing
- l' <- getCurrentLine c
- l' `shouldBe` Just "Lorem ipsum dolor sit amet, consectetur adipiscing elit."
+ it "fullScan works" $ \(_, c) -> do
+ l <- getCurrentLine c
+ l `shouldBe` Just "Lorem ipsum dolor sit amet, consectetur adipiscing elit."
- ln' <- goToLine c 5
- ln' `shouldBe` 5
+ doFullScan c
- l'' <- getCurrentLine c
- l'' `shouldBe` Just "Curabitur nec dui posuere, tincidunt turpis vitae, tincidunt magna."
+ cln <- getCurrentLineNumber c
+ cln `shouldBe` 20
- ln'' <- goToLine c 6
- ln'' `shouldBe` 6
+ l' <- getCurrentLine c
+ l' `shouldBe` Nothing
- ln''' <- goToLine c 7
- ln''' `shouldBe` 7
+ it "read line, then go to beginning and forth" $ \(_, c) -> do
+ cln <- getCurrentLineNumber c
+ cln `shouldBe` 0
- ln'''' <- goToLine c 10
- ln'''' `shouldBe` 10
+ l <- getCurrentLine c
+ l `shouldBe` Just "Lorem ipsum dolor sit amet, consectetur adipiscing elit."
+
+ cln' <- getCurrentLineNumber c
+ cln' `shouldBe` 1
+
+ _ <- getCurrentLine c
+ _ <- getCurrentLine c
+ _ <- getCurrentLine c
+ _ <- getCurrentLine c
+ _ <- getCurrentLine c
+
+ cln'' <- getCurrentLineNumber c
+ cln'' `shouldBe` 6
+
+ ln <- goToLine c 0
+ ln `shouldBe` 0
+
+ cln''' <- getCurrentLineNumber c
+ cln''' `shouldBe` 0
+
+ l' <- getCurrentLine c
+ l' `shouldBe` Just "Lorem ipsum dolor sit amet, consectetur adipiscing elit."
+
+ ln' <- goToLine c 5
+ ln' `shouldBe` 5
+
+ l'' <- getCurrentLine c
+ l'' `shouldBe` Just "Curabitur nec dui posuere, tincidunt turpis vitae, tincidunt magna."
+
+ ln'' <- goToLine c 6
+ ln'' `shouldBe` 6
+
+ ln''' <- goToLine c 7
+ ln''' `shouldBe` 7
+
+ ln'''' <- goToLine c 10
+ ln'''' `shouldBe` 10