From 0e109fc1e5829c59e779acdd81753f637b946f33 Mon Sep 17 00:00:00 2001 From: Evgenii Akentev Date: Sun, 23 Jul 2023 01:11:56 +0400 Subject: [PATCH] Add: comments, 'getCurrentLineUnsafe', 'getCursorState'. Refactor a bit. --- src/System/IO/LineIndexedCursor.hs | 130 +++++++++++++++++------------ test/Main.hs | 3 + 2 files changed, 81 insertions(+), 52 deletions(-) diff --git a/src/System/IO/LineIndexedCursor.hs b/src/System/IO/LineIndexedCursor.hs index 5d550eb..871c23c 100644 --- a/src/System/IO/LineIndexedCursor.hs +++ b/src/System/IO/LineIndexedCursor.hs @@ -8,14 +8,17 @@ -- -- Line-indexed file reader. -- --- Lazily builds the index of line numbers while reading the file +-- Lazily builds the index with the line numbers while reading the file -- making it possible to rewind to them quickly later. ----------------------------------------------------------------------------- -module System.IO.LineIndexedCursor ( - LineIndexedCursor(..), mkLineIndexedCursor, mkLineIndexedCursorWithCapacity +module System.IO.LineIndexedCursor + ( LineIndexedCursor(..) + , mkLineIndexedCursor + , mkLineIndexedCursorWithCapacity ) where +import Data.Maybe (fromMaybe) import qualified Data.Array as A import Data.ByteString (ByteString, hGetLine) import Control.Concurrent.MVar @@ -25,23 +28,32 @@ defaultListCapacity :: Integer defaultListCapacity = 16384 -- | ADT with methods, hiding the internal state. +-- +-- 'LineIndexedCursor.getCurrentLine', 'LineIndexedCursor.getCurrentLineUnsafe', +-- 'LineIndexedCursor.doFullScan', and 'LineIndexedCursor.goToLine', all throw 'System.IO.IOError'. data LineIndexedCursor = LineIndexedCursor { - -- | Same as 'hGetLine' but safe. + -- | Same as 'LineIndexedCursor.getCurrentLineUnsafe' but safely handles 'System.IO.EOF'. getCurrentLine :: IO (Maybe ByteString) - -- | Returns current line number. + -- | A wrapper around 'hGetLine'. Throws the same exceptions. + , getCurrentLineUnsafe :: IO ByteString + + -- | Returns the current line number. , getCurrentLineNumber :: IO Integer - -- | Reads from the latest line from index until EOF to build the full index. + -- | Reads from the latest known line until EOF to build the full index. , doFullScan :: IO () - -- | Rewinds to the requsted line number. Stops at EOF if it's too big. - -- Returns the reached line number. + -- | Rewinds the file handle to the requsted line number. Stops at the EOF if it's too big, + -- returning the reached line number. , goToLine :: Integer -> IO Integer -- | Returns the file 'Handle'. , getHandle :: Handle + + -- | Returns the current state of the cursor — all known line indexes. + , getCursorState :: IO [Integer] } data CursorHandle = CursorHandle @@ -52,19 +64,14 @@ data CursorHandle = CursorHandle data CursorState = CursorState { cursorLinesIdx :: ![Integer] - , cursorLinesArrIdx :: !(Maybe (A.Array Integer Integer)) + , cursorLinesArrIdx :: !(Maybe (A.Array Integer Integer)) -- uses Maybe since can't be empty , cursorIdxSize :: !Integer , cursorCurrentLineNumber :: !Integer } -mElems :: (Maybe (A.Array Integer Integer)) -> [Integer] -mElems = maybe [] A.elems - {- | -Builds 'LineIndexedCursor'. - -Resets the file handle's ofsset to the beginning. +Builds 'LineIndexedCursor'. Resets the file handle's ofsset to the beginning. Use 'System.IO.hSetNewlineMode' if you want to configure 'System.IO.NewlineMode'. @@ -72,6 +79,7 @@ Use 'System.IO.hSetNewlineMode' if you want to configure 'System.IO.NewlineMode' mkLineIndexedCursor :: Handle -> IO LineIndexedCursor mkLineIndexedCursor = flip mkLineIndexedCursorWithCapacity defaultListCapacity +-- | Same as 'mkLineIndexedCursor' but allows to configure the list's capacity. mkLineIndexedCursorWithCapacity :: Handle -> Integer -> IO LineIndexedCursor mkLineIndexedCursorWithCapacity fileHandle listCapacity = do -- reset the handle's offset to the beginning @@ -82,10 +90,12 @@ mkLineIndexedCursorWithCapacity fileHandle listCapacity = do let cursorHandle = CursorHandle fileHandle cursorState listCapacity pure $ LineIndexedCursor { getCurrentLine = getCurrentLine' cursorHandle + , getCurrentLineUnsafe = getCurrentLineUnsafe' cursorHandle , getCurrentLineNumber = getCurrentLineNumber' cursorHandle , doFullScan = doFullScan' cursorHandle , goToLine = goToLine' cursorHandle , getHandle = fileHandle + , getCursorState = getCursorState' cursorHandle } getCurrentLine' :: CursorHandle -> IO (Maybe ByteString) @@ -94,35 +104,42 @@ getCurrentLine' CursorHandle{..} = line <- hGetLine fileHandle offset <- hTell fileHandle - modifyMVar_ cursorState $ \(CursorState idx arr size cln) -> pure $ - if (not $ offset `elem` idx) - then - let - (newIdx, newArr) = - if length (offset : idx) > fromIntegral listCapacity - then - let res = (offset : idx) ++ mElems arr - in ([], Just $ A.listArray (0, toInteger $ length res - 1) res) - else (offset : idx, arr) - in CursorState - { cursorLinesIdx = newIdx - , cursorLinesArrIdx = newArr - , cursorIdxSize = size + 1 - , cursorCurrentLineNumber = cln + 1 - } - else CursorState - { cursorLinesIdx = idx - , cursorLinesArrIdx = arr - , cursorIdxSize = size - , cursorCurrentLineNumber = cln + 1 - } + modifyMVar_ cursorState $ \cs@(CursorState idx arr size cln) -> + let latestIdx = getLatestIdx cs + in pure $ + if (offset <= latestIdx) + -- we already know this offset, so just increment the current line number + then cs { cursorCurrentLineNumber = cln + 1 } + -- otherwise we need to add the offset + else + let + (newIdx, newArr) = + -- if we have exceed the list capacity + if length (offset : idx) > fromIntegral listCapacity + -- move the list content to the array and empty the list + then + let res = (offset : idx) ++ maybe [] A.elems arr + in ([], Just $ A.listArray (0, toInteger $ length res - 1) res) + -- otherwise keep the offset in the list + else (offset : idx, arr) + in CursorState + { cursorLinesIdx = newIdx + , cursorLinesArrIdx = newArr + , cursorIdxSize = size + 1 + , cursorCurrentLineNumber = cln + 1 + } pure $ Just line +getCurrentLineUnsafe' :: CursorHandle -> IO ByteString +getCurrentLineUnsafe' ch = do + cl <- getCurrentLine' ch + pure $ fromMaybe (error "getCurrentLineUnsafe: couldn't get the current line") cl + doFullScan' :: CursorHandle -> IO () doFullScan' CursorHandle{..} = do modifyMVar_ cursorState $ \cs@(CursorState idx arr size _) -> do -- go to the end of the index - hSeek fileHandle AbsoluteSeek (getFirst cs) + hSeek fileHandle AbsoluteSeek (getLatestIdx cs) -- try to read until the EOF idxTail <- readUntilEOF [] let @@ -148,19 +165,25 @@ getCurrentLineNumber' CursorHandle{..} = do goToLine' :: CursorHandle -> Integer -> IO Integer goToLine' ch@CursorHandle{..} ln = + -- handle negative input if (ln < 0) then getCurrentLineNumber' ch else modifyMVar cursorState $ \cs@(CursorState idx arr size _) -> do + -- if the requested line number is out of the index's scope if ln > size then do - hSeek fileHandle AbsoluteSeek (getFirst cs) + -- go to the end of the index + hSeek fileHandle AbsoluteSeek (getLatestIdx cs) -- try to read until the requested line number idxTail <- readUntil (ln - size) [] let newSize = size + (fromIntegral $ length idxTail) (newIdx, newArr) = + -- if we have exceed the list capacity if newSize > listCapacity + -- move the list content to the array and empty the list then - let res = (idxTail ++ idx) ++ mElems arr + let res = (idxTail ++ idx) ++ maybe [] A.elems arr in ([], Just $ A.listArray (0, toInteger $ length res - 1) res) + -- otherwise add offsets to the list else (idxTail ++ idx, arr) newState = CursorState { cursorLinesIdx = newIdx @@ -169,23 +192,19 @@ goToLine' ch@CursorHandle{..} ln = , cursorCurrentLineNumber = newSize } pure (newState, newSize) + -- otherwise access the offset in the cache (list + array) else do let nextSeekIndex = size - ln - + -- if the seek index is bigger than the current list size if nextSeekIndex >= fromIntegral (length idx) + -- try to access the array then case arr of Just a -> hSeek fileHandle AbsoluteSeek (a A.! (nextSeekIndex - fromIntegral (length idx))) Nothing -> error "goToLine: there is no array" + -- otherwise take the offset from the list else hSeek fileHandle AbsoluteSeek (idx !! fromIntegral nextSeekIndex) - let - newState = CursorState - { cursorLinesIdx = idx - , cursorLinesArrIdx = arr - , cursorIdxSize = size - , cursorCurrentLineNumber = ln - } - pure (newState, ln) + pure (cs { cursorCurrentLineNumber = ln } , ln) where readUntil 0 idx = pure idx readUntil counter idx = @@ -194,6 +213,13 @@ goToLine' ch@CursorHandle{..} ln = offset <- hTell fileHandle readUntil (counter - 1) (fromInteger offset : idx) -getFirst :: CursorState -> Integer -getFirst (CursorState idx (Just arr) _ _) = if null idx then arr A.! 0 else idx !! 0 -getFirst (CursorState idx Nothing _ _) = idx !! 0 \ No newline at end of file +getCursorState' :: CursorHandle -> IO [Integer] +getCursorState' CursorHandle{..} = do + CursorState l arr _ _ <- readMVar cursorState + pure $ reverse $ l ++ maybe [] A.elems arr + +-- Utils + +getLatestIdx :: CursorState -> Integer +getLatestIdx (CursorState idx (Just arr) _ _) = if null idx then arr A.! 0 else idx !! 0 +getLatestIdx (CursorState idx Nothing _ _) = idx !! 0 diff --git a/test/Main.hs b/test/Main.hs index 20cbf3a..0d15f26 100644 --- a/test/Main.hs +++ b/test/Main.hs @@ -81,6 +81,9 @@ main = hspec $ do l' <- getCurrentLine c l' `shouldBe` Nothing + s <- getCursorState c + s `shouldBe` [0,57,117,191,244,316,384,429,511,561,616,668,715,761,799,851,907,941,981,1024,1068] + it "read line, then go to beginning and forth" $ \(_, c) -> do cln <- getCurrentLineNumber c cln `shouldBe` 0 -- 2.34.1