From 117fc9ded180cbec77f4633d3de717c9170af81b Mon Sep 17 00:00:00 2001 From: Evgenii Akentev Date: Sun, 9 Jul 2023 18:10:38 +0400 Subject: [PATCH] Create list based version. --- .gitignore | 27 +++++++++ CHANGELOG.md | 5 ++ LICENSE | 20 +++++++ line-indexed-cursor.cabal | 35 +++++++++++ src/System/IO/LineIndexedCursor.hs | 96 ++++++++++++++++++++++++++++++ test/Main.hs | 71 ++++++++++++++++++++++ test/testdata | 20 +++++++ 7 files changed, 274 insertions(+) create mode 100644 .gitignore create mode 100644 CHANGELOG.md create mode 100644 LICENSE create mode 100644 line-indexed-cursor.cabal create mode 100644 src/System/IO/LineIndexedCursor.hs create mode 100644 test/Main.hs create mode 100644 test/testdata diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..7e5404e --- /dev/null +++ b/.gitignore @@ -0,0 +1,27 @@ +dist +dist-* +cabal-dev +*.o +*.hi +*.hie +*.chi +*.chs.h +*.dyn_o +*.dyn_hi +.hpc +.hsenv +.cabal-sandbox/ +cabal.sandbox.config +*.prof +*.aux +*.hp +*.eventlog +.stack-work/ +cabal.project.local +cabal.project.local~ +.HTF/ +.ghc.environment.* +*.output +packagedb +cache +build \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..58eae78 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,5 @@ +# Revision history for line-indexed-file-cursor + +## 0.1.0.0 -- YYYY-mm-dd + +* First version. Released on an unsuspecting world. diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..4bfd620 --- /dev/null +++ b/LICENSE @@ -0,0 +1,20 @@ +Copyright (c) 2023 Evgenii Akentev + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be included +in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/line-indexed-cursor.cabal b/line-indexed-cursor.cabal new file mode 100644 index 0000000..3aaec61 --- /dev/null +++ b/line-indexed-cursor.cabal @@ -0,0 +1,35 @@ +cabal-version: 3.0 +name: line-indexed-cursor +version: 0.1.0.0 +license: MIT +license-file: LICENSE +author: Evgenii Akentev +maintainer: i@ak3n.com +category: Development +build-type: Simple +extra-doc-files: CHANGELOG.md + +common warnings + ghc-options: + -Wall -Wnoncanonical-monad-instances -Wincomplete-uni-patterns + -Wincomplete-record-updates -Wredundant-constraints -Widentities + -Wunused-packages -Wmissing-deriving-strategies + + +library + import: warnings + exposed-modules: System.IO.LineIndexedCursor + build-depends: base ^>=4.18.0.0, bytestring ^>= 0.11 + hs-source-dirs: src + default-language: Haskell2010 + +test-suite line-indexed-cursor-test + import: warnings + default-language: Haskell2010 + type: exitcode-stdio-1.0 + hs-source-dirs: test + main-is: Main.hs + build-depends: + base >= 4.7 && < 5, + hspec >= 2.10 && < 3, + line-indexed-cursor diff --git a/src/System/IO/LineIndexedCursor.hs b/src/System/IO/LineIndexedCursor.hs new file mode 100644 index 0000000..c26c8cf --- /dev/null +++ b/src/System/IO/LineIndexedCursor.hs @@ -0,0 +1,96 @@ +{-# LANGUAGE DuplicateRecordFields #-} +{-# LANGUAGE RecordWildCards #-} + +----------------------------------------------------------------------------- +-- | +-- Module : System.IO.LineIndexedCursor +-- Maintainer : i@ak3n.com +-- +-- Line-indexed file reader. +-- +-- Lazily builds the index of line numbers while reading the file +-- making it possible to rewind to them quickly later. +----------------------------------------------------------------------------- + +module System.IO.LineIndexedCursor ( + LineIndexedCursor(..), mkLineIndexedCursor + ) where + +import Data.ByteString (ByteString, hGetLine) +import Control.Concurrent.MVar +import System.IO (Handle, hTell, hSeek, SeekMode(..), hIsEOF) + +-- | ADT with methods, hiding the internal state. +data LineIndexedCursor = LineIndexedCursor + { + -- | Same as 'hGetLine' but safe. + getCurrentLine :: IO (Maybe ByteString) + + + -- | Rewinds to the requsted line number. Stops at EOF if it's too big. + -- Returns the reached line number. + , goToLine :: Integer -> IO Integer + + -- | Returns the file 'Handle'. + , getHandle :: Handle + } + +data CursorHandle = CursorHandle + { fileHandle :: Handle + , linesIdx :: MVar ([Integer], Integer) + } + +{- | + +Builds 'LineIndexedCursor'. + +Resets the file handle's ofsset to the beginning. + +Use 'System.IO.hSetNewlineMode' if you want to configure 'System.IO.NewlineMode'. + +-} +mkLineIndexedCursor :: Handle -> IO LineIndexedCursor +mkLineIndexedCursor fileHandle = do + -- reset the handle's offset to the beginning + hSeek fileHandle AbsoluteSeek 0 + + linesIdx <- newMVar ([0], 0) + + let cursorHandle = CursorHandle fileHandle linesIdx + pure $ LineIndexedCursor + { getCurrentLine = getCurrentLine' cursorHandle + , goToLine = goToLine' cursorHandle + , getHandle = fileHandle + } + +getCurrentLine' :: CursorHandle -> IO (Maybe ByteString) +getCurrentLine' CursorHandle{..} = + hIsEOF fileHandle >>= \isEOF -> if isEOF then pure Nothing else do + line <- hGetLine fileHandle + offset <- hTell fileHandle + modifyMVar_ linesIdx $ \(idx, size) -> pure $ + if (not $ offset `elem` idx) + then (offset : idx, size + 1) + else (idx, size) + pure $ Just line + +goToLine' :: CursorHandle -> Integer -> IO Integer +goToLine' CursorHandle{..} ln = do + modifyMVar linesIdx $ \(idx, size) -> do + if ln > size then do + hSeek fileHandle AbsoluteSeek (idx !! 0) + -- try to read until the requested line number + idxTail <- readUntil (ln - size) [] + let newSize = size + (fromIntegral $ length idxTail) + pure ((idxTail ++ idx, newSize), newSize) + else do + let nextSeekIndex = fromIntegral $ size - ln + hSeek fileHandle AbsoluteSeek (idx !! nextSeekIndex) + pure ((idx, size), ln) + where + readUntil 0 idx = pure idx + readUntil counter idx = + hIsEOF fileHandle >>= \isEOF -> if isEOF then pure idx else do + _ <- hGetLine fileHandle + offset <- hTell fileHandle + readUntil (counter - 1) (fromInteger offset : idx) \ No newline at end of file diff --git a/test/Main.hs b/test/Main.hs new file mode 100644 index 0000000..d9c8613 --- /dev/null +++ b/test/Main.hs @@ -0,0 +1,71 @@ +{-# LANGUAGE OverloadedStrings #-} + +module Main (main) where + +import System.IO +import System.IO.LineIndexedCursor + +import Test.Hspec + +main :: IO () +main = hspec $ do + let + mkCursor = do + h <- openFile "test/testdata" ReadMode + c <- mkLineIndexedCursor h + pure (h, c) + + before mkCursor . after (\(h, _) -> hClose h) + $ describe "System.IO.LineIndexedCursor" $ do + + it "getCurrentLine works" $ \(_, c) -> do + l <- getCurrentLine c + l `shouldBe` Just "Lorem ipsum dolor sit amet, consectetur adipiscing elit." + + it "goToLine works" $ \(_, c) -> do + ln <- goToLine c 10 + ln `shouldBe` 10 + + l <- getCurrentLine c + l `shouldBe` Just "Sed elementum velit sit amet orci mollis tincidunt." + + it "goToLine is too big" $ \(_, c) -> do + l <- getCurrentLine c + l `shouldBe` Just "Lorem ipsum dolor sit amet, consectetur adipiscing elit." + + ln <- goToLine c 30 + ln `shouldBe` 20 + + l' <- getCurrentLine c + l' `shouldBe` Nothing + + it "read line, then go to beginning and forth" $ \(_, c) -> do + l <- getCurrentLine c + l `shouldBe` Just "Lorem ipsum dolor sit amet, consectetur adipiscing elit." + + _ <- getCurrentLine c + _ <- getCurrentLine c + _ <- getCurrentLine c + _ <- getCurrentLine c + _ <- getCurrentLine c + + ln <- goToLine c 0 + ln `shouldBe` 0 + + l' <- getCurrentLine c + l' `shouldBe` Just "Lorem ipsum dolor sit amet, consectetur adipiscing elit." + + ln' <- goToLine c 5 + ln' `shouldBe` 5 + + l'' <- getCurrentLine c + l'' `shouldBe` Just "Curabitur nec dui posuere, tincidunt turpis vitae, tincidunt magna." + + ln'' <- goToLine c 6 + ln'' `shouldBe` 6 + + ln''' <- goToLine c 7 + ln''' `shouldBe` 7 + + ln'''' <- goToLine c 10 + ln'''' `shouldBe` 10 diff --git a/test/testdata b/test/testdata new file mode 100644 index 0000000..b540283 --- /dev/null +++ b/test/testdata @@ -0,0 +1,20 @@ +Lorem ipsum dolor sit amet, consectetur adipiscing elit. +Sed eget lacus id nulla sagittis interdum sit amet ac quam. +Vestibulum vehicula felis ac massa tincidunt, a elementum elit malesuada. +Curabitur nec mi sit amet justo condimentum gravida. +Pellentesque accumsan dolor at nisl pulvinar, ut bibendum diam egestas. +Curabitur nec dui posuere, tincidunt turpis vitae, tincidunt magna. +Duis fringilla orci vitae finibus fermentum. +Pellentesque facilisis nisi sit amet urna elementum, nec blandit neque tincidunt. +Duis efficitur odio non ipsum consequat lobortis. +Curabitur faucibus tortor quis leo ultricies volutpat. +Sed elementum velit sit amet orci mollis tincidunt. +Aliquam vitae est vel odio pharetra fermentum. +Morbi facilisis sem id scelerisque fermentum. +Sed placerat lorem at commodo ornare. +Suspendisse bibendum ex non eros bibendum molestie. +Nullam sed elit quis arcu dapibus aliquam nec sed nunc. +Sed sodales ex a dapibus lacinia. +Duis at nunc et est maximus vestibulum. +Fusce ut justo id ante vehicula dignissim. +Praesent tincidunt eros vel viverra posuere. \ No newline at end of file -- 2.34.1