Windows: Fix CRLF handling in some log files
In particular, the mergedrefs file was written with CR added to each line, but read without CRLF handling. This resulted in each update of the file adding CR to each line in it, growing the number of lines, while also preventing the optimisation from working, so it remerged unncessarily. writeFile and readFile do NewlineMode translation on Windows. But the ByteString conversion prevented that from happening any longer. I've audited for other cases of this, and found three more (.git/annex/index.lck, .git/annex/ignoredrefs, and .git/annex/import/). All of those also only prevent optimisations from working. Some other files are currently both read and written with ByteString, but old git-annex may have written them with NewlineMode translation. Other files are at risk for breakage later if the reader gets converted to ByteString. This is a minimal fix, but should be enough, as long as I remember to use fileLines when splitting a ByteString into lines. This leaves files written using ByteString without CR added, but that's ok because old git-annex has no difficulty reading such files. When the mergedrefs file has gotten lines that end with "\r\r\r\n", this will eventually clean it up. Each update will remove a single trailing CR. Note that S8.lines is still used in eg Command.Unused, where it is parsing git show-ref, and similar in Git/*. git commands don't include CR in their output so that's ok. Sponsored-by: Joshua Antonishen on Patreon
This commit is contained in:
parent
ea2876ae77
commit
eb42935e58
3 changed files with 41 additions and 7 deletions
|
@ -889,7 +889,7 @@ ignoreRefs rs = do
|
|||
|
||||
getIgnoredRefs :: Annex (S.Set Git.Sha)
|
||||
getIgnoredRefs =
|
||||
S.fromList . mapMaybe Git.Sha.extractSha . B8.lines <$> content
|
||||
S.fromList . mapMaybe Git.Sha.extractSha . fileLines' <$> content
|
||||
where
|
||||
content = do
|
||||
f <- fromRawFilePath <$> fromRepo gitAnnexIgnoredRefs
|
||||
|
@ -912,7 +912,7 @@ getMergedRefs' :: Annex [(Git.Sha, Git.Branch)]
|
|||
getMergedRefs' = do
|
||||
f <- fromRawFilePath <$> fromRepo gitAnnexMergedRefs
|
||||
s <- liftIO $ catchDefaultIO mempty $ B.readFile f
|
||||
return $ map parse $ B8.lines s
|
||||
return $ map parse $ fileLines' s
|
||||
where
|
||||
parse l =
|
||||
let (s, b) = separate' (== (fromIntegral (ord '\t'))) l
|
||||
|
|
|
@ -5,6 +5,7 @@ git-annex (10.20230927) UNRELEASED; urgency=medium
|
|||
* importfeed: Use caching database to avoid needing to list urls
|
||||
on every run, and avoid using too much memory.
|
||||
* Improve memory use of --all when using annex.private.
|
||||
* Windows: Fix CRLF handling in some log files.
|
||||
|
||||
-- Joey Hess <id@joeyh.name> Tue, 10 Oct 2023 13:17:31 -0400
|
||||
|
||||
|
|
43
Logs/File.hs
43
Logs/File.hs
|
@ -1,11 +1,11 @@
|
|||
{- git-annex log files
|
||||
-
|
||||
- Copyright 2018-2022 Joey Hess <id@joeyh.name>
|
||||
- Copyright 2018-2023 Joey Hess <id@joeyh.name>
|
||||
-
|
||||
- Licensed under the GNU AGPL version 3 or higher.
|
||||
-}
|
||||
|
||||
{-# LANGUAGE BangPatterns #-}
|
||||
{-# LANGUAGE CPP, BangPatterns #-}
|
||||
|
||||
module Logs.File (
|
||||
writeLogFile,
|
||||
|
@ -17,6 +17,8 @@ module Logs.File (
|
|||
checkLogFile,
|
||||
calcLogFile,
|
||||
calcLogFileUnsafe,
|
||||
fileLines,
|
||||
fileLines',
|
||||
) where
|
||||
|
||||
import Annex.Common
|
||||
|
@ -25,6 +27,8 @@ import Annex.LockFile
|
|||
import Annex.ReplaceFile
|
||||
import Utility.Tmp
|
||||
|
||||
import qualified Data.ByteString as S
|
||||
import qualified Data.ByteString.Char8 as S8
|
||||
import qualified Data.ByteString.Lazy as L
|
||||
import qualified Data.ByteString.Lazy.Char8 as L8
|
||||
|
||||
|
@ -74,7 +78,7 @@ appendLogFile f lck c =
|
|||
modifyLogFile :: RawFilePath -> RawFilePath -> ([L.ByteString] -> [L.ByteString]) -> Annex ()
|
||||
modifyLogFile f lck modf = withExclusiveLock lck $ do
|
||||
ls <- liftIO $ fromMaybe []
|
||||
<$> tryWhenExists (L8.lines <$> L.readFile f')
|
||||
<$> tryWhenExists (fileLines <$> L.readFile f')
|
||||
let ls' = modf ls
|
||||
when (ls' /= ls) $
|
||||
createDirWhenNeeded f $
|
||||
|
@ -94,7 +98,7 @@ checkLogFile f lck matchf = withSharedLock lck $ bracket setup cleanup go
|
|||
cleanup (Just h) = liftIO $ hClose h
|
||||
go Nothing = return False
|
||||
go (Just h) = do
|
||||
!r <- liftIO (any matchf . L8.lines <$> L.hGetContents h)
|
||||
!r <- liftIO (any matchf . fileLines <$> L.hGetContents h)
|
||||
return r
|
||||
f' = fromRawFilePath f
|
||||
|
||||
|
@ -111,7 +115,7 @@ calcLogFileUnsafe f start update = bracket setup cleanup go
|
|||
cleanup Nothing = noop
|
||||
cleanup (Just h) = liftIO $ hClose h
|
||||
go Nothing = return start
|
||||
go (Just h) = go' start =<< liftIO (L8.lines <$> L.hGetContents h)
|
||||
go (Just h) = go' start =<< liftIO (fileLines <$> L.hGetContents h)
|
||||
go' v [] = return v
|
||||
go' v (l:ls) = do
|
||||
let !v' = update l v
|
||||
|
@ -157,3 +161,32 @@ createDirWhenNeeded f a = a `catchNonAsync` \_e -> do
|
|||
-- done if writing the file fails.
|
||||
createAnnexDirectory (parentDir f)
|
||||
a
|
||||
|
||||
-- On windows, readFile does NewlineMode translation,
|
||||
-- stripping CR before LF. When converting to ByteString,
|
||||
-- use this to emulate that.
|
||||
fileLines :: L.ByteString -> [L.ByteString]
|
||||
#ifdef mingw32_HOST_OS
|
||||
fileLines = map stripCR . L8.lines
|
||||
where
|
||||
stripCR b = case L8.unsnoc b of
|
||||
Nothing -> b
|
||||
Just (b', e)
|
||||
| e == '\r' -> b'
|
||||
| otherwise -> b
|
||||
#else
|
||||
fileLines = L8.lines
|
||||
#endif
|
||||
|
||||
fileLines' :: S.ByteString -> [S.ByteString]
|
||||
#ifdef mingw32_HOST_OS
|
||||
fileLines' = map stripCR . S8.lines
|
||||
where
|
||||
stripCR b = case S8.unsnoc b of
|
||||
Nothing -> b
|
||||
Just (b', e)
|
||||
| e == '\r' -> b'
|
||||
| otherwise -> b
|
||||
#else
|
||||
fileLines' = S8.lines
|
||||
#endif
|
||||
|
|
Loading…
Reference in a new issue