2012-10-19 18:25:15 +00:00
|
|
|
{- management of the git-annex journal
|
2011-12-12 22:03:28 +00:00
|
|
|
-
|
|
|
|
- The journal is used to queue up changes before they are committed to the
|
2013-10-03 18:41:57 +00:00
|
|
|
- git-annex branch. Among other things, it ensures that if git-annex is
|
2011-12-12 22:03:28 +00:00
|
|
|
- interrupted, its recorded data is not lost.
|
|
|
|
-
|
2019-01-03 17:21:48 +00:00
|
|
|
- Copyright 2011-2019 Joey Hess <id@joeyh.name>
|
2011-12-12 22:03:28 +00:00
|
|
|
-
|
2019-03-13 19:48:14 +00:00
|
|
|
- Licensed under the GNU AGPL version 3 or higher.
|
2011-12-12 22:03:28 +00:00
|
|
|
-}
|
|
|
|
|
|
|
|
module Annex.Journal where
|
|
|
|
|
2016-01-20 20:36:33 +00:00
|
|
|
import Annex.Common
|
2011-12-12 22:03:28 +00:00
|
|
|
import qualified Git
|
2012-04-21 20:59:49 +00:00
|
|
|
import Annex.Perms
|
2019-01-17 19:40:44 +00:00
|
|
|
import Annex.Tmp
|
2014-07-10 04:32:23 +00:00
|
|
|
import Annex.LockFile
|
2017-12-31 20:08:31 +00:00
|
|
|
import Utility.Directory.Stream
|
2014-01-28 18:17:14 +00:00
|
|
|
|
2019-01-03 17:21:48 +00:00
|
|
|
import qualified Data.ByteString.Lazy as L
|
|
|
|
import qualified Data.ByteString as S
|
2019-12-11 18:12:22 +00:00
|
|
|
import qualified System.FilePath.ByteString as P
|
2019-01-03 17:21:48 +00:00
|
|
|
import Data.ByteString.Builder
|
2019-12-11 18:12:22 +00:00
|
|
|
import Data.Char
|
2019-01-03 17:21:48 +00:00
|
|
|
|
|
|
|
class Journalable t where
|
|
|
|
writeJournalHandle :: Handle -> t -> IO ()
|
|
|
|
journalableByteString :: t -> L.ByteString
|
|
|
|
|
|
|
|
instance Journalable L.ByteString where
|
|
|
|
writeJournalHandle = L.hPut
|
|
|
|
journalableByteString = id
|
|
|
|
|
|
|
|
-- This is more efficient than the ByteString instance.
|
|
|
|
instance Journalable Builder where
|
|
|
|
writeJournalHandle = hPutBuilder
|
|
|
|
journalableByteString = toLazyByteString
|
|
|
|
|
2011-12-12 22:03:28 +00:00
|
|
|
{- Records content for a file in the branch to the journal.
|
|
|
|
-
|
|
|
|
- Using the journal, rather than immediatly staging content to the index
|
2013-10-03 18:41:57 +00:00
|
|
|
- avoids git needing to rewrite the index after every change.
|
|
|
|
-
|
|
|
|
- The file in the journal is updated atomically, which allows
|
|
|
|
- getJournalFileStale to always return a consistent journal file
|
|
|
|
- content, although possibly not the most current one.
|
|
|
|
-}
|
2019-11-26 19:27:22 +00:00
|
|
|
setJournalFile :: Journalable content => JournalLocked -> RawFilePath -> content -> Annex ()
|
2019-01-17 19:40:44 +00:00
|
|
|
setJournalFile _jl file content = withOtherTmp $ \tmp -> do
|
2012-04-21 20:59:49 +00:00
|
|
|
createAnnexDirectory =<< fromRepo gitAnnexJournalDir
|
|
|
|
-- journal file is written atomically
|
2020-10-29 16:02:46 +00:00
|
|
|
jfile <- fromRepo (journalFile file)
|
|
|
|
let tmpfile = fromRawFilePath (tmp P.</> P.takeFileName jfile)
|
2012-04-21 20:59:49 +00:00
|
|
|
liftIO $ do
|
2019-01-03 17:21:48 +00:00
|
|
|
withFile tmpfile WriteMode $ \h -> writeJournalHandle h content
|
2020-10-29 16:02:46 +00:00
|
|
|
moveFile tmpfile (fromRawFilePath jfile)
|
2011-12-12 22:03:28 +00:00
|
|
|
|
|
|
|
{- Gets any journalled content for a file in the branch. -}
|
2019-11-26 19:27:22 +00:00
|
|
|
getJournalFile :: JournalLocked -> RawFilePath -> Annex (Maybe L.ByteString)
|
2013-10-03 18:41:57 +00:00
|
|
|
getJournalFile _jl = getJournalFileStale
|
|
|
|
|
|
|
|
{- Without locking, this is not guaranteed to be the most recent
|
|
|
|
- version of the file in the journal, so should not be used as a basis for
|
2019-01-03 17:21:48 +00:00
|
|
|
- changes.
|
|
|
|
-
|
|
|
|
- The file is read strictly so that its content can safely be fed into
|
|
|
|
- an operation that modifies the file. While setJournalFile doesn't
|
|
|
|
- write directly to journal files and so probably avoids problems with
|
|
|
|
- writing to the same file that's being read, but there could be
|
|
|
|
- concurrency or other issues with a lazy read, and the minor loss of
|
|
|
|
- laziness doesn't matter much, as the files are not very large.
|
|
|
|
-}
|
2019-11-26 19:27:22 +00:00
|
|
|
getJournalFileStale :: RawFilePath -> Annex (Maybe L.ByteString)
|
2013-10-03 18:41:57 +00:00
|
|
|
getJournalFileStale file = inRepo $ \g -> catchMaybeIO $
|
2019-12-11 18:12:22 +00:00
|
|
|
L.fromStrict <$> S.readFile (fromRawFilePath $ journalFile file g)
|
2011-12-12 22:03:28 +00:00
|
|
|
|
2018-05-08 17:54:42 +00:00
|
|
|
{- List of existing journal files, but without locking, may miss new ones
|
|
|
|
- just being added, or may have false positives if the journal is staged
|
|
|
|
- as it is run. -}
|
|
|
|
getJournalledFilesStale :: Annex [FilePath]
|
|
|
|
getJournalledFilesStale = do
|
|
|
|
g <- gitRepo
|
|
|
|
fs <- liftIO $ catchDefaultIO [] $
|
2020-10-29 16:02:46 +00:00
|
|
|
getDirectoryContents $ fromRawFilePath $ gitAnnexJournalDir g
|
2019-12-11 18:12:22 +00:00
|
|
|
return $ filter (`notElem` [".", ".."]) $
|
|
|
|
map (fromRawFilePath . fileJournal . toRawFilePath) fs
|
2018-05-08 17:54:42 +00:00
|
|
|
|
2014-07-10 03:36:53 +00:00
|
|
|
withJournalHandle :: (DirectoryHandle -> IO a) -> Annex a
|
|
|
|
withJournalHandle a = do
|
2020-10-29 16:02:46 +00:00
|
|
|
d <- fromRawFilePath <$> fromRepo gitAnnexJournalDir
|
2014-07-10 03:36:53 +00:00
|
|
|
bracketIO (openDirectory d) closeDirectory (liftIO . a)
|
|
|
|
|
2011-12-12 22:03:28 +00:00
|
|
|
{- Checks if there are changes in the journal. -}
|
|
|
|
journalDirty :: Annex Bool
|
2014-07-10 04:16:53 +00:00
|
|
|
journalDirty = do
|
2020-10-29 16:02:46 +00:00
|
|
|
d <- fromRawFilePath <$> fromRepo gitAnnexJournalDir
|
2014-07-10 04:16:53 +00:00
|
|
|
liftIO $
|
|
|
|
(not <$> isDirectoryEmpty d)
|
|
|
|
`catchIO` (const $ doesDirectoryExist d)
|
2011-12-12 22:03:28 +00:00
|
|
|
|
|
|
|
{- Produces a filename to use in the journal for a file on the branch.
|
|
|
|
-
|
|
|
|
- The journal typically won't have a lot of files in it, so the hashing
|
|
|
|
- used in the branch is not necessary, and all the files are put directly
|
|
|
|
- in the journal directory.
|
|
|
|
-}
|
2019-12-11 18:12:22 +00:00
|
|
|
journalFile :: RawFilePath -> Git.Repo -> RawFilePath
|
2019-12-18 15:29:34 +00:00
|
|
|
journalFile file repo = gitAnnexJournalDir' repo P.</> S.concatMap mangle file
|
2012-12-13 04:24:19 +00:00
|
|
|
where
|
2013-05-12 20:37:32 +00:00
|
|
|
mangle c
|
2019-12-18 15:29:34 +00:00
|
|
|
| P.isPathSeparator c = S.singleton underscore
|
|
|
|
| c == underscore = S.pack [underscore, underscore]
|
|
|
|
| otherwise = S.singleton c
|
|
|
|
underscore = fromIntegral (ord '_')
|
2011-12-12 22:03:28 +00:00
|
|
|
|
|
|
|
{- Converts a journal file (relative to the journal dir) back to the
|
|
|
|
- filename on the branch. -}
|
2019-12-11 18:12:22 +00:00
|
|
|
fileJournal :: RawFilePath -> RawFilePath
|
2019-12-18 15:29:34 +00:00
|
|
|
fileJournal = go
|
2019-12-11 18:12:22 +00:00
|
|
|
where
|
2019-12-18 15:29:34 +00:00
|
|
|
go b =
|
|
|
|
let (h, t) = S.break (== underscore) b
|
|
|
|
in h <> case S.uncons t of
|
|
|
|
Nothing -> t
|
|
|
|
Just (_u, t') -> case S.uncons t' of
|
|
|
|
Nothing -> t'
|
|
|
|
Just (w, t'')
|
|
|
|
| w == underscore ->
|
|
|
|
S.cons underscore (go t'')
|
|
|
|
| otherwise ->
|
|
|
|
S.cons P.pathSeparator (go t')
|
|
|
|
|
|
|
|
underscore = fromIntegral (ord '_')
|
2011-12-12 22:03:28 +00:00
|
|
|
|
2013-10-03 18:41:57 +00:00
|
|
|
{- Sentinal value, only produced by lockJournal; required
|
|
|
|
- as a parameter by things that need to ensure the journal is
|
|
|
|
- locked. -}
|
|
|
|
data JournalLocked = ProduceJournalLocked
|
|
|
|
|
2011-12-12 22:03:28 +00:00
|
|
|
{- Runs an action that modifies the journal, using locking to avoid
|
|
|
|
- contention with other git-annex processes. -}
|
2013-10-03 18:41:57 +00:00
|
|
|
lockJournal :: (JournalLocked -> Annex a) -> Annex a
|
2014-07-10 04:32:23 +00:00
|
|
|
lockJournal a = withExclusiveLock gitAnnexJournalLock $ a ProduceJournalLocked
|