2012-10-19 18:25:15 +00:00
|
|
|
{- management of the git-annex journal
|
2011-12-12 22:03:28 +00:00
|
|
|
-
|
|
|
|
- The journal is used to queue up changes before they are committed to the
|
2013-10-03 18:41:57 +00:00
|
|
|
- git-annex branch. Among other things, it ensures that if git-annex is
|
2011-12-12 22:03:28 +00:00
|
|
|
- interrupted, its recorded data is not lost.
|
|
|
|
-
|
2013-10-03 18:41:57 +00:00
|
|
|
- Copyright 2011-2013 Joey Hess <joey@kitenet.net>
|
2011-12-12 22:03:28 +00:00
|
|
|
-
|
|
|
|
- Licensed under the GNU GPL version 3 or higher.
|
|
|
|
-}
|
|
|
|
|
2013-05-11 20:03:00 +00:00
|
|
|
{-# LANGUAGE CPP #-}
|
|
|
|
|
2011-12-12 22:03:28 +00:00
|
|
|
module Annex.Journal where
|
|
|
|
|
|
|
|
import Common.Annex
|
|
|
|
import Annex.Exception
|
|
|
|
import qualified Git
|
2012-04-21 20:59:49 +00:00
|
|
|
import Annex.Perms
|
2011-12-12 22:03:28 +00:00
|
|
|
|
2014-01-28 18:17:14 +00:00
|
|
|
#ifdef mingw32_HOST_OS
|
|
|
|
import Utility.WinLock
|
|
|
|
#endif
|
|
|
|
|
2011-12-12 22:03:28 +00:00
|
|
|
{- Records content for a file in the branch to the journal.
|
|
|
|
-
|
|
|
|
- Using the journal, rather than immediatly staging content to the index
|
2013-10-03 18:41:57 +00:00
|
|
|
- avoids git needing to rewrite the index after every change.
|
|
|
|
-
|
|
|
|
- The file in the journal is updated atomically, which allows
|
|
|
|
- getJournalFileStale to always return a consistent journal file
|
|
|
|
- content, although possibly not the most current one.
|
|
|
|
-}
|
|
|
|
setJournalFile :: JournalLocked -> FilePath -> String -> Annex ()
|
|
|
|
setJournalFile _jl file content = do
|
2014-02-26 20:52:56 +00:00
|
|
|
tmp <- fromRepo gitAnnexTmpMiscDir
|
2012-04-21 20:59:49 +00:00
|
|
|
createAnnexDirectory =<< fromRepo gitAnnexJournalDir
|
2014-02-26 20:52:56 +00:00
|
|
|
createAnnexDirectory tmp
|
2012-04-21 20:59:49 +00:00
|
|
|
-- journal file is written atomically
|
|
|
|
jfile <- fromRepo $ journalFile file
|
|
|
|
let tmpfile = tmp </> takeFileName jfile
|
|
|
|
liftIO $ do
|
Fix encoding of data written to git-annex branch. Avoid truncating unicode characters to 8 bits.
Allow any encoding to be used, as with filenames (but utf8 is the sane
choice). Affects metadata and repository descriptions, and preferred
content expressions.
The question of what's the right encoding for the git-annex branch is a
vexing one. utf-8 would be a nice choice, but this leaves the possibility
of bad data getting into a git-annex branch somehow, and this resulting in
git-annex crashing with encoding errors, which is a failure mode I want to
avoid.
(Also, preferred content expressions can refer to filenames, and filenames
can have any encoding, so limiting to utf-8 would not be ideal.)
The union merge code already took care to not assume any encoding for a
file. Except it assumes that any \n is a literal newline, and not part of
some encoding of a character that happens to contain a newline. (At least
utf-8 avoids using newline for anything except liternal newlines.)
Adapted the git-annex branch code to use this same approach.
Note that there is a potential interop problem with Windows, since
FileSystemEncoding doesn't work there, and instead things are always
decoded as utf-8. If someone uses non-utf8 encoding for data on the
git-annex branch, this can lead to an encoding error on windows. However,
this commit doesn't actually make that any worse, because the union merge
code would similarly fail with an encoding error on windows in that
situation.
This commit was sponsored by Kyle Meyer.
2014-05-27 18:16:33 +00:00
|
|
|
writeFileAnyEncoding tmpfile content
|
2012-04-21 20:59:49 +00:00
|
|
|
moveFile tmpfile jfile
|
2011-12-12 22:03:28 +00:00
|
|
|
|
|
|
|
{- Gets any journalled content for a file in the branch. -}
|
2013-10-03 18:41:57 +00:00
|
|
|
getJournalFile :: JournalLocked -> FilePath -> Annex (Maybe String)
|
|
|
|
getJournalFile _jl = getJournalFileStale
|
|
|
|
|
|
|
|
{- Without locking, this is not guaranteed to be the most recent
|
|
|
|
- version of the file in the journal, so should not be used as a basis for
|
|
|
|
- changes. -}
|
|
|
|
getJournalFileStale :: FilePath -> Annex (Maybe String)
|
|
|
|
getJournalFileStale file = inRepo $ \g -> catchMaybeIO $
|
Fix encoding of data written to git-annex branch. Avoid truncating unicode characters to 8 bits.
Allow any encoding to be used, as with filenames (but utf8 is the sane
choice). Affects metadata and repository descriptions, and preferred
content expressions.
The question of what's the right encoding for the git-annex branch is a
vexing one. utf-8 would be a nice choice, but this leaves the possibility
of bad data getting into a git-annex branch somehow, and this resulting in
git-annex crashing with encoding errors, which is a failure mode I want to
avoid.
(Also, preferred content expressions can refer to filenames, and filenames
can have any encoding, so limiting to utf-8 would not be ideal.)
The union merge code already took care to not assume any encoding for a
file. Except it assumes that any \n is a literal newline, and not part of
some encoding of a character that happens to contain a newline. (At least
utf-8 avoids using newline for anything except liternal newlines.)
Adapted the git-annex branch code to use this same approach.
Note that there is a potential interop problem with Windows, since
FileSystemEncoding doesn't work there, and instead things are always
decoded as utf-8. If someone uses non-utf8 encoding for data on the
git-annex branch, this can lead to an encoding error on windows. However,
this commit doesn't actually make that any worse, because the union merge
code would similarly fail with an encoding error on windows in that
situation.
This commit was sponsored by Kyle Meyer.
2014-05-27 18:16:33 +00:00
|
|
|
readFileStrictAnyEncoding $ journalFile file g
|
2011-12-12 22:03:28 +00:00
|
|
|
|
|
|
|
{- List of files that have updated content in the journal. -}
|
2013-10-03 18:41:57 +00:00
|
|
|
getJournalledFiles :: JournalLocked -> Annex [FilePath]
|
|
|
|
getJournalledFiles jl = map fileJournal <$> getJournalFiles jl
|
|
|
|
|
|
|
|
getJournalledFilesStale :: Annex [FilePath]
|
|
|
|
getJournalledFilesStale = map fileJournal <$> getJournalFilesStale
|
2011-12-12 22:03:28 +00:00
|
|
|
|
|
|
|
{- List of existing journal files. -}
|
2013-10-03 18:41:57 +00:00
|
|
|
getJournalFiles :: JournalLocked -> Annex [FilePath]
|
|
|
|
getJournalFiles _jl = getJournalFilesStale
|
|
|
|
|
|
|
|
{- List of existing journal files, but without locking, may miss new ones
|
|
|
|
- just being added, or may have false positives if the journal is staged
|
|
|
|
- as it is run. -}
|
|
|
|
getJournalFilesStale :: Annex [FilePath]
|
|
|
|
getJournalFilesStale = do
|
2011-12-12 22:03:28 +00:00
|
|
|
g <- gitRepo
|
2012-09-17 04:18:07 +00:00
|
|
|
fs <- liftIO $ catchDefaultIO [] $
|
|
|
|
getDirectoryContents $ gitAnnexJournalDir g
|
2011-12-12 22:03:28 +00:00
|
|
|
return $ filter (`notElem` [".", ".."]) fs
|
|
|
|
|
|
|
|
{- Checks if there are changes in the journal. -}
|
|
|
|
journalDirty :: Annex Bool
|
2013-10-03 18:41:57 +00:00
|
|
|
journalDirty = not . null <$> getJournalFilesStale
|
2011-12-12 22:03:28 +00:00
|
|
|
|
|
|
|
{- Produces a filename to use in the journal for a file on the branch.
|
|
|
|
-
|
|
|
|
- The journal typically won't have a lot of files in it, so the hashing
|
|
|
|
- used in the branch is not necessary, and all the files are put directly
|
|
|
|
- in the journal directory.
|
|
|
|
-}
|
2012-04-21 20:59:49 +00:00
|
|
|
journalFile :: FilePath -> Git.Repo -> FilePath
|
|
|
|
journalFile file repo = gitAnnexJournalDir repo </> concatMap mangle file
|
2012-12-13 04:24:19 +00:00
|
|
|
where
|
2013-05-12 20:37:32 +00:00
|
|
|
mangle c
|
|
|
|
| c == pathSeparator = "_"
|
2013-05-12 20:43:59 +00:00
|
|
|
| c == '_' = "__"
|
|
|
|
| otherwise = [c]
|
2011-12-12 22:03:28 +00:00
|
|
|
|
|
|
|
{- Converts a journal file (relative to the journal dir) back to the
|
|
|
|
- filename on the branch. -}
|
|
|
|
fileJournal :: FilePath -> FilePath
|
2013-05-12 20:37:32 +00:00
|
|
|
fileJournal = replace [pathSeparator, pathSeparator] "_" .
|
|
|
|
replace "_" [pathSeparator]
|
2011-12-12 22:03:28 +00:00
|
|
|
|
2013-10-03 18:41:57 +00:00
|
|
|
{- Sentinal value, only produced by lockJournal; required
|
|
|
|
- as a parameter by things that need to ensure the journal is
|
|
|
|
- locked. -}
|
|
|
|
data JournalLocked = ProduceJournalLocked
|
|
|
|
|
2011-12-12 22:03:28 +00:00
|
|
|
{- Runs an action that modifies the journal, using locking to avoid
|
|
|
|
- contention with other git-annex processes. -}
|
2013-10-03 18:41:57 +00:00
|
|
|
lockJournal :: (JournalLocked -> Annex a) -> Annex a
|
2011-12-12 22:03:28 +00:00
|
|
|
lockJournal a = do
|
2013-05-11 20:03:00 +00:00
|
|
|
lockfile <- fromRepo gitAnnexJournalLock
|
|
|
|
createAnnexDirectory $ takeDirectory lockfile
|
2012-04-21 20:59:49 +00:00
|
|
|
mode <- annexFileMode
|
2013-10-03 18:41:57 +00:00
|
|
|
bracketIO (lock lockfile mode) unlock (const $ a ProduceJournalLocked)
|
2012-12-13 04:24:19 +00:00
|
|
|
where
|
2013-08-02 16:27:32 +00:00
|
|
|
#ifndef mingw32_HOST_OS
|
2013-08-04 17:54:09 +00:00
|
|
|
lock lockfile mode = do
|
2013-05-11 20:03:00 +00:00
|
|
|
l <- noUmask mode $ createFile lockfile mode
|
2012-12-13 04:24:19 +00:00
|
|
|
waitToSetLock l (WriteLock, AbsoluteSeek, 0, 0)
|
|
|
|
return l
|
|
|
|
unlock = closeFd
|
2013-05-11 20:03:00 +00:00
|
|
|
#else
|
2014-01-28 18:17:14 +00:00
|
|
|
lock lockfile _mode = waitToLock $ lockExclusive lockfile
|
|
|
|
unlock = dropLock
|
2013-05-11 20:03:00 +00:00
|
|
|
#endif
|