start implementing hidden git-annex repositories
This adds a separate journal, which does not currently get committed to an index, but is planned to be committed to .git/annex/index-private. Changes that are regarding a UUID that is private will get written to this journal, and so will not be published into the git-annex branch. All log writing should have been made to indicate the UUID it's regarding, though I've not verified this yet. Currently, no UUIDs are treated as private yet, a way to configure that is needed. The implementation is careful to not add any additional IO work when privateUUIDsKnown is False. It will skip looking at the private journal at all. So this should be free, or nearly so, unless the feature is used. When it is used, all branch reads will be about twice as expensive. It is very lucky -- or very prudent design -- that Annex.Branch.change and maybeChange are the only ways to change a file on the branch, and Annex.Branch.set is only internal use. That let Annex.Branch.get always yield any private information that has been recorded, without the risk that Annex.Branch.set might be called, with a non-private UUID, and end up leaking the private information into the git-annex branch. And, this relies on the way git-annex union merges the git-annex branch. When reading a file, there can be a public and a private version, and they are just concacenated together. That will be handled the same as if there were two diverged git-annex branches that got union merged.
This commit is contained in:
parent
b2222e4639
commit
05989556a2
26 changed files with 189 additions and 94 deletions
|
@ -1,6 +1,6 @@
|
|||
{- management of the git-annex branch
|
||||
-
|
||||
- Copyright 2011-2020 Joey Hess <id@joeyh.name>
|
||||
- Copyright 2011-2021 Joey Hess <id@joeyh.name>
|
||||
-
|
||||
- Licensed under the GNU AGPL version 3 or higher.
|
||||
-}
|
||||
|
@ -20,6 +20,7 @@ module Annex.Branch (
|
|||
updateTo,
|
||||
get,
|
||||
getHistorical,
|
||||
RegardingUUID(..),
|
||||
change,
|
||||
maybeChange,
|
||||
commitMessage,
|
||||
|
@ -172,7 +173,7 @@ updateTo' :: [(Git.Sha, Git.Branch)] -> Annex UpdateMade
|
|||
updateTo' pairs = do
|
||||
-- ensure branch exists, and get its current ref
|
||||
branchref <- getBranch
|
||||
dirty <- journalDirty
|
||||
dirty <- journalDirty gitAnnexJournalDir
|
||||
ignoredrefs <- getIgnoredRefs
|
||||
let unignoredrefs = excludeset ignoredrefs pairs
|
||||
tomerge <- if null unignoredrefs
|
||||
|
@ -265,9 +266,12 @@ get file = getCache file >>= \case
|
|||
- (Changing the value this returns, and then merging is always the
|
||||
- same as using get, and then changing its value.) -}
|
||||
getLocal :: RawFilePath -> Annex L.ByteString
|
||||
getLocal file = do
|
||||
getLocal = getLocal' (GetPrivate True)
|
||||
|
||||
getLocal' :: GetPrivate -> RawFilePath -> Annex L.ByteString
|
||||
getLocal' getprivate file = do
|
||||
fastDebug "Annex.Branch" ("read " ++ fromRawFilePath file)
|
||||
go =<< getJournalFileStale file
|
||||
go =<< getJournalFileStale getprivate file
|
||||
where
|
||||
go (Just journalcontent) = return journalcontent
|
||||
go Nothing = getRef fullname file
|
||||
|
@ -297,24 +301,36 @@ getRef ref file = withIndex $ catFile ref file
|
|||
- Note that this does not cause the branch to be merged, it only
|
||||
- modifes the current content of the file on the branch.
|
||||
-}
|
||||
change :: Journalable content => RawFilePath -> (L.ByteString -> content) -> Annex ()
|
||||
change file f = lockJournal $ \jl -> f <$> getLocal file >>= set jl file
|
||||
change :: Journalable content => RegardingUUID -> RawFilePath -> (L.ByteString -> content) -> Annex ()
|
||||
change ru file f = lockJournal $ \jl -> f <$> getToChange ru file >>= set jl ru file
|
||||
|
||||
{- Applies a function which can modify the content of a file, or not. -}
|
||||
maybeChange :: Journalable content => RawFilePath -> (L.ByteString -> Maybe content) -> Annex ()
|
||||
maybeChange file f = lockJournal $ \jl -> do
|
||||
v <- getLocal file
|
||||
maybeChange :: Journalable content => RegardingUUID -> RawFilePath -> (L.ByteString -> Maybe content) -> Annex ()
|
||||
maybeChange ru file f = lockJournal $ \jl -> do
|
||||
v <- getToChange ru file
|
||||
case f v of
|
||||
Just jv ->
|
||||
let b = journalableByteString jv
|
||||
in when (v /= b) $ set jl file b
|
||||
in when (v /= b) $ set jl ru file b
|
||||
_ -> noop
|
||||
|
||||
{- Records new content of a file into the journal -}
|
||||
set :: Journalable content => JournalLocked -> RawFilePath -> content -> Annex ()
|
||||
set jl f c = do
|
||||
{- Only get private information when the RegardingUUID is itself private. -}
|
||||
getToChange :: RegardingUUID -> RawFilePath -> Annex L.ByteString
|
||||
getToChange = getLocal' . GetPrivate . regardingPrivateUUID
|
||||
|
||||
{- Records new content of a file into the journal.
|
||||
-
|
||||
- This is not exported; all changes have to be made via change. This
|
||||
- ensures that information that was written to the branch is not
|
||||
- overwritten. Also, it avoids a get followed by a set without taking into
|
||||
- account whether private information was gotten from the private
|
||||
- git-annex index, and should not be written to the public git-annex
|
||||
- branch.
|
||||
-}
|
||||
set :: Journalable content => JournalLocked -> RegardingUUID -> RawFilePath -> content -> Annex ()
|
||||
set jl ru f c = do
|
||||
journalChanged
|
||||
setJournalFile jl f c
|
||||
setJournalFile jl ru f c
|
||||
fastDebug "Annex.Branch" ("set " ++ fromRawFilePath f)
|
||||
-- Could cache the new content, but it would involve
|
||||
-- evaluating a Journalable Builder twice, which is not very
|
||||
|
@ -329,7 +345,7 @@ commitMessage = fromMaybe "update" . annexCommitMessage <$> Annex.getGitConfig
|
|||
|
||||
{- Stages the journal, and commits staged changes to the branch. -}
|
||||
commit :: String -> Annex ()
|
||||
commit = whenM journalDirty . forceCommit
|
||||
commit = whenM (journalDirty gitAnnexJournalDir) . forceCommit
|
||||
|
||||
{- Commits the current index to the branch even without any journalled
|
||||
- changes. -}
|
||||
|
@ -407,11 +423,14 @@ files :: Annex ([RawFilePath], IO Bool)
|
|||
files = do
|
||||
_ <- update
|
||||
(bfs, cleanup) <- branchFiles
|
||||
-- ++ forces the content of the first list to be buffered in memory,
|
||||
-- so use getJournalledFilesStale which should be much smaller most
|
||||
-- of the time. branchFiles will stream as the list is consumed.
|
||||
l <- (++)
|
||||
<$> getJournalledFilesStale
|
||||
-- ++ forces the content of all but the last list to be buffered in
|
||||
-- memory, so use getJournalledFilesStale which should be much smaller
|
||||
-- most of the time. branchFiles will stream as the list is consumed.
|
||||
l <- (\a b c -> a ++ b ++ c)
|
||||
<$> (if privateUUIDsKnown
|
||||
then getJournalledFilesStale gitAnnexPrivateJournalDir
|
||||
else pure [])
|
||||
<*> (getJournalledFilesStale gitAnnexJournalDir)
|
||||
<*> pure bfs
|
||||
return (l, cleanup)
|
||||
|
||||
|
@ -520,7 +539,7 @@ stageJournal jl commitindex = withIndex $ withOtherTmp $ \tmpdir -> do
|
|||
let dir = gitAnnexJournalDir g
|
||||
(jlogf, jlogh) <- openjlog (fromRawFilePath tmpdir)
|
||||
h <- hashObjectHandle
|
||||
withJournalHandle $ \jh ->
|
||||
withJournalHandle gitAnnexJournalDir $ \jh ->
|
||||
Git.UpdateIndex.streamUpdateIndex g
|
||||
[genstream dir h jh jlogh]
|
||||
commitindex
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
- git-annex branch. Among other things, it ensures that if git-annex is
|
||||
- interrupted, its recorded data is not lost.
|
||||
-
|
||||
- Copyright 2011-2019 Joey Hess <id@joeyh.name>
|
||||
- Copyright 2011-2021 Joey Hess <id@joeyh.name>
|
||||
-
|
||||
- Licensed under the GNU AGPL version 3 or higher.
|
||||
-}
|
||||
|
@ -39,6 +39,24 @@ instance Journalable Builder where
|
|||
writeJournalHandle = hPutBuilder
|
||||
journalableByteString = toLazyByteString
|
||||
|
||||
{- When a file in the git-annex branch is changed, this indicates what
|
||||
- repository UUID (or in some cases, UUIDs) a change is regarding.
|
||||
-
|
||||
- Using this lets changes regarding private UUIDs be written to the
|
||||
- private index, rather than to the main branch index, so it does
|
||||
- not get exposed to other remotes.
|
||||
-}
|
||||
data RegardingUUID = RegardingUUID [UUID]
|
||||
|
||||
regardingPrivateUUID :: RegardingUUID -> Bool
|
||||
regardingPrivateUUID _ = False -- TODO
|
||||
|
||||
-- Are any private UUIDs known to exist? If so, extra work has to be done,
|
||||
-- to check for information separately recorded for them, outside the usual
|
||||
-- locations.
|
||||
privateUUIDsKnown :: Bool
|
||||
privateUUIDsKnown = False -- TODO
|
||||
|
||||
{- Records content for a file in the branch to the journal.
|
||||
-
|
||||
- Using the journal, rather than immediatly staging content to the index
|
||||
|
@ -48,20 +66,25 @@ instance Journalable Builder where
|
|||
- getJournalFileStale to always return a consistent journal file
|
||||
- content, although possibly not the most current one.
|
||||
-}
|
||||
setJournalFile :: Journalable content => JournalLocked -> RawFilePath -> content -> Annex ()
|
||||
setJournalFile _jl file content = withOtherTmp $ \tmp -> do
|
||||
createAnnexDirectory =<< fromRepo gitAnnexJournalDir
|
||||
setJournalFile :: Journalable content => JournalLocked -> RegardingUUID -> RawFilePath -> content -> Annex ()
|
||||
setJournalFile _jl ru file content = withOtherTmp $ \tmp -> do
|
||||
jd <- fromRepo $ if regardingPrivateUUID ru
|
||||
then gitAnnexPrivateJournalDir
|
||||
else gitAnnexJournalDir
|
||||
createAnnexDirectory jd
|
||||
-- journal file is written atomically
|
||||
jfile <- fromRepo (journalFile file)
|
||||
let tmpfile = fromRawFilePath (tmp P.</> P.takeFileName jfile)
|
||||
let jfile = journalFile file
|
||||
let tmpfile = fromRawFilePath (tmp P.</> jfile)
|
||||
liftIO $ do
|
||||
withFile tmpfile WriteMode $ \h -> writeJournalHandle h content
|
||||
moveFile tmpfile (fromRawFilePath jfile)
|
||||
moveFile tmpfile (fromRawFilePath (jd P.</> jfile))
|
||||
|
||||
{- Gets any journalled content for a file in the branch. -}
|
||||
getJournalFile :: JournalLocked -> RawFilePath -> Annex (Maybe L.ByteString)
|
||||
getJournalFile :: JournalLocked -> GetPrivate -> RawFilePath -> Annex (Maybe L.ByteString)
|
||||
getJournalFile _jl = getJournalFileStale
|
||||
|
||||
data GetPrivate = GetPrivate Bool
|
||||
|
||||
{- Without locking, this is not guaranteed to be the most recent
|
||||
- version of the file in the journal, so should not be used as a basis for
|
||||
- changes.
|
||||
|
@ -73,42 +96,55 @@ getJournalFile _jl = getJournalFileStale
|
|||
- concurrency or other issues with a lazy read, and the minor loss of
|
||||
- laziness doesn't matter much, as the files are not very large.
|
||||
-}
|
||||
getJournalFileStale :: RawFilePath -> Annex (Maybe L.ByteString)
|
||||
getJournalFileStale file = inRepo $ \g -> catchMaybeIO $
|
||||
L.fromStrict <$> S.readFile (fromRawFilePath $ journalFile file g)
|
||||
getJournalFileStale :: GetPrivate -> RawFilePath -> Annex (Maybe L.ByteString)
|
||||
getJournalFileStale (GetPrivate getprivate) file = inRepo $ \g ->
|
||||
if getprivate
|
||||
then do
|
||||
x <- getfrom (gitAnnexJournalDir g)
|
||||
y <- getfrom (gitAnnexPrivateJournalDir g)
|
||||
-- This concacenation is the same as happens in a
|
||||
-- merge of two git-annex branches.
|
||||
return (x <> y)
|
||||
else getfrom (gitAnnexJournalDir g)
|
||||
where
|
||||
jfile = journalFile file
|
||||
getfrom d = catchMaybeIO $
|
||||
L.fromStrict <$> S.readFile (fromRawFilePath (d P.</> jfile))
|
||||
|
||||
{- List of existing journal files, but without locking, may miss new ones
|
||||
- just being added, or may have false positives if the journal is staged
|
||||
- as it is run. -}
|
||||
getJournalledFilesStale :: Annex [RawFilePath]
|
||||
getJournalledFilesStale = do
|
||||
{- List of existing journal files in a journal directory, but without locking,
|
||||
- may miss new ones just being added, or may have false positives if the
|
||||
- journal is staged as it is run. -}
|
||||
getJournalledFilesStale :: (Git.Repo -> RawFilePath) -> Annex [RawFilePath]
|
||||
getJournalledFilesStale getjournaldir = do
|
||||
g <- gitRepo
|
||||
fs <- liftIO $ catchDefaultIO [] $
|
||||
getDirectoryContents $ fromRawFilePath $ gitAnnexJournalDir g
|
||||
getDirectoryContents $ fromRawFilePath (getjournaldir g)
|
||||
return $ filter (`notElem` [".", ".."]) $
|
||||
map (fileJournal . toRawFilePath) fs
|
||||
|
||||
withJournalHandle :: (DirectoryHandle -> IO a) -> Annex a
|
||||
withJournalHandle a = do
|
||||
d <- fromRawFilePath <$> fromRepo gitAnnexJournalDir
|
||||
{- Directory handle open on a journal directory. -}
|
||||
withJournalHandle :: (Git.Repo -> RawFilePath) -> (DirectoryHandle -> IO a) -> Annex a
|
||||
withJournalHandle getjournaldir a = do
|
||||
d <- fromRawFilePath <$> fromRepo getjournaldir
|
||||
bracketIO (openDirectory d) closeDirectory (liftIO . a)
|
||||
|
||||
{- Checks if there are changes in the journal. -}
|
||||
journalDirty :: Annex Bool
|
||||
journalDirty = do
|
||||
d <- fromRawFilePath <$> fromRepo gitAnnexJournalDir
|
||||
journalDirty :: (Git.Repo -> RawFilePath) -> Annex Bool
|
||||
journalDirty getjournaldir = do
|
||||
d <- fromRawFilePath <$> fromRepo getjournaldir
|
||||
liftIO $
|
||||
(not <$> isDirectoryEmpty d)
|
||||
`catchIO` (const $ doesDirectoryExist d)
|
||||
|
||||
{- Produces a filename to use in the journal for a file on the branch.
|
||||
- The filename does not include the journal directory.
|
||||
-
|
||||
- The journal typically won't have a lot of files in it, so the hashing
|
||||
- used in the branch is not necessary, and all the files are put directly
|
||||
- in the journal directory.
|
||||
-}
|
||||
journalFile :: RawFilePath -> Git.Repo -> RawFilePath
|
||||
journalFile file repo = gitAnnexJournalDir repo P.</> S.concatMap mangle file
|
||||
journalFile :: RawFilePath -> RawFilePath
|
||||
journalFile file = S.concatMap mangle file
|
||||
where
|
||||
mangle c
|
||||
| P.isPathSeparator c = S.singleton underscore
|
||||
|
|
|
@ -63,9 +63,11 @@ module Annex.Locations (
|
|||
gitAnnexFeedState,
|
||||
gitAnnexMergeDir,
|
||||
gitAnnexJournalDir,
|
||||
gitAnnexPrivateJournalDir,
|
||||
gitAnnexJournalLock,
|
||||
gitAnnexGitQueueLock,
|
||||
gitAnnexIndex,
|
||||
gitAnnexPrivateIndex,
|
||||
gitAnnexIndexStatus,
|
||||
gitAnnexViewIndex,
|
||||
gitAnnexViewLog,
|
||||
|
@ -431,6 +433,12 @@ gitAnnexJournalDir :: Git.Repo -> RawFilePath
|
|||
gitAnnexJournalDir r =
|
||||
P.addTrailingPathSeparator $ gitAnnexDir r P.</> "journal"
|
||||
|
||||
{- .git/annex/journal.private/ is used to journal changes regarding private
|
||||
- repositories. -}
|
||||
gitAnnexPrivateJournalDir :: Git.Repo -> RawFilePath
|
||||
gitAnnexPrivateJournalDir r =
|
||||
P.addTrailingPathSeparator $ gitAnnexDir r P.</> "journal-private"
|
||||
|
||||
{- Lock file for the journal. -}
|
||||
gitAnnexJournalLock :: Git.Repo -> RawFilePath
|
||||
gitAnnexJournalLock r = gitAnnexDir r P.</> "journal.lck"
|
||||
|
@ -444,6 +452,11 @@ gitAnnexGitQueueLock r = gitAnnexDir r P.</> "gitqueue.lck"
|
|||
gitAnnexIndex :: Git.Repo -> RawFilePath
|
||||
gitAnnexIndex r = gitAnnexDir r P.</> "index"
|
||||
|
||||
{- .git/annex/index-private is used to store information that is not to
|
||||
- be exposed to the git-annex branch. -}
|
||||
gitAnnexPrivateIndex :: Git.Repo -> RawFilePath
|
||||
gitAnnexPrivateIndex r = gitAnnexDir r P.</> "index-private"
|
||||
|
||||
{- Holds the ref of the git-annex branch that the index was last updated to.
|
||||
-
|
||||
- The .lck in the name is a historical accident; this is not used as a
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue