start implementing hidden git-annex repositories

This adds a separate journal, which does not currently get committed to
an index, but is planned to be committed to .git/annex/index-private.

Changes that are regarding a UUID that is private will get written to
this journal, and so will not be published into the git-annex branch.

All log writing should have been made to indicate the UUID it's
regarding, though I've not verified this yet.

Currently, no UUIDs are treated as private yet, a way to configure that
is needed.

The implementation is careful to not add any additional IO work when
privateUUIDsKnown is False. It will skip looking at the private journal
at all. So this should be free, or nearly so, unless the feature is
used. When it is used, all branch reads will be about twice as expensive.

It is very lucky -- or very prudent design -- that Annex.Branch.change
and maybeChange are the only ways to change a file on the branch,
and Annex.Branch.set is only internal use. That let Annex.Branch.get
always yield any private information that has been recorded, without
the risk that Annex.Branch.set might be called, with a non-private UUID,
and end up leaking the private information into the git-annex branch.

And, this relies on the way git-annex union merges the git-annex branch.
When reading a file, there can be a public and a private version, and
they are just concacenated together. That will be handled the same as if
there were two diverged git-annex branches that got union merged.
This commit is contained in:
Joey Hess 2021-04-20 14:32:41 -04:00
parent b2222e4639
commit 05989556a2
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
26 changed files with 189 additions and 94 deletions

View file

@ -28,7 +28,7 @@ data Activity
recordActivity :: Activity -> UUID -> Annex ()
recordActivity act uuid = do
c <- currentVectorClock
Annex.Branch.change activityLog $
Annex.Branch.change (Annex.Branch.RegardingUUID [uuid]) activityLog $
buildLogOld buildActivity
. changeLog c uuid (Right act)
. parseLogOld parseActivity

View file

@ -37,8 +37,10 @@ chunksStored :: UUID -> Key -> ChunkMethod -> ChunkCount -> Annex ()
chunksStored u k chunkmethod chunkcount = do
c <- currentVectorClock
config <- Annex.getGitConfig
Annex.Branch.change (chunkLogFile config k) $
buildLog . changeMapLog c (u, chunkmethod) chunkcount . parseLog
Annex.Branch.change
(Annex.Branch.RegardingUUID [u])
(chunkLogFile config k)
(buildLog . changeMapLog c (u, chunkmethod) chunkcount . parseLog)
chunksRemoved :: UUID -> Key -> ChunkMethod -> Annex ()
chunksRemoved u k chunkmethod = chunksStored u k chunkmethod 0

View file

@ -35,7 +35,7 @@ setGlobalConfig name new = do
setGlobalConfig' :: ConfigKey -> ConfigValue -> Annex ()
setGlobalConfig' name new = do
c <- currentVectorClock
Annex.Branch.change configLog $
Annex.Branch.change (Annex.Branch.RegardingUUID []) configLog $
buildGlobalConfig . changeMapLog c name new . parseGlobalConfig
unsetGlobalConfig :: ConfigKey -> Annex ()

View file

@ -32,8 +32,10 @@ recordContentIdentifier :: RemoteStateHandle -> ContentIdentifier -> Key -> Anne
recordContentIdentifier (RemoteStateHandle u) cid k = do
c <- currentVectorClock
config <- Annex.getGitConfig
Annex.Branch.maybeChange (remoteContentIdentifierLogFile config k) $
addcid c . parseLog
Annex.Branch.maybeChange
(Annex.Branch.RegardingUUID [u])
(remoteContentIdentifierLogFile config k)
(addcid c . parseLog)
where
addcid c v
| cid `elem` l = Nothing -- no change needed

View file

@ -26,7 +26,7 @@ import Logs.Difference.Pure
recordDifferences :: Differences -> UUID -> Annex ()
recordDifferences ds@(Differences {}) uuid = do
c <- currentVectorClock
Annex.Branch.change differenceLog $
Annex.Branch.change (Annex.Branch.RegardingUUID [uuid]) differenceLog $
buildLogOld byteString
. changeLog c uuid (encodeBS $ showDifferences ds)
. parseLogOld A.takeByteString

View file

@ -65,10 +65,10 @@ recordExportBeginning remoteuuid newtree = do
. parseExportLogMap
<$> Annex.Branch.get exportLog
let new = updateIncompleteExportedTreeish old (nub (newtree:incompleteExportedTreeishes [old]))
Annex.Branch.change exportLog $
buildExportLog
. changeMapLog c ep new
. parseExportLog
Annex.Branch.change
(Annex.Branch.RegardingUUID [remoteuuid, u])
exportLog
(buildExportLog . changeMapLog c ep new . parseExportLog)
recordExportTreeish newtree
-- Graft a tree ref into the git-annex branch. This is done
@ -96,7 +96,9 @@ recordExportUnderway remoteuuid ec = do
hereuuid <- getUUID
let ep = ExportParticipants { exportFrom = hereuuid, exportTo = remoteuuid }
let exported = mkExported (newTreeish ec) []
Annex.Branch.change exportLog $
Annex.Branch.change
(Annex.Branch.RegardingUUID [remoteuuid, hereuuid])
exportLog $
buildExportLog
. changeMapLog c ep exported
. M.mapWithKey (updateForExportChange remoteuuid ec c hereuuid)

View file

@ -39,7 +39,7 @@ groupChange :: UUID -> (S.Set Group -> S.Set Group) -> Annex ()
groupChange uuid@(UUID _) modifier = do
curr <- lookupGroups uuid
c <- currentVectorClock
Annex.Branch.change groupLog $
Annex.Branch.change (Annex.Branch.RegardingUUID [uuid]) groupLog $
buildLogOld buildGroup . changeLog c uuid (modifier curr) . parseLogOld parseGroup
-- The changed group invalidates the preferred content cache.

View file

@ -66,7 +66,8 @@ logChange = logChange' logNow
logChange' :: (LogStatus -> LogInfo -> Annex LogLine) -> Key -> UUID -> LogStatus -> Annex ()
logChange' mklog key u@(UUID _) s = do
config <- Annex.getGitConfig
maybeAddLog (locationLogFile config key) =<< mklog s (LogInfo (fromUUID u))
maybeAddLog (Annex.Branch.RegardingUUID [u]) (locationLogFile config key)
=<< mklog s (LogInfo (fromUUID u))
logChange' _ _ NoUUID _ = noop
{- Returns a list of repository UUIDs that, according to the log, have
@ -114,7 +115,9 @@ setDead key = do
ls <- compactLog <$> readLog logfile
mapM_ (go logfile) (filter (\l -> status l == InfoMissing) ls)
where
go logfile l = addLog logfile $ setDead' l
go logfile l =
let u = toUUID (fromLogInfo (info l))
in addLog (Annex.Branch.RegardingUUID [u]) logfile (setDead' l)
{- Note that the timestamp in the log is updated minimally, so that this
- can be overruled by other location log changes. -}

View file

@ -99,25 +99,25 @@ getCurrentRemoteMetaData (RemoteStateHandle u) k = extractRemoteMetaData u <$>
{- Adds in some metadata, which can override existing values, or unset
- them, but otherwise leaves any existing metadata as-is. -}
addMetaData :: Key -> MetaData -> Annex ()
addMetaData = addMetaData' metaDataLogFile
addMetaData = addMetaData' (Annex.Branch.RegardingUUID []) metaDataLogFile
addMetaData' :: (GitConfig -> Key -> RawFilePath) -> Key -> MetaData -> Annex ()
addMetaData' getlogfile k metadata =
addMetaDataClocked' getlogfile k metadata =<< currentVectorClock
addMetaData' :: Annex.Branch.RegardingUUID -> (GitConfig -> Key -> RawFilePath) -> Key -> MetaData -> Annex ()
addMetaData' ru getlogfile k metadata =
addMetaDataClocked' ru getlogfile k metadata =<< currentVectorClock
{- Reusing the same VectorClock when making changes to the metadata
- of multiple keys is a nice optimisation. The same metadata lines
- will tend to be generated across the different log files, and so
- git will be able to pack the data more efficiently. -}
addMetaDataClocked :: Key -> MetaData -> VectorClock -> Annex ()
addMetaDataClocked = addMetaDataClocked' metaDataLogFile
addMetaDataClocked = addMetaDataClocked' (Annex.Branch.RegardingUUID []) metaDataLogFile
addMetaDataClocked' :: (GitConfig -> Key -> RawFilePath) -> Key -> MetaData -> VectorClock -> Annex ()
addMetaDataClocked' getlogfile k d@(MetaData m) c
addMetaDataClocked' :: Annex.Branch.RegardingUUID -> (GitConfig -> Key -> RawFilePath) -> Key -> MetaData -> VectorClock -> Annex ()
addMetaDataClocked' ru getlogfile k d@(MetaData m) c
| d == emptyMetaData = noop
| otherwise = do
config <- Annex.getGitConfig
Annex.Branch.change (getlogfile config k) $
Annex.Branch.change ru (getlogfile config k) $
buildLog . simplifyLog
. S.insert (LogEntry c metadata)
. parseLog
@ -126,7 +126,7 @@ addMetaDataClocked' getlogfile k d@(MetaData m) c
addRemoteMetaData :: Key -> RemoteStateHandle -> MetaData -> Annex ()
addRemoteMetaData k (RemoteStateHandle u) m =
addMetaData' remoteMetaDataLogFile k $ fromRemoteMetaData $
addMetaData' (Annex.Branch.RegardingUUID [u]) remoteMetaDataLogFile k $ fromRemoteMetaData $
RemoteMetaData u m
getMetaDataLog :: Key -> Annex (Log MetaData)
@ -153,8 +153,10 @@ copyMetaData oldkey newkey
then return False
else do
config <- Annex.getGitConfig
Annex.Branch.change (metaDataLogFile config newkey) $
const $ buildLog l
Annex.Branch.change
(Annex.Branch.RegardingUUID [])
(metaDataLogFile config newkey)
(const $ buildLog l)
return True
readLog :: RawFilePath -> Annex (Log MetaData)

View file

@ -26,7 +26,7 @@ newtype Fingerprint = Fingerprint String
recordFingerprint :: Fingerprint -> UUID -> Annex ()
recordFingerprint fp uuid = do
c <- currentVectorClock
Annex.Branch.change multicastLog $
Annex.Branch.change (Annex.Branch.RegardingUUID [uuid]) multicastLog $
buildLogOld buildFindgerPrint
. changeLog c uuid fp
. parseLogOld fingerprintParser

View file

@ -18,6 +18,7 @@ module Logs.NumCopies (
import Annex.Common
import qualified Annex
import qualified Annex.Branch
import Types.NumCopies
import Logs
import Logs.SingleValue
@ -34,13 +35,13 @@ setGlobalNumCopies :: NumCopies -> Annex ()
setGlobalNumCopies new = do
curr <- getGlobalNumCopies
when (curr /= Just new) $
setLog numcopiesLog new
setLog (Annex.Branch.RegardingUUID []) numcopiesLog new
setGlobalMinCopies :: MinCopies -> Annex ()
setGlobalMinCopies new = do
curr <- getGlobalMinCopies
when (curr /= Just new) $
setLog mincopiesLog new
setLog (Annex.Branch.RegardingUUID []) mincopiesLog new
{- Value configured in the numcopies log. Cached for speed. -}
getGlobalNumCopies :: Annex (Maybe NumCopies)

View file

@ -31,7 +31,7 @@ requiredContentSet = setLog requiredContentLog
setLog :: RawFilePath -> UUID -> PreferredContentExpression -> Annex ()
setLog logfile uuid@(UUID _) val = do
c <- currentVectorClock
Annex.Branch.change logfile $
Annex.Branch.change (Annex.Branch.RegardingUUID [uuid]) logfile $
buildLogOld buildPreferredContentExpression
. changeLog c uuid val
. parseLogOld parsePreferredContentExpression
@ -45,7 +45,9 @@ setLog _ NoUUID _ = error "unknown UUID; cannot modify"
groupPreferredContentSet :: Group -> PreferredContentExpression -> Annex ()
groupPreferredContentSet g val = do
c <- currentVectorClock
Annex.Branch.change groupPreferredContentLog $
Annex.Branch.change
(Annex.Branch.RegardingUUID [])
groupPreferredContentLog $
buildGroupPreferredContent
. changeMapLog c g val
. parseGroupPreferredContent

View file

@ -30,16 +30,16 @@ import Git.Types (RefDate)
{- Adds a LogLine to the log, removing any LogLines that are obsoleted by
- adding it. -}
addLog :: RawFilePath -> LogLine -> Annex ()
addLog file line = Annex.Branch.change file $ \b ->
addLog :: Annex.Branch.RegardingUUID -> RawFilePath -> LogLine -> Annex ()
addLog ru file line = Annex.Branch.change ru file $ \b ->
buildLog $ compactLog (line : parseLog b)
{- When a LogLine already exists with the same status and info, but an
- older timestamp, that LogLine is preserved, rather than updating the log
- with a newer timestamp.
-}
maybeAddLog :: RawFilePath -> LogLine -> Annex ()
maybeAddLog file line = Annex.Branch.maybeChange file $ \s -> do
maybeAddLog :: Annex.Branch.RegardingUUID -> RawFilePath -> LogLine -> Annex ()
maybeAddLog ru file line = Annex.Branch.maybeChange ru file $ \s -> do
m <- insertNewStatus line $ logMap $ parseLog s
return $ buildLog $ mapLog m

View file

@ -33,7 +33,7 @@ import qualified Data.Map as M
configSet :: UUID -> RemoteConfig -> Annex ()
configSet u cfg = do
c <- currentVectorClock
Annex.Branch.change remoteLog $
Annex.Branch.change (Annex.Branch.RegardingUUID [u]) remoteLog $
buildRemoteConfigLog
. changeLog c u (removeSameasInherited cfg)
. parseRemoteConfigLog

View file

@ -28,8 +28,10 @@ setRemoteState :: RemoteStateHandle -> Key -> RemoteState -> Annex ()
setRemoteState (RemoteStateHandle u) k s = do
c <- currentVectorClock
config <- Annex.getGitConfig
Annex.Branch.change (remoteStateLogFile config k) $
buildRemoteState . changeLog c u s . parseRemoteState
Annex.Branch.change
(Annex.Branch.RegardingUUID [u])
(remoteStateLogFile config k)
(buildRemoteState . changeLog c u s . parseRemoteState)
buildRemoteState :: Log RemoteState -> Builder
buildRemoteState = buildLogNew (byteString . encodeBS)

View file

@ -33,7 +33,7 @@ import Logs.File
scheduleSet :: UUID -> [ScheduledActivity] -> Annex ()
scheduleSet uuid@(UUID _) activities = do
c <- currentVectorClock
Annex.Branch.change scheduleLog $
Annex.Branch.change (Annex.Branch.RegardingUUID [uuid]) scheduleLog $
buildLogOld byteString
. changeLog c uuid (encodeBS val)
. parseLogOld A.takeByteString

View file

@ -31,8 +31,8 @@ readLog = parseLog <$$> Annex.Branch.get
getLog :: (Ord v, SingleValueSerializable v) => RawFilePath -> Annex (Maybe v)
getLog = newestValue <$$> readLog
setLog :: (SingleValueSerializable v) => RawFilePath -> v -> Annex ()
setLog f v = do
setLog :: (SingleValueSerializable v) => Annex.Branch.RegardingUUID -> RawFilePath -> v -> Annex ()
setLog ru f v = do
c <- currentVectorClock
let ent = LogEntry c v
Annex.Branch.change f $ \_old -> buildLog (S.singleton ent)
Annex.Branch.change ru f $ \_old -> buildLog (S.singleton ent)

View file

@ -23,7 +23,7 @@ import Logs.Trust.Pure as X
trustSet :: UUID -> TrustLevel -> Annex ()
trustSet uuid@(UUID _) level = do
c <- currentVectorClock
Annex.Branch.change trustLog $
Annex.Branch.change (Annex.Branch.RegardingUUID [uuid]) trustLog $
buildLogOld buildTrustLevel .
changeLog c uuid level .
parseLogOld trustLevelParser

View file

@ -32,7 +32,7 @@ import qualified Data.Attoparsec.ByteString.Lazy as A
describeUUID :: UUID -> UUIDDesc -> Annex ()
describeUUID uuid desc = do
c <- currentVectorClock
Annex.Branch.change uuidLog $
Annex.Branch.change (Annex.Branch.RegardingUUID [uuid]) uuidLog $
buildLogOld buildUUIDDesc . changeLog c uuid desc . parseUUIDLog
{- The map is cached for speed. -}

View file

@ -62,7 +62,7 @@ setUrlPresent key url = do
us <- getUrls key
unless (url `elem` us) $ do
config <- Annex.getGitConfig
addLog (urlLogFile config key)
addLog (Annex.Branch.RegardingUUID []) (urlLogFile config key)
=<< logNow InfoPresent (LogInfo (encodeBS url))
-- If the url does not have an OtherDownloader, it must be present
-- in the web.
@ -76,7 +76,7 @@ setUrlMissing key url = do
us <- getUrls key
when (url `elem` us) $ do
config <- Annex.getGitConfig
addLog (urlLogFile config key)
addLog (Annex.Branch.RegardingUUID []) (urlLogFile config key)
=<< logNow InfoMissing (LogInfo (encodeBS url))
-- If the url was a web url and none of the remaining urls
-- for the key are web urls, the key must not be present