implement annex.privateremote and remote.name.private configs

The slightly unusual parsing in Types.GitConfig avoids the need to look
at the remote list to get configs of remotes. annexPrivateRepos combines
all the configs, and will only be calculated once, so it's nice and
fast.

privateUUIDsKnown and regardingPrivateUUID now need to read from the
annex mvar, so are not entirely free. But that overhead can be optimised
away, as seen in getJournalFileStale. The other call sites didn't seem
worth optimising to save a single MVar access. The feature should have
impreceptable speed overhead when not being used.
This commit is contained in:
Joey Hess 2021-04-23 14:21:57 -04:00
parent 0e830b6bb5
commit 32138b8cd8
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
5 changed files with 78 additions and 52 deletions

View file

@ -185,7 +185,7 @@ updateTo' pairs = do
else do else do
mergedrefs <- getMergedRefs mergedrefs <- getMergedRefs
filterM isnewer (excludeset mergedrefs unignoredrefs) filterM isnewer (excludeset mergedrefs unignoredrefs)
journalclean <- if null tomerge journalcleaned <- if null tomerge
{- Even when no refs need to be merged, the index {- Even when no refs need to be merged, the index
- may still be updated if the branch has gotten ahead - may still be updated if the branch has gotten ahead
- of the index, or just if the journal is dirty. -} - of the index, or just if the journal is dirty. -}
@ -210,12 +210,12 @@ updateTo' pairs = do
else do else do
lockJournal $ go branchref dirty tomerge lockJournal $ go branchref dirty tomerge
return True return True
journalclean <- if journalcleaned
then not <$> privateUUIDsKnown
else pure False
return $ UpdateMade return $ UpdateMade
{ refsWereMerged = not (null tomerge) { refsWereMerged = not (null tomerge)
, journalClean = journalclean , journalClean = journalclean
-- TODO need private index, then this can be
-- removed
&& not privateUUIDsKnown
} }
where where
excludeset s = filter (\(r, _) -> S.notMember r s) excludeset s = filter (\(r, _) -> S.notMember r s)
@ -336,7 +336,7 @@ maybeChange ru file f = lockJournal $ \jl -> do
{- Only get private information when the RegardingUUID is itself private. -} {- Only get private information when the RegardingUUID is itself private. -}
getToChange :: RegardingUUID -> RawFilePath -> Annex L.ByteString getToChange :: RegardingUUID -> RawFilePath -> Annex L.ByteString
getToChange = getLocal' . GetPrivate . regardingPrivateUUID getToChange ru f = flip getLocal' f . GetPrivate =<< regardingPrivateUUID ru
{- Records new content of a file into the journal. {- Records new content of a file into the journal.
- -
@ -452,11 +452,12 @@ files = do
{- Lists all files currently in the journal. There may be duplicates in {- Lists all files currently in the journal. There may be duplicates in
- the list when using a private journal. -} - the list when using a private journal. -}
journalledFiles :: Annex [RawFilePath] journalledFiles :: Annex [RawFilePath]
journalledFiles journalledFiles = ifM privateUUIDsKnown
| privateUUIDsKnown = (++) ( (++)
<$> getJournalledFilesStale gitAnnexPrivateJournalDir <$> getJournalledFilesStale gitAnnexPrivateJournalDir
<*> getJournalledFilesStale gitAnnexJournalDir <*> getJournalledFilesStale gitAnnexJournalDir
| otherwise = getJournalledFilesStale gitAnnexJournalDir , getJournalledFilesStale gitAnnexJournalDir
)
{- Files in the branch, not including any from journalled changes, {- Files in the branch, not including any from journalled changes,
- and without updating the branch. -} - and without updating the branch. -}

View file

@ -14,14 +14,16 @@
module Annex.Journal where module Annex.Journal where
import Annex.Common import Annex.Common
import qualified Annex
import qualified Git import qualified Git
import Annex.Perms import Annex.Perms
import Annex.Tmp import Annex.Tmp
import Annex.LockFile import Annex.LockFile
import Utility.Directory.Stream import Utility.Directory.Stream
import qualified Data.Set as S
import qualified Data.ByteString.Lazy as L import qualified Data.ByteString.Lazy as L
import qualified Data.ByteString as S import qualified Data.ByteString as B
import qualified System.FilePath.ByteString as P import qualified System.FilePath.ByteString as P
import Data.ByteString.Builder import Data.ByteString.Builder
import Data.Char import Data.Char
@ -42,21 +44,27 @@ instance Journalable Builder where
{- When a file in the git-annex branch is changed, this indicates what {- When a file in the git-annex branch is changed, this indicates what
- repository UUID (or in some cases, UUIDs) a change is regarding. - repository UUID (or in some cases, UUIDs) a change is regarding.
- -
- Using this lets changes regarding private UUIDs be written to the - Using this lets changes regarding private UUIDs be stored separately
- private index, rather than to the main branch index, so it does - from the git-annex branch, so its information does not get exposed
- not get exposed to other remotes. - outside the repo.
-} -}
data RegardingUUID = RegardingUUID [UUID] data RegardingUUID = RegardingUUID [UUID]
regardingPrivateUUID :: RegardingUUID -> Bool regardingPrivateUUID :: RegardingUUID -> Annex Bool
regardingPrivateUUID (RegardingUUID []) = False regardingPrivateUUID (RegardingUUID []) = pure False
regardingPrivateUUID (RegardingUUID _) = True -- TODO regardingPrivateUUID (RegardingUUID us) = do
s <- annexPrivateRepos <$> Annex.getGitConfig
return (any (flip S.member s) us)
-- Are any private UUIDs known to exist? If so, extra work has to be done, {- Are any private UUIDs known to exist? If so, extra work has to be done,
-- to check for information separately recorded for them, outside the usual - to check for information separately recorded for them, outside the usual
-- locations. - locations.
privateUUIDsKnown :: Bool -}
privateUUIDsKnown = True -- TODO privateUUIDsKnown :: Annex Bool
privateUUIDsKnown = privateUUIDsKnown' <$> Annex.getState id
privateUUIDsKnown' :: Annex.AnnexState -> Bool
privateUUIDsKnown' = not . S.null . annexPrivateRepos . Annex.gitconfig
{- Records content for a file in the branch to the journal. {- Records content for a file in the branch to the journal.
- -
@ -69,9 +77,10 @@ privateUUIDsKnown = True -- TODO
-} -}
setJournalFile :: Journalable content => JournalLocked -> RegardingUUID -> RawFilePath -> content -> Annex () setJournalFile :: Journalable content => JournalLocked -> RegardingUUID -> RawFilePath -> content -> Annex ()
setJournalFile _jl ru file content = withOtherTmp $ \tmp -> do setJournalFile _jl ru file content = withOtherTmp $ \tmp -> do
jd <- fromRepo $ if regardingPrivateUUID ru jd <- fromRepo =<< ifM (regardingPrivateUUID ru)
then gitAnnexPrivateJournalDir ( return gitAnnexPrivateJournalDir
else gitAnnexJournalDir , return gitAnnexJournalDir
)
createAnnexDirectory jd createAnnexDirectory jd
-- journal file is written atomically -- journal file is written atomically
let jfile = journalFile file let jfile = journalFile file
@ -98,8 +107,12 @@ data GetPrivate = GetPrivate Bool
- laziness doesn't matter much, as the files are not very large. - laziness doesn't matter much, as the files are not very large.
-} -}
getJournalFileStale :: GetPrivate -> RawFilePath -> Annex (Maybe L.ByteString) getJournalFileStale :: GetPrivate -> RawFilePath -> Annex (Maybe L.ByteString)
getJournalFileStale (GetPrivate getprivate) file = inRepo $ \g -> getJournalFileStale (GetPrivate getprivate) file = do
if getprivate && privateUUIDsKnown -- Optimisation to avoid a second MVar access.
st <- Annex.getState id
let g = Annex.repo st
liftIO $
if getprivate && privateUUIDsKnown' st
then do then do
x <- getfrom (gitAnnexJournalDir g) x <- getfrom (gitAnnexJournalDir g)
y <- getfrom (gitAnnexPrivateJournalDir g) y <- getfrom (gitAnnexPrivateJournalDir g)
@ -110,7 +123,7 @@ getJournalFileStale (GetPrivate getprivate) file = inRepo $ \g ->
where where
jfile = journalFile file jfile = journalFile file
getfrom d = catchMaybeIO $ getfrom d = catchMaybeIO $
L.fromStrict <$> S.readFile (fromRawFilePath (d P.</> jfile)) L.fromStrict <$> B.readFile (fromRawFilePath (d P.</> jfile))
{- List of existing journal files in a journal directory, but without locking, {- List of existing journal files in a journal directory, but without locking,
- may miss new ones just being added, or may have false positives if the - may miss new ones just being added, or may have false positives if the
@ -145,12 +158,12 @@ journalDirty getjournaldir = do
- in the journal directory. - in the journal directory.
-} -}
journalFile :: RawFilePath -> RawFilePath journalFile :: RawFilePath -> RawFilePath
journalFile file = S.concatMap mangle file journalFile file = B.concatMap mangle file
where where
mangle c mangle c
| P.isPathSeparator c = S.singleton underscore | P.isPathSeparator c = B.singleton underscore
| c == underscore = S.pack [underscore, underscore] | c == underscore = B.pack [underscore, underscore]
| otherwise = S.singleton c | otherwise = B.singleton c
underscore = fromIntegral (ord '_') underscore = fromIntegral (ord '_')
{- Converts a journal file (relative to the journal dir) back to the {- Converts a journal file (relative to the journal dir) back to the
@ -159,16 +172,16 @@ fileJournal :: RawFilePath -> RawFilePath
fileJournal = go fileJournal = go
where where
go b = go b =
let (h, t) = S.break (== underscore) b let (h, t) = B.break (== underscore) b
in h <> case S.uncons t of in h <> case B.uncons t of
Nothing -> t Nothing -> t
Just (_u, t') -> case S.uncons t' of Just (_u, t') -> case B.uncons t' of
Nothing -> t' Nothing -> t'
Just (w, t'') Just (w, t'')
| w == underscore -> | w == underscore ->
S.cons underscore (go t'') B.cons underscore (go t'')
| otherwise -> | otherwise ->
S.cons P.pathSeparator (go t') B.cons P.pathSeparator (go t')
underscore = fromIntegral (ord '_') underscore = fromIntegral (ord '_')

View file

@ -1,6 +1,6 @@
{- git remote stuff {- git remote stuff
- -
- Copyright 2012 Joey Hess <id@joeyh.name> - Copyright 2012-2021 Joey Hess <id@joeyh.name>
- -
- Licensed under the GNU AGPL version 3 or higher. - Licensed under the GNU AGPL version 3 or higher.
-} -}
@ -25,7 +25,11 @@ import Git.FilePath
{- Is a git config key one that specifies the url of a remote? -} {- Is a git config key one that specifies the url of a remote? -}
isRemoteUrlKey :: ConfigKey -> Bool isRemoteUrlKey :: ConfigKey -> Bool
isRemoteUrlKey (ConfigKey k) = "remote." `S.isPrefixOf` k && ".url" `S.isSuffixOf` k isRemoteUrlKey = isRemoteKey "url"
isRemoteKey :: S.ByteString -> ConfigKey -> Bool
isRemoteKey want (ConfigKey k) =
"remote." `S.isPrefixOf` k && ("." <> want) `S.isSuffixOf` k
{- Get a remote's name from the a config key such as remote.name.url {- Get a remote's name from the a config key such as remote.name.url
- or any other per-remote config key. -} - or any other per-remote config key. -}

View file

@ -30,6 +30,7 @@ import qualified Git.Config
import qualified Git.Construct import qualified Git.Construct
import Git.Types import Git.Types
import Git.ConfigTypes import Git.ConfigTypes
import Git.Remote (isRemoteKey, remoteKeyToRemoteName)
import Git.Branch (CommitMode(..)) import Git.Branch (CommitMode(..))
import Utility.DataUnits import Utility.DataUnits
import Config.Cost import Config.Cost
@ -50,6 +51,7 @@ import Utility.Url (Scheme, mkScheme)
import Control.Concurrent.STM import Control.Concurrent.STM
import qualified Data.Set as S import qualified Data.Set as S
import qualified Data.Map as M
import qualified Data.ByteString as B import qualified Data.ByteString as B
-- | A configurable value, that may not be fully determined yet because -- | A configurable value, that may not be fully determined yet because
@ -136,12 +138,13 @@ data GitConfig = GitConfig
, gcryptId :: Maybe String , gcryptId :: Maybe String
, gpgCmd :: GpgCmd , gpgCmd :: GpgCmd
, mergeDirectoryRenames :: Maybe String , mergeDirectoryRenames :: Maybe String
, annexPrivateRepos :: S.Set UUID
} }
extractGitConfig :: ConfigSource -> Git.Repo -> GitConfig extractGitConfig :: ConfigSource -> Git.Repo -> GitConfig
extractGitConfig configsource r = GitConfig extractGitConfig configsource r = GitConfig
{ annexVersion = RepoVersion <$> getmayberead (annexConfig "version") { annexVersion = RepoVersion <$> getmayberead (annexConfig "version")
, annexUUID = maybe NoUUID toUUID $ getmaybe (annexConfig "uuid") , annexUUID = hereuuid
, annexNumCopies = NumCopies <$> getmayberead (annexConfig "numcopies") , annexNumCopies = NumCopies <$> getmayberead (annexConfig "numcopies")
, annexDiskReserve = fromMaybe onemegabyte $ , annexDiskReserve = fromMaybe onemegabyte $
readSize dataUnits =<< getmaybe (annexConfig "diskreserve") readSize dataUnits =<< getmaybe (annexConfig "diskreserve")
@ -239,6 +242,19 @@ extractGitConfig configsource r = GitConfig
, gcryptId = getmaybe "core.gcrypt-id" , gcryptId = getmaybe "core.gcrypt-id"
, gpgCmd = mkGpgCmd (getmaybe "gpg.program") , gpgCmd = mkGpgCmd (getmaybe "gpg.program")
, mergeDirectoryRenames = getmaybe "directoryrenames" , mergeDirectoryRenames = getmaybe "directoryrenames"
, annexPrivateRepos = S.fromList $ concat
[ if getbool (annexConfig "private") False
then [hereuuid]
else []
, let get (k, v)
| Git.Config.isTrueFalse' v /= Just True = Nothing
| isRemoteKey (remoteAnnexConfigEnd "private") k = do
remotename <- remoteKeyToRemoteName k
toUUID <$> Git.Config.getMaybe
(remoteAnnexConfig remotename "uuid") r
| otherwise = Nothing
in mapMaybe get (M.toList (Git.config r))
]
} }
where where
getbool k d = fromMaybe d $ getmaybebool k getbool k d = fromMaybe d $ getmaybebool k
@ -255,6 +271,8 @@ extractGitConfig configsource r = GitConfig
FromGlobalConfig -> HasGlobalConfig v FromGlobalConfig -> HasGlobalConfig v
onemegabyte = 1000000 onemegabyte = 1000000
hereuuid = maybe NoUUID toUUID $ getmaybe (annexConfig "uuid")
{- Merge a GitConfig that comes from git-config with one containing {- Merge a GitConfig that comes from git-config with one containing
- repository-global defaults. -} - repository-global defaults. -}
@ -443,7 +461,10 @@ instance RemoteNameable RemoteName where
{- A per-remote annex setting in git config. -} {- A per-remote annex setting in git config. -}
remoteAnnexConfig :: RemoteNameable r => r -> UnqualifiedConfigKey -> ConfigKey remoteAnnexConfig :: RemoteNameable r => r -> UnqualifiedConfigKey -> ConfigKey
remoteAnnexConfig r key = remoteConfig r ("annex-" <> key) remoteAnnexConfig r = remoteConfig r . remoteAnnexConfigEnd
remoteAnnexConfigEnd :: UnqualifiedConfigKey -> UnqualifiedConfigKey
remoteAnnexConfigEnd key = "annex-" <> key
{- A per-remote setting in git config. -} {- A per-remote setting in git config. -}
remoteConfig :: RemoteNameable r => r -> UnqualifiedConfigKey -> ConfigKey remoteConfig :: RemoteNameable r => r -> UnqualifiedConfigKey -> ConfigKey

View file

@ -157,19 +157,6 @@ later write.
> Annex.Branch, also need to be fixed (and may be missing journal files > Annex.Branch, also need to be fixed (and may be missing journal files
> already?) Most fixed now. Command.Log behavior needs to be > already?) Most fixed now. Command.Log behavior needs to be
> investigated still. > investigated still.
>
> * Need to implement regardingPrivateUUID and privateUUIDsKnown,
> which need to look at the git config to find the private uuids.
>
> But that involves a mvar access, so there will be some slow down,
> although often it will be swamped by the actual branch querying.
> So far it's been possible to avoid any slow down from this feature
> when it's not in use.
>
> Encoding inside the uuid if a repo is private avoids slowdown of
> regardingPrivateUUID, but not privateUUIDsKnown. (So branch queries
> still slow down). It also avoids needing to set the config before
> writing to the branch when setting up a private repo or special remote.
## networks of hidden repos ## networks of hidden repos