Merge branch 'trackassociated'

This commit is contained in:
Joey Hess 2021-05-25 11:43:52 -04:00
commit 483fc4dc6b
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
22 changed files with 302 additions and 168 deletions

View file

@ -27,8 +27,8 @@ import qualified Data.Set as S
type Reason = String
{- Drop a key from local and/or remote when allowed by the preferred content
- and numcopies settings.
{- Drop a key from local and/or remote when allowed by the preferred content,
- required content, and numcopies settings.
-
- Skips trying to drop from remotes that are appendonly, since those drops
- would presumably fail. Also skips dropping from exporttree/importtree remotes,
@ -103,16 +103,13 @@ handleDropsFrom locs rs reason fromhere key afile si preverified runner = do
dropr fs r n >>= go fs rest
| otherwise = pure n
checkdrop fs n u a
| null fs = check $ -- no associated files; unused content
wantDrop True u (Just key) (AssociatedFile Nothing)
| otherwise = check $
allM (wantDrop True u (Just key) . AssociatedFile . Just) fs
where
check c = ifM c
( dodrop n u a
, return n
)
checkdrop fs n u a =
let afs = map (AssociatedFile . Just) fs
pcc = Command.Drop.PreferredContentChecked True
in ifM (wantDrop True u (Just key) afile (Just afs))
( dodrop n u (a pcc)
, return n
)
dodrop n@(have, numcopies, mincopies, _untrusted) u a =
ifM (safely $ runner $ a numcopies mincopies)
@ -130,12 +127,12 @@ handleDropsFrom locs rs reason fromhere key afile si preverified runner = do
, return n
)
dropl fs n = checkdrop fs n Nothing $ \numcopies mincopies ->
dropl fs n = checkdrop fs n Nothing $ \pcc numcopies mincopies ->
stopUnless (inAnnex key) $
Command.Drop.startLocal afile ai si numcopies mincopies key preverified
Command.Drop.startLocal pcc afile ai si numcopies mincopies key preverified
dropr fs r n = checkdrop fs n (Just $ Remote.uuid r) $ \numcopies mincopies ->
Command.Drop.startRemote afile ai si numcopies mincopies key r
dropr fs r n = checkdrop fs n (Just $ Remote.uuid r) $ \pcc numcopies mincopies ->
Command.Drop.startRemote pcc afile ai si numcopies mincopies key r
ai = mkActionItem (key, afile)

View file

@ -134,8 +134,8 @@ initialize' mversion = checkInitializeAllowed $ do
else deconfigureSmudgeFilter
unlessM isBareRepo $ do
when supportunlocked $ do
showSideAction "scanning for unlocked files"
scanUnlockedFiles
showSideAction "scanning for annexed files"
scanAnnexedFiles
hookWrite postCheckoutHook
hookWrite postMergeHook
AdjustedBranch.checkAdjustedClone >>= \case

View file

@ -1,6 +1,6 @@
{- git-annex checking whether content is wanted
-
- Copyright 2012 Joey Hess <id@joeyh.name>
- Copyright 2012-2021 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
@ -10,6 +10,10 @@ module Annex.Wanted where
import Annex.Common
import Logs.PreferredContent
import Annex.UUID
import Annex.CatFile
import Git.FilePath
import qualified Database.Keys
import Types.FileMatcher
import qualified Data.Set as S
@ -17,13 +21,55 @@ import qualified Data.Set as S
wantGet :: Bool -> Maybe Key -> AssociatedFile -> Annex Bool
wantGet d key file = isPreferredContent Nothing S.empty key file d
{- Check if a file is preferred content for a remote. -}
{- Check if a file is preferred content for a repository. -}
wantSend :: Bool -> Maybe Key -> AssociatedFile -> UUID -> Annex Bool
wantSend d key file to = isPreferredContent (Just to) S.empty key file d
{- Check if a file can be dropped, maybe from a remote.
- Don't drop files that are preferred content. -}
wantDrop :: Bool -> Maybe UUID -> Maybe Key -> AssociatedFile -> Annex Bool
wantDrop d from key file = do
u <- maybe getUUID (return . id) from
not <$> isPreferredContent (Just u) (S.singleton u) key file d
{- Check if a file is not preferred or required content, and can be
- dropped. When a UUID is provided, checks for that repository.
-
- The AssociatedFile is the one that the user requested to drop.
- There may be other files that use the same key, and preferred content
- may match some of those and not others. If any are preferred content,
- that will prevent dropping. When the other associated files are known,
- they can be provided, otherwise this looks them up.
-}
wantDrop :: Bool -> Maybe UUID -> Maybe Key -> AssociatedFile -> (Maybe [AssociatedFile]) -> Annex Bool
wantDrop d from key file others =
isNothing <$> checkDrop isPreferredContent d from key file others
{- Generalization of wantDrop that can also be used with isRequiredContent.
-
- When the content should not be dropped, returns Just the file that
- the checker matches.
-}
checkDrop :: (Maybe UUID -> AssumeNotPresent -> Maybe Key -> AssociatedFile -> Bool -> Annex Bool) -> Bool -> Maybe UUID -> Maybe Key -> AssociatedFile -> (Maybe [AssociatedFile]) -> Annex (Maybe AssociatedFile)
checkDrop checker d from key file others = do
u <- maybe getUUID (pure . id) from
let s = S.singleton u
let checker' f = checker (Just u) s key f d
ifM (checker' file)
( return (Just file)
, do
others' <- case others of
Just afs -> pure (filter (/= file) afs)
Nothing -> case key of
Just k ->
mapM (\f -> AssociatedFile . Just <$> fromRepo (fromTopFilePath f))
=<< Database.Keys.getAssociatedFiles k
Nothing -> pure []
l <- filterM checker' others'
if null l
then return Nothing
else checkassociated l
)
where
-- Some associated files that are in the keys database may no
-- longer correspond to files in the repository, and should
-- not prevent dropping.
checkassociated [] = return Nothing
checkassociated (af@(AssociatedFile (Just f)):fs) =
catKeyFile f >>= \case
Just k | Just k == key -> return (Just af)
_ -> checkassociated fs
checkassociated (AssociatedFile Nothing:fs) = checkassociated fs

View file

@ -66,19 +66,19 @@ whenAnnexed a file = ifAnnexed file (a file) (return Nothing)
ifAnnexed :: RawFilePath -> (Key -> Annex a) -> Annex a -> Annex a
ifAnnexed file yes no = maybe no yes =<< lookupKey file
{- Find all unlocked files and update the keys database for them.
{- Find all annexed files and update the keys database for them.
-
- This is expensive, and so normally the associated files are updated
- incrementally when changes are noticed. So, this only needs to be done
- when initializing/upgrading repository.
- when initializing/upgrading a repository.
-
- Also, the content for the unlocked file may already be present as
- Also, the content for an unlocked file may already be present as
- an annex object. If so, populate the pointer file with it.
- But if worktree file does not have a pointer file's content, it is left
- as-is.
-}
scanUnlockedFiles :: Annex ()
scanUnlockedFiles = whenM (inRepo Git.Ref.headExists <&&> not <$> isBareRepo) $ do
scanAnnexedFiles :: Annex ()
scanAnnexedFiles = whenM (inRepo Git.Ref.headExists <&&> not <$> isBareRepo) $ do
dropold <- liftIO $ newMVar $
Database.Keys.runWriter $
liftIO . Database.Keys.SQL.dropAllAssociatedFiles
@ -87,9 +87,10 @@ scanUnlockedFiles = whenM (inRepo Git.Ref.headExists <&&> not <$> isBareRepo) $
(Git.LsTree.LsTreeLong False)
Git.Ref.headRef
forM_ l $ \i ->
when (isregfile i) $
maybe noop (add dropold i)
=<< catKey (Git.LsTree.sha i)
maybe noop (add dropold i)
=<< catKey'
(Git.LsTree.sha i)
(fromMaybe 0 (Git.LsTree.size i))
liftIO $ void cleanup
where
isregfile i = case Git.Types.toTreeItemType (Git.LsTree.mode i) of
@ -101,7 +102,7 @@ scanUnlockedFiles = whenM (inRepo Git.Ref.headExists <&&> not <$> isBareRepo) $
let tf = Git.LsTree.file i
Database.Keys.runWriter $
liftIO . Database.Keys.SQL.addAssociatedFileFast k tf
whenM (inAnnex k) $ do
whenM (pure (isregfile i) <&&> inAnnex k) $ do
f <- fromRepo $ fromTopFilePath tf
liftIO (isPointerFile f) >>= \case
Just k' | k' == k -> do

View file

@ -1,5 +1,16 @@
git-annex (8.20210429) UNRELEASED; urgency=medium
* When two files have the same content, and a required content expression
matches one but not the other, dropping the latter file will fail as it
would also remove the content of the required file.
* drop --auto: When two files have the same content, and a preferred content
expression matches one but not the other, do not drop the content.
* sync --content, assistant: When two unlocked files have the same
content, and a preferred content expression matches one but not the
other, do not drop the content. (This was already the case for locked
files.)
* sync --content, assistant: Fix an edge case where a file that is not
preferred content did not get dropped.
* filter-branch: New command, useful to produce a filtered version of the
git-annex branch, eg when splitting a repository.
* fromkey: Create an unlocked file when used in an adjusted branch

View file

@ -21,8 +21,6 @@ import Annex.Content
import Annex.Wanted
import Annex.Notification
import qualified Data.Set as S
cmd :: Command
cmd = withGlobalOptions [jobsOption, jsonOptions, annexedMatchingOptions] $
command "drop" SectionCommon
@ -86,33 +84,34 @@ start o from si file key = start' o from key afile ai si
start' :: DropOptions -> Maybe Remote -> Key -> AssociatedFile -> ActionItem -> SeekInput -> CommandStart
start' o from key afile ai si =
checkDropAuto (autoMode o) from afile key $ \numcopies mincopies ->
stopUnless want $
stopUnless wantdrop $
case from of
Nothing -> startLocal afile ai si numcopies mincopies key []
Just remote -> startRemote afile ai si numcopies mincopies key remote
Nothing -> startLocal pcc afile ai si numcopies mincopies key []
Just remote -> startRemote pcc afile ai si numcopies mincopies key remote
where
want
| autoMode o = wantDrop False (Remote.uuid <$> from) (Just key) afile
wantdrop
| autoMode o = wantDrop False (Remote.uuid <$> from) (Just key) afile Nothing
| otherwise = return True
pcc = PreferredContentChecked (autoMode o)
startKeys :: DropOptions -> Maybe Remote -> (SeekInput, Key, ActionItem) -> CommandStart
startKeys o from (si, key, ai) = start' o from key (AssociatedFile Nothing) ai si
startLocal :: AssociatedFile -> ActionItem -> SeekInput -> NumCopies -> MinCopies -> Key -> [VerifiedCopy] -> CommandStart
startLocal afile ai si numcopies mincopies key preverified =
startLocal :: PreferredContentChecked -> AssociatedFile -> ActionItem -> SeekInput -> NumCopies -> MinCopies -> Key -> [VerifiedCopy] -> CommandStart
startLocal pcc afile ai si numcopies mincopies key preverified =
starting "drop" (OnlyActionOn key ai) si $
performLocal key afile numcopies mincopies preverified
performLocal pcc key afile numcopies mincopies preverified
startRemote :: AssociatedFile -> ActionItem -> SeekInput -> NumCopies -> MinCopies -> Key -> Remote -> CommandStart
startRemote afile ai si numcopies mincopies key remote =
startRemote :: PreferredContentChecked -> AssociatedFile -> ActionItem -> SeekInput -> NumCopies -> MinCopies -> Key -> Remote -> CommandStart
startRemote pcc afile ai si numcopies mincopies key remote =
starting ("drop " ++ Remote.name remote) (OnlyActionOn key ai) si $
performRemote key afile numcopies mincopies remote
performRemote pcc key afile numcopies mincopies remote
performLocal :: Key -> AssociatedFile -> NumCopies -> MinCopies -> [VerifiedCopy] -> CommandPerform
performLocal key afile numcopies mincopies preverified = lockContentForRemoval key fallback $ \contentlock -> do
performLocal :: PreferredContentChecked -> Key -> AssociatedFile -> NumCopies -> MinCopies -> [VerifiedCopy] -> CommandPerform
performLocal pcc key afile numcopies mincopies preverified = lockContentForRemoval key fallback $ \contentlock -> do
u <- getUUID
(tocheck, verified) <- verifiableCopies key [u]
doDrop u (Just contentlock) key afile numcopies mincopies [] (preverified ++ verified) tocheck
doDrop pcc u (Just contentlock) key afile numcopies mincopies [] (preverified ++ verified) tocheck
( \proof -> do
fastDebug "Command.Drop" $ unwords
[ "Dropping from here"
@ -134,12 +133,12 @@ performLocal key afile numcopies mincopies preverified = lockContentForRemoval k
-- to be done except for cleaning up.
fallback = next $ cleanupLocal key
performRemote :: Key -> AssociatedFile -> NumCopies -> MinCopies -> Remote -> CommandPerform
performRemote key afile numcopies mincopies remote = do
performRemote :: PreferredContentChecked -> Key -> AssociatedFile -> NumCopies -> MinCopies -> Remote -> CommandPerform
performRemote pcc key afile numcopies mincopies remote = do
-- Filter the uuid it's being dropped from out of the lists of
-- places assumed to have the key, and places to check.
(tocheck, verified) <- verifiableCopies key [uuid]
doDrop uuid Nothing key afile numcopies mincopies [uuid] verified tocheck
doDrop pcc uuid Nothing key afile numcopies mincopies [uuid] verified tocheck
( \proof -> do
fastDebug "Command.Drop" $ unwords
[ "Dropping from remote"
@ -169,12 +168,11 @@ cleanupRemote key remote ok = do
- verify that enough copies of a key exist to allow it to be
- safely removed (with no data loss).
-
- Also checks if it's required content, and refuses to drop if so.
-
- --force overrides and always allows dropping.
-}
doDrop
:: UUID
:: PreferredContentChecked
-> UUID
-> Maybe ContentRemovalLock
-> Key
-> AssociatedFile
@ -185,10 +183,10 @@ doDrop
-> [UnVerifiedCopy]
-> (Maybe SafeDropProof -> CommandPerform, CommandPerform)
-> CommandPerform
doDrop dropfrom contentlock key afile numcopies mincopies skip preverified check (dropaction, nodropaction) =
doDrop pcc dropfrom contentlock key afile numcopies mincopies skip preverified check (dropaction, nodropaction) =
ifM (Annex.getState Annex.force)
( dropaction Nothing
, ifM (checkRequiredContent dropfrom key afile)
, ifM (checkRequiredContent pcc dropfrom key afile)
( verifyEnoughCopiesToDrop nolocmsg key
contentlock numcopies mincopies
skip preverified check
@ -203,18 +201,27 @@ doDrop dropfrom contentlock key afile numcopies mincopies skip preverified check
showLongNote "(Use --force to override this check, or adjust numcopies.)"
a
checkRequiredContent :: UUID -> Key -> AssociatedFile -> Annex Bool
checkRequiredContent u k afile =
ifM (isRequiredContent (Just u) S.empty (Just k) afile False)
( requiredContent
, return True
)
{- Checking preferred content also checks required content, so when
- auto mode causes preferred content to be checked, it's redundant
- for checkRequiredContent to separately check required content, and
- providing this avoids that extra work. -}
newtype PreferredContentChecked = PreferredContentChecked Bool
requiredContent :: Annex Bool
requiredContent = do
showLongNote "That file is required content, it cannot be dropped!"
showLongNote "(Use --force to override this check, or adjust required content configuration.)"
return False
checkRequiredContent :: PreferredContentChecked -> UUID -> Key -> AssociatedFile -> Annex Bool
checkRequiredContent (PreferredContentChecked True) _ _ _ = return True
checkRequiredContent (PreferredContentChecked False) u k afile =
checkDrop isRequiredContent False (Just u) (Just k) afile Nothing >>= \case
Nothing -> return True
Just afile' -> do
if afile == afile'
then showLongNote "That file is required content. It cannot be dropped!"
else showLongNote $ "That file has the same content as another file"
++ case afile' of
AssociatedFile (Just f) -> " (" ++ fromRawFilePath f ++ "),"
AssociatedFile Nothing -> ""
++ " which is required content. It cannot be dropped!"
showLongNote "(Use --force to override this check, or adjust required content configuration.)"
return False
{- In auto mode, only runs the action if there are enough
- copies on other semitrusted repositories. -}

View file

@ -49,7 +49,7 @@ perform :: Maybe Remote -> NumCopies -> MinCopies -> Key -> CommandPerform
perform from numcopies mincopies key = case from of
Just r -> do
showAction $ "from " ++ Remote.name r
Command.Drop.performRemote key (AssociatedFile Nothing) numcopies mincopies r
Command.Drop.performRemote pcc key (AssociatedFile Nothing) numcopies mincopies r
Nothing -> ifM (inAnnex key)
( droplocal
, ifM (objectFileExists key)
@ -63,7 +63,8 @@ perform from numcopies mincopies key = case from of
)
)
where
droplocal = Command.Drop.performLocal key (AssociatedFile Nothing) numcopies mincopies []
droplocal = Command.Drop.performLocal pcc key (AssociatedFile Nothing) numcopies mincopies []
pcc = Command.Drop.PreferredContentChecked False
performOther :: (Key -> Git.Repo -> RawFilePath) -> Key -> CommandPerform
performOther filespec key = do

View file

@ -62,7 +62,7 @@ perform file key = do
lockdown =<< calcRepo (gitAnnexLocation key)
addLink (CheckGitIgnore False) file key
=<< withTSDelta (liftIO . genInodeCache file)
next $ cleanup file key
next $ return True
where
lockdown obj = do
ifM (isUnmodified key obj)
@ -97,10 +97,5 @@ perform file key = do
lostcontent = logStatus key InfoMissing
cleanup :: RawFilePath -> Key -> CommandCleanup
cleanup file key = do
Database.Keys.removeAssociatedFile key =<< inRepo (toTopFilePath file)
return True
errorModified :: a
errorModified = giveup "Locking this file would discard any changes you have made to it. Use 'git annex add' to stage your changes. (Or, use --force to override)"

View file

@ -86,7 +86,7 @@ perform file oldkey oldbackend newbackend = go =<< genkey (fastMigrate oldbacken
urls <- getUrls oldkey
forM_ urls $ \url ->
setUrlPresent newkey url
next $ Command.ReKey.cleanup file oldkey newkey
next $ Command.ReKey.cleanup file newkey
, giveup "failed creating link from old to new key"
)
genkey Nothing = do

View file

@ -69,7 +69,7 @@ startKey o afile (si, key, ai) = case fromToOptions o of
( Command.Move.toStart Command.Move.RemoveNever afile key ai si =<< getParsed r
, do
(numcopies, mincopies) <- getnummincopies
Command.Drop.startRemote afile ai si numcopies mincopies key =<< getParsed r
Command.Drop.startRemote pcc afile ai si numcopies mincopies key =<< getParsed r
)
FromRemote r -> checkFailedTransferDirection ai Download $ do
haskey <- flip Remote.hasKey key =<< getParsed r
@ -82,10 +82,11 @@ startKey o afile (si, key, ai) = case fromToOptions o of
Right False -> ifM (inAnnex key)
( do
(numcopies, mincopies) <- getnummincopies
Command.Drop.startLocal afile ai si numcopies mincopies key []
Command.Drop.startLocal pcc afile ai si numcopies mincopies key []
, stop
)
where
getnummincopies = case afile of
AssociatedFile Nothing -> (,) <$> getNumCopies <*> getMinCopies
AssociatedFile (Just af) -> getFileNumMinCopies af
pcc = Command.Drop.PreferredContentChecked False

View file

@ -293,7 +293,7 @@ toHereStart removewhen afile key ai si =
- repository reduces the number of copies, and should fail if
- that would violate numcopies settings.
-
- On the other hand, when the destiation repository does not already
- On the other hand, when the destination repository does not already
- have a copy of a file, it can be dropped without making numcopies
- worse, so the move is allowed even if numcopies is not met.
-
@ -311,7 +311,7 @@ toHereStart removewhen afile key ai si =
-}
willDropMakeItWorse :: UUID -> UUID -> DestStartedWithCopy -> Key -> AssociatedFile -> Annex DropCheck
willDropMakeItWorse srcuuid destuuid (DestStartedWithCopy deststartedwithcopy) key afile =
ifM (Command.Drop.checkRequiredContent srcuuid key afile)
ifM (Command.Drop.checkRequiredContent (Command.Drop.PreferredContentChecked False) srcuuid key afile)
( if deststartedwithcopy
then unlessforced DropCheckNumCopies
else ifM checktrustlevel

View file

@ -15,8 +15,6 @@ import Annex.Link
import Annex.Perms
import Annex.ReplaceFile
import Logs.Location
import Git.FilePath
import qualified Database.Keys
import Annex.InodeSentinal
import Utility.InodeCache
import qualified Utility.RawFilePath as R
@ -79,7 +77,7 @@ perform file oldkey newkey = do
, unlessM (Annex.getState Annex.force) $
giveup $ fromRawFilePath file ++ " is not available (use --force to override)"
)
next $ cleanup file oldkey newkey
next $ cleanup file newkey
{- Make a hard link to the old key content (when supported),
- to avoid wasting disk space. -}
@ -119,8 +117,8 @@ linkKey file oldkey newkey = ifM (isJust <$> isAnnexLink file)
LinkAnnexNoop -> True
)
cleanup :: RawFilePath -> Key -> Key -> CommandCleanup
cleanup file oldkey newkey = do
cleanup :: RawFilePath -> Key -> CommandCleanup
cleanup file newkey = do
ifM (isJust <$> isAnnexLink file)
( do
-- Update symlink to use the new key.
@ -131,8 +129,6 @@ cleanup file oldkey newkey = do
liftIO $ whenM (isJust <$> isPointerFile file) $
writePointerFile file newkey mode
stagePointerFile file mode =<< hashPointerFile newkey
Database.Keys.removeAssociatedFile oldkey
=<< inRepo (toTopFilePath file)
)
whenM (inAnnex newkey) $
logStatus newkey InfoPresent

View file

@ -1,6 +1,6 @@
{- Sqlite database of information about Keys
-
- Copyright 2015-2019 Joey Hess <id@joeyh.name>
- Copyright 2015-2021 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
@ -44,6 +44,9 @@ import Git.FilePath
import Git.Command
import Git.Types
import Git.Index
import Git.Sha
import Git.Branch (writeTreeQuiet, update')
import qualified Git.Ref
import Config.Smudge
import qualified Utility.RawFilePath as R
@ -52,10 +55,6 @@ import qualified Data.ByteString.Char8 as S8
import qualified System.FilePath.ByteString as P
{- Runs an action that reads from the database.
-
- If the database doesn't already exist, it's not created; mempty is
- returned instead. This way, when the keys database is not in use,
- there's minimal overhead in checking it.
-
- If the database is already open, any writes are flushed to it, to ensure
- consistency.
@ -73,7 +72,7 @@ runReader a = do
v <- a (SQL.ReadHandle qh)
return (v, st)
go DbClosed = do
st' <- openDb False DbClosed
st' <- openDb True DbClosed
v <- case st' of
(DbOpen qh) -> a (SQL.ReadHandle qh)
_ -> return mempty
@ -95,7 +94,7 @@ runWriter a = do
v <- a (SQL.WriteHandle qh)
return (v, st)
go st = do
st' <- openDb True st
st' <- openDb False st
v <- case st' of
DbOpen qh -> a (SQL.WriteHandle qh)
_ -> error "internal"
@ -104,7 +103,7 @@ runWriter a = do
runWriterIO :: (SQL.WriteHandle -> IO ()) -> Annex ()
runWriterIO a = runWriter (liftIO . a)
{- Opens the database, perhaps creating it if it doesn't exist yet.
{- Opens the database, creating it if it doesn't exist yet.
-
- Multiple readers and writers can have the database open at the same
- time. Database.Handle deals with the concurrency issues.
@ -115,22 +114,21 @@ runWriterIO a = runWriter (liftIO . a)
openDb :: Bool -> DbState -> Annex DbState
openDb _ st@(DbOpen _) = return st
openDb False DbUnavailable = return DbUnavailable
openDb createdb _ = catchPermissionDenied permerr $ withExclusiveLock gitAnnexKeysDbLock $ do
openDb forwrite _ = catchPermissionDenied permerr $ withExclusiveLock gitAnnexKeysDbLock $ do
dbdir <- fromRepo gitAnnexKeysDb
let db = dbdir P.</> "db"
dbexists <- liftIO $ R.doesPathExist db
case (dbexists, createdb) of
(True, _) -> open db
(False, True) -> do
case dbexists of
True -> open db
False -> do
initDb db SQL.createTables
open db
(False, False) -> return DbUnavailable
where
-- If permissions don't allow opening the database, treat it as if
-- it does not exist.
permerr e = case createdb of
False -> return DbUnavailable
True -> throwM e
-- If permissions don't allow opening the database, and it's being
-- opened for read, treat it as if it does not exist.
permerr e
| forwrite = throwM e
| otherwise = return DbUnavailable
open db = do
qh <- liftIO $ H.openDbQueue H.MultiWriter db SQL.containedTable
@ -191,20 +189,17 @@ removeInodeCache = runWriterIO . SQL.removeInodeCache
isInodeKnown :: InodeCache -> SentinalStatus -> Annex Bool
isInodeKnown i s = or <$> runReaderIO ((:[]) <$$> SQL.isInodeKnown i s)
{- Looks at staged changes to find when unlocked files are copied/moved,
- and updates associated files in the keys database.
{- Looks at staged changes to annexed files, and updates the keys database,
- so that its information is consistent with the state of the repository.
-
- Since staged changes can be dropped later, does not remove any
- associated files; only adds new associated files.
-
- This needs to be run before querying the keys database so that
- information is consistent with the state of the repository.
- This is run with a lock held, so only one process can be running this at
- a time.
-
- To avoid unncessary work, the index file is statted, and if it's not
- changed since last time this was run, nothing is done.
-
- Note that this is run with a lock held, so only one process can be
- running this at a time.
- A tree is generated from the index, and the diff between that tree
- and the last processed tree is examined for changes.
-
- This also cleans up after a race between eg a git mv and git-annex
- get/drop/similar. If git moves the file between this being run and the
@ -218,34 +213,74 @@ isInodeKnown i s = or <$> runReaderIO ((:[]) <$$> SQL.isInodeKnown i s)
- filter. If a drop missed the file then the file is added back into the
- annex. If a get missed the file then the clean filter populates the
- file.
-
- There is a situation where, after this has run, the database can still
- contain associated files that have been deleted from the index.
- That happens when addAssociatedFile is used to record a newly
- added file, but that file then gets removed from the index before
- this is run. Eg, "git-annex add foo; git rm foo"
- So when using getAssociatedFiles, have to make sure the file still
- is an associated file.
-}
reconcileStaged :: H.DbQueue -> Annex ()
reconcileStaged qh = do
gitindex <- inRepo currentIndexFile
indexcache <- fromRawFilePath <$> fromRepo gitAnnexKeysDbIndexCache
withTSDelta (liftIO . genInodeCache gitindex) >>= \case
Just cur ->
liftIO (maybe Nothing readInodeCache <$> catchMaybeIO (readFile indexcache)) >>= \case
Nothing -> go cur indexcache
Just prev -> ifM (compareInodeCaches prev cur)
( noop
, go cur indexcache
)
Just cur -> readindexcache indexcache >>= \case
Nothing -> go cur indexcache =<< getindextree
Just prev -> ifM (compareInodeCaches prev cur)
( noop
, go cur indexcache =<< getindextree
)
Nothing -> noop
where
go cur indexcache = do
(l, cleanup) <- inRepo $ pipeNullSplit' diff
lastindexref = Ref "refs/annex/last-index"
readindexcache indexcache = liftIO $ maybe Nothing readInodeCache
<$> catchMaybeIO (readFile indexcache)
getoldtree = fromMaybe emptyTree <$> inRepo (Git.Ref.sha lastindexref)
go cur indexcache (Just newtree) = do
oldtree <- getoldtree
when (oldtree /= newtree) $ do
updatetodiff (fromRef oldtree) (fromRef newtree)
liftIO $ writeFile indexcache $ showInodeCache cur
-- Storing the tree in a ref makes sure it does not
-- get garbage collected, and is available to diff
-- against next time.
inRepo $ update' lastindexref newtree
-- git write-tree will fail if the index is locked or when there is
-- a merge conflict. To get up-to-date with the current index,
-- diff --cached with the old index tree. The current index tree
-- is not known, so not recorded, and the inode cache is not updated,
-- so the next time git-annex runs, it will diff again, even
-- if the index is unchanged.
go _ _ Nothing = do
oldtree <- getoldtree
updatetodiff (fromRef oldtree) "--cached"
updatetodiff old new = do
(l, cleanup) <- inRepo $ pipeNullSplit' $ diff old new
changed <- procdiff l False
void $ liftIO cleanup
-- Flush database changes immediately
-- so other processes can see them.
when changed $
liftIO $ H.flushDbQueue qh
liftIO $ writeFile indexcache $ showInodeCache cur
diff =
-- Avoid running smudge or clean filters, since we want the
-- raw output, and they would block trying to access the
-- Avoid running smudge clean filter, which would block trying to
-- access the locked database. git write-tree sometimes calls it,
-- even though it is not adding work tree files to the index,
-- and so the filter cannot have an effect on the contents of the
-- index or on the tree that gets written from it.
getindextree = inRepo $ \r -> writeTreeQuiet $ r
{ gitGlobalOpts = gitGlobalOpts r ++ bypassSmudgeConfig }
diff old new =
-- Avoid running smudge clean filter, since we want the
-- raw output, and it would block trying to access the
-- locked database. The --raw normally avoids git diff
-- running them, but older versions of git need this.
bypassSmudgeConfig ++
@ -253,20 +288,18 @@ reconcileStaged qh = do
-- (The -G option may make it be used otherwise.)
[ Param "-c", Param "diff.external="
, Param "diff"
, Param "--cached"
, Param old
, Param new
, Param "--raw"
, Param "-z"
, Param "--no-abbrev"
-- Optimization: Only find pointer files. This is not
-- perfect. A file could start with this and not be a
-- pointer file. And a pointer file that is replaced with
-- a non-pointer file will match this.
, Param $ "-G^" ++ fromRawFilePath (toInternalGitPath $
-- Optimization: Limit to pointer files and annex symlinks.
-- This is not perfect. A file could contain with this and not
-- be a pointer file. And a pointer file that is replaced with
-- a non-pointer file will match this. This is only a
-- prefilter so that's ok.
, Param $ "-G" ++ fromRawFilePath (toInternalGitPath $
P.pathSeparator `S.cons` objectDir')
-- Don't include files that were deleted, because this only
-- wants to update information for files that are present
-- in the index.
, Param "--diff-filter=AMUT"
-- Disable rename detection.
, Param "--no-renames"
-- Avoid other complications.
@ -276,20 +309,28 @@ reconcileStaged qh = do
procdiff (info:file:rest) changed
| ":" `S.isPrefixOf` info = case S8.words info of
(_colonsrcmode:dstmode:_srcsha:dstsha:_change:[])
-- Only want files, not symlinks
| dstmode /= fmtTreeItemType TreeSymlink -> do
maybe noop (reconcile (asTopFilePath file))
=<< catKey (Ref dstsha)
procdiff rest True
| otherwise -> procdiff rest changed
(_colonsrcmode:dstmode:srcsha:dstsha:_change:[]) -> do
removed <- catKey (Ref srcsha) >>= \case
Just oldkey -> do
liftIO $ SQL.removeAssociatedFile oldkey
(asTopFilePath file)
(SQL.WriteHandle qh)
return True
Nothing -> return False
added <- catKey (Ref dstsha) >>= \case
Just key -> do
liftIO $ SQL.addAssociatedFile key
(asTopFilePath file)
(SQL.WriteHandle qh)
when (dstmode /= fmtTreeItemType TreeSymlink) $
reconcilerace (asTopFilePath file) key
return True
Nothing -> return False
procdiff rest (changed || removed || added)
_ -> return changed -- parse failed
procdiff _ changed = return changed
-- Note that database writes done in here will not necessarily
-- be visible to database reads also done in here.
reconcile file key = do
liftIO $ SQL.addAssociatedFileFast key file (SQL.WriteHandle qh)
reconcilerace file key = do
caches <- liftIO $ SQL.getInodeCaches key (SQL.ReadHandle qh)
keyloc <- calcRepo (gitAnnexLocation key)
keypopulated <- sameInodeCache keyloc caches

View file

@ -185,8 +185,18 @@ commitAlways :: CommitMode -> String -> Branch -> [Ref] -> Repo -> IO Sha
commitAlways commitmode message branch parentrefs repo = fromJust
<$> commit commitmode True message branch parentrefs repo
-- Throws exception if the index is locked, with an error message output by
-- git on stderr.
writeTree :: Repo -> IO Sha
writeTree repo = getSha "write-tree" $ pipeReadStrict [Param "write-tree"] repo
writeTree repo = getSha "write-tree" $
pipeReadStrict [Param "write-tree"] repo
-- Avoids error output if the command fails due to eg, the index being locked.
writeTreeQuiet :: Repo -> IO (Maybe Sha)
writeTreeQuiet repo = extractSha <$> withNullHandle go
where
go nullh = pipeReadStrict' (\p -> p { std_err = UseHandle nullh })
[Param "write-tree"] repo
commitTree :: CommitMode -> String -> [Ref] -> Ref -> Repo -> IO Sha
commitTree commitmode message parentrefs tree repo =

View file

@ -70,17 +70,15 @@ pipeReadLazy params repo = assertLocal repo $ do
- Nonzero exit status is ignored.
-}
pipeReadStrict :: [CommandParam] -> Repo -> IO S.ByteString
pipeReadStrict = pipeReadStrict' S.hGetContents
pipeReadStrict = pipeReadStrict' id
{- The reader action must be strict. -}
pipeReadStrict' :: (Handle -> IO a) -> [CommandParam] -> Repo -> IO a
pipeReadStrict' reader params repo = assertLocal repo $ withCreateProcess p go
pipeReadStrict' :: (CreateProcess -> CreateProcess) -> [CommandParam] -> Repo -> IO S.ByteString
pipeReadStrict' fp params repo = assertLocal repo $ withCreateProcess p go
where
p = (gitCreateProcess params repo)
{ std_out = CreatePipe }
p = fp (gitCreateProcess params repo) { std_out = CreatePipe }
go _ (Just outh) _ pid = do
output <- reader outh
output <- S.hGetContents outh
hClose outh
void $ waitForProcess pid
return output

View file

@ -19,7 +19,7 @@ addWantGet = addPreferredContentLimit $
addWantDrop :: Annex ()
addWantDrop = addPreferredContentLimit $
checkWant $ wantDrop False Nothing Nothing
checkWant $ \af -> wantDrop False Nothing Nothing af (Just [])
addPreferredContentLimit :: (MatchInfo -> Annex Bool) -> Annex ()
addPreferredContentLimit a = do

View file

@ -46,8 +46,8 @@ import Logs.Remote
import Types.StandardGroups
import Limit
{- Checks if a file is preferred content for the specified repository
- (or the current repository if none is specified). -}
{- Checks if a file is preferred content (or required content) for the
- specified repository (or the current repository if none is specified). -}
isPreferredContent :: Maybe UUID -> AssumeNotPresent -> Maybe Key -> AssociatedFile -> Bool -> Annex Bool
isPreferredContent = checkMap preferredContentMap

22
Test.hs
View file

@ -376,6 +376,7 @@ unitTests note = testGroup ("Unit Tests " ++ note)
, testCase "bup remote" test_bup_remote
, testCase "crypto" test_crypto
, testCase "preferred content" test_preferred_content
, testCase "required_content" test_required_content
, testCase "add subdirs" test_add_subdirs
, testCase "addurl" test_addurl
]
@ -749,6 +750,27 @@ test_preferred_content = intmpclonerepo $ do
git_annex "get" ["--auto", annexedfile] "get --auto of file with exclude=*"
annexed_notpresent annexedfile
test_required_content :: Assertion
test_required_content = intmpclonerepo $ do
git_annex "get" [annexedfile] "get"
annexed_present annexedfile
git_annex "required" [".", "include=" ++ annexedfile] "annexedfile required"
git_annex_shouldfail "drop" [annexedfile] "drop of required content should fail"
annexed_present annexedfile
git_annex "drop" ["--auto", annexedfile] "drop --auto of required content skips it"
annexed_present annexedfile
writecontent annexedfiledup $ content annexedfiledup
add_annex annexedfiledup "add of second file with same content failed"
annexed_present annexedfiledup
annexed_present annexedfile
git_annex_shouldfail "drop" [annexedfiledup] "drop of file sharing required content should fail"
annexed_present annexedfiledup
annexed_present annexedfile
test_lock :: Assertion
test_lock = intmpclonerepo $ do
annexed_notpresent annexedfile

View file

@ -47,7 +47,7 @@ upgrade automatic = flip catchNonAsync onexception $ do
, do
checkGitVersionForIndirectUpgrade
)
scanUnlockedFiles
scanAnnexedFiles
configureSmudgeFilter
-- Inode sentinal file was only used in direct mode and when
-- locking down files as they were added. In v6, it's used more

View file

@ -19,3 +19,5 @@ So, this seems solvable in v7 repositories, but not in v5.
Also, the associated files map may not be accurate at all times, so that's
a wrinkle to using it for this. Also, only unlocked files get into the
associated files map. --[[Joey]]
> [[fixed|done]] --[[Joey]]

View file

@ -134,16 +134,17 @@ in either of two repositories.
Matches files that the preferred content settings for the repository
make it want to get. Note that this will match even files that are
already present, unless limited with e.g., `--not --in .`
Note that this will not match anything when using --all or --unused.
* `--want-drop`
Matches files that the preferred content settings for the repository
make it want to drop. Note that this will match even files that have
already been dropped, unless limited with e.g., `--in .`
Note that this will not match anything when using --all or --unused.
Files that this matches will not necessarily be dropped by
`git-annex drop --auto`. This does not check that there are enough copies
to drop. Also the same content may be used by a file that is not wanted
to be dropped.
* `--accessedwithin=interval`

View file

@ -9,7 +9,12 @@ If most of the files are locked, that would actually make the scan
somewhere around twice as slow as it currently is. So not a worthwhile
optimisation.
And I don't see much else there that could be optimised. Possibly the
Update: Now that the scan also scans for locked files to make the
associated files include information about them, the catKey optimisation
did make sense. Unfortunately, that does mean this scan got a little bit
slower still, since it has to use git ls-tree --long.
I don't see much else there that could be optimised. Possibly the
ls-tree parser could be made faster but it's already using attoparsec
so unlikely to be many gains.
"""]]