verify associated files when checking numcopies
Most of this is just refactoring. But, handleDropsFrom did not verify that associated files from the keys db were still accurate, and has now been fixed to. A minor improvement to this would be to avoid calling catKeyFile twice on the same file, when getting the numcopies and mincopies value, in the common case where the same file has the highest value for both. But, it avoids checking every associated file, so it will scale well to lots of dups already. Sponsored-by: Kevin Mueller on Patreon
This commit is contained in:
parent
d164434679
commit
af9fdf5dba
5 changed files with 68 additions and 10 deletions
|
@ -52,11 +52,7 @@ type Reason = String
|
|||
handleDropsFrom :: [UUID] -> [Remote] -> Reason -> Bool -> Key -> AssociatedFile -> SeekInput -> [VerifiedCopy] -> (CommandStart -> CommandCleanup) -> Annex ()
|
||||
handleDropsFrom locs rs reason fromhere key afile si preverified runner = do
|
||||
g <- Annex.gitRepo
|
||||
l <- map (`fromTopFilePath` g)
|
||||
<$> Database.Keys.getAssociatedFiles key
|
||||
let fs = case afile of
|
||||
AssociatedFile (Just f) -> f : filter (/= f) l
|
||||
AssociatedFile Nothing -> l
|
||||
fs <- Database.Keys.getAssociatedFilesIncluding afile key
|
||||
n <- getcopies fs
|
||||
void $ if fromhere && checkcopies n Nothing
|
||||
then go fs rs n >>= dropl fs
|
||||
|
@ -64,11 +60,7 @@ handleDropsFrom locs rs reason fromhere key afile si preverified runner = do
|
|||
where
|
||||
getcopies fs = do
|
||||
(untrusted, have) <- trustPartition UnTrusted locs
|
||||
(numcopies, mincopies) <- if null fs
|
||||
then (,) <$> getNumCopies <*> getMinCopies
|
||||
else do
|
||||
l <- mapM getFileNumMinCopies fs
|
||||
return (maximum $ map fst l, maximum $ map snd l)
|
||||
(numcopies, mincopies) <- getSafestNumMinCopies' key fs
|
||||
return (length have, numcopies, mincopies, S.fromList untrusted)
|
||||
|
||||
{- Check that we have enough copies still to drop the content.
|
||||
|
|
|
@ -12,6 +12,8 @@ module Annex.NumCopies (
|
|||
module Logs.NumCopies,
|
||||
getFileNumMinCopies,
|
||||
getAssociatedFileNumMinCopies,
|
||||
getSafestNumMinCopies,
|
||||
getSafestNumMinCopies',
|
||||
getGlobalFileNumCopies,
|
||||
getNumCopies,
|
||||
getMinCopies,
|
||||
|
@ -34,6 +36,8 @@ import qualified Remote
|
|||
import qualified Types.Remote as Remote
|
||||
import Annex.Content
|
||||
import Annex.UUID
|
||||
import Annex.CatFile
|
||||
import qualified Database.Keys
|
||||
|
||||
import Control.Exception
|
||||
import qualified Control.Monad.Catch as M
|
||||
|
@ -119,6 +123,11 @@ getFileNumMinCopies f = do
|
|||
<$> fallbacknum
|
||||
<*> fallbackmin
|
||||
|
||||
{- NumCopies and MinCopies value for an associated file, or the default
|
||||
- when there is no associated file.
|
||||
-
|
||||
- This does not include other associated files using the same key.
|
||||
-}
|
||||
getAssociatedFileNumMinCopies :: AssociatedFile -> Annex (NumCopies, MinCopies)
|
||||
getAssociatedFileNumMinCopies (AssociatedFile (Just file)) =
|
||||
getFileNumMinCopies file
|
||||
|
@ -126,6 +135,44 @@ getAssociatedFileNumMinCopies (AssociatedFile Nothing) = (,)
|
|||
<$> getNumCopies
|
||||
<*> getMinCopies
|
||||
|
||||
{- Gets the highest NumCopies and MinCopies value for all files
|
||||
- associated with a key. Provide any known associated file;
|
||||
- the rest are looked up from the database.
|
||||
-
|
||||
- Using this when dropping avoids dropping one file that
|
||||
- has a smaller value violating the value set for another file
|
||||
- that uses the same content.
|
||||
-}
|
||||
getSafestNumMinCopies :: AssociatedFile -> Key -> Annex (NumCopies, MinCopies)
|
||||
getSafestNumMinCopies afile k =
|
||||
Database.Keys.getAssociatedFilesIncluding afile k
|
||||
>>= getSafestNumMinCopies' k
|
||||
|
||||
getSafestNumMinCopies' :: Key -> [RawFilePath] -> Annex (NumCopies, MinCopies)
|
||||
getSafestNumMinCopies' k fs = do
|
||||
l <- mapM getFileNumMinCopies fs
|
||||
let l' = zip l fs
|
||||
(,)
|
||||
<$> findmax fst l' getNumCopies
|
||||
<*> findmax snd l' getMinCopies
|
||||
where
|
||||
-- Some associated files in the keys database may no longer
|
||||
-- correspond to files in the repository.
|
||||
stillassociated f = catKeyFile f >>= \case
|
||||
Just k' | k' == k -> return True
|
||||
_ -> return False
|
||||
|
||||
-- Avoid calling stillassociated on every file; just make sure
|
||||
-- that the one with the highest value is still associated.
|
||||
findmax _ [] fallback = fallback
|
||||
findmax getv l fallback = do
|
||||
let n = maximum (map (getv . fst) l)
|
||||
let (maxls, l') = partition (\(x, _) -> getv x == n) l
|
||||
ifM (anyM stillassociated (map snd maxls))
|
||||
( return n
|
||||
, findmax getv l' fallback
|
||||
)
|
||||
|
||||
{- This is the globally visible numcopies value for a file. So it does
|
||||
- not include local configuration in the git config or command line
|
||||
- options. -}
|
||||
|
|
|
@ -4,6 +4,8 @@ git-annex (8.20210429) UNRELEASED; urgency=medium
|
|||
* When two files have the same content, and a required content expression
|
||||
matches one but not the other, dropping the latter file will fail as it
|
||||
would also remove the content of the required file.
|
||||
* drop, move, import: When two files have the same content, and
|
||||
different numcopies or requiredcopies values, use the higher value.
|
||||
* drop --auto: When two files have the same content, and a preferred content
|
||||
expression matches one but not the other, do not drop the content.
|
||||
* sync --content, assistant: When two unlocked files have the same
|
||||
|
|
|
@ -14,6 +14,7 @@ module Database.Keys (
|
|||
closeDb,
|
||||
addAssociatedFile,
|
||||
getAssociatedFiles,
|
||||
getAssociatedFilesIncluding,
|
||||
getAssociatedKey,
|
||||
removeAssociatedFile,
|
||||
storeInodeCaches,
|
||||
|
@ -155,6 +156,15 @@ addAssociatedFile k f = runWriterIO $ SQL.addAssociatedFile k f
|
|||
getAssociatedFiles :: Key -> Annex [TopFilePath]
|
||||
getAssociatedFiles = runReaderIO . SQL.getAssociatedFiles
|
||||
|
||||
{- Include a known associated file along with any recorded in the database. -}
|
||||
getAssociatedFilesIncluding :: AssociatedFile -> Key -> Annex [RawFilePath]
|
||||
getAssociatedFilesIncluding afile k = do
|
||||
g <- Annex.gitRepo
|
||||
l <- map (`fromTopFilePath` g) <$> getAssociatedFiles k
|
||||
return $ case afile of
|
||||
AssociatedFile (Just f) -> f : filter (/= f) l
|
||||
AssociatedFile Nothing -> l
|
||||
|
||||
{- Gets any keys that are on record as having a particular associated file.
|
||||
- (Should be one or none but the database doesn't enforce that.) -}
|
||||
getAssociatedKey :: TopFilePath -> Annex [Key]
|
||||
|
|
|
@ -15,3 +15,10 @@ differently than in a non-bare repo. (Also if this is done, the preferred
|
|||
content checking should also behave the same way.) The docs for --all
|
||||
do say that it bypasses checking .gitattributes numcopies.
|
||||
--[[Joey]]
|
||||
|
||||
> Note that the assistant and git-annex sync already check numcopies
|
||||
> for all known associated files, so already handled this for unlocked
|
||||
> files. With the recent change to also track
|
||||
> associated files for locked files, they also handle it for those.
|
||||
>
|
||||
> But, git-annex drop/move/import don't yet.
|
||||
|
|
Loading…
Reference in a new issue