import --reinject-duplicates

This is the same as running git annex reinject --known, followed by
git-annex import. The advantage to having it in one command is that it
only has to hash each file once; running the two commands separately
hashes the imported files a second time.

This commit was sponsored by Shane-o on Patreon.
Joey Hess 2017-02-09 15:40:44 -04:00
parent eae186e4b9
commit c1ece47ea0
No known key found for this signature in database
GPG key ID: C910D9222512E3C7
5 changed files with 50 additions and 19 deletions


@@ -46,6 +46,7 @@ git-annex (6.20170102) UNRELEASED; urgency=medium
   * import: --deduplicate and --skip-duplicates were implemented
     inefficiently; they unnecessarily hashed each file twice. They have
     been improved to only hash once.
+  * import: Added --reinject-duplicates.

  -- Joey Hess <id@joeyh.name>  Fri, 06 Jan 2017 15:22:06 -0400


@@ -11,6 +11,7 @@ import Command
 import qualified Git
 import qualified Annex
 import qualified Command.Add
+import qualified Command.Reinject
 import Utility.CopyFile
 import Backend
 import Types.KeySource
@@ -28,7 +29,7 @@ cmd = withGlobalOptions (jobsOption : jsonOption : fileMatchingOptions) $ notBar
         "move and add files from outside git working copy"
         paramPaths (seek <$$> optParser)

-data DuplicateMode = Default | Duplicate | DeDuplicate | CleanDuplicates | SkipDuplicates
+data DuplicateMode = Default | Duplicate | DeDuplicate | CleanDuplicates | SkipDuplicates | ReinjectDuplicates
     deriving (Eq)

 data ImportOptions = ImportOptions
@@ -57,7 +58,11 @@ duplicateModeParser =
         )
     <|> flag' SkipDuplicates
         ( long "skip-duplicates"
-        <> help "import only new files"
+        <> help "import only new files (do not delete source files)"
+        )
+    <|> flag' ReinjectDuplicates
+        ( long "reinject-duplicates"
+        <> help "import new files, and reinject the content of files that were imported before"
         )

 seek :: ImportOptions -> CommandSeek
@@ -88,6 +93,9 @@ start largematcher mode (srcfile, destfile) =
             warning "Could not verify that the content is still present in the annex; not removing from the import location."
             stop
         )
+    reinject k = do
+        showNote "reinjecting"
+        Command.Reinject.perform srcfile k
     importfile ld k = checkdestdir $ do
         ignored <- not <$> Annex.getState Annex.force <&&> checkIgnored destfile
         if ignored
@@ -184,6 +192,9 @@ start largematcher mode (srcfile, destfile) =
         SkipDuplicates -> checkdup k
             (skipbecause "duplicate")
             (importfile ld k)
+        ReinjectDuplicates -> checkdup k
+            (reinject k)
+            (importfile ld k)
         _ -> importfile ld k
     skipbecause s = showNote (s ++ "; skipping") >> next (return True)


@@ -43,9 +43,12 @@ startSrcDest (src:dest:[])
     | src == dest = stop
     | otherwise = notAnnexed src $ do
         showStart "reinject" dest
-        next $ ifAnnexed dest
-            (\key -> perform src key (verifyKeyContent DefaultVerify UnVerified key src))
-            stop
+        next $ ifAnnexed dest go stop
+  where
+    go key = ifM (verifyKeyContent DefaultVerify UnVerified key src)
+        ( perform src key
+        , error "failed"
+        )
 startSrcDest _ = giveup "specify a src file and a dest file"

 startKnown :: FilePath -> CommandStart
@@ -55,7 +58,7 @@ startKnown src = notAnnexed src $ do
     case mkb of
         Nothing -> error "Failed to generate key"
         Just (key, _) -> ifM (isKnownKey key)
-            ( next $ perform src key (return True)
+            ( next $ perform src key
             , do
                 warning "Not known content; skipping"
                 next $ next $ return True
@@ -65,19 +68,15 @@ notAnnexed :: FilePath -> CommandStart -> CommandStart
 notAnnexed src = ifAnnexed src $
     giveup $ "cannot used annexed file as src: " ++ src

-perform :: FilePath -> Key -> Annex Bool -> CommandPerform
-perform src key verify = ifM move
+perform :: FilePath -> Key -> CommandPerform
+perform src key = ifM move
     ( next $ cleanup key
     , error "failed"
     )
   where
-    move = checkDiskSpaceToGet key False $
-        ifM verify
-            ( do
-                moveAnnex key src
-                return True
-            , return False
-            )
+    move = checkDiskSpaceToGet key False $ do
+        moveAnnex key src
+        return True

 cleanup :: Key -> CommandCleanup
 cleanup key = do


@@ -33,10 +33,9 @@ Several options can be used to adjust handling of duplicate files.
   Do not delete files from the import location.

-  This could allow importing the same files repeatedly
-  to different locations in a repository. More likely, it could be used to
-  import the same files to a number of different branches or separate git
-  repositories.
+  Running with this option repeatedly can import the same files into
+  different git repositories, or branches, or different locations in a git
+  repository.

 * `--deduplicate`
@@ -53,6 +52,12 @@ Several options can be used to adjust handling of duplicate files.
   Does not import any files, but any files found in the import location
   that are duplicates are deleted.

+* `--reinject-duplicates`
+
+  Imports files that are not duplicates. Files that are duplicates have
+  their content reinjected into the annex (similar to
+  [[git-annex-reinject]]).
+
 * `--force`

   Allow existing files to be overwritten by newly imported files.


@@ -0,0 +1,15 @@
+[[!comment format=mdwn
+username="joey"
+subject="""comment 4"""
+date="2017-02-09T19:33:46Z"
+content="""
+Actually, import --deduplicate, --skip-duplicates, --clean-duplicates
+are implemented naively and do hash files twice. So it's
+the same efficiency.
+
+But, I just finished a more complicated implementation that avoids
+the second hashing.
+
+That does make the combined action worth adding, I suppose. Done so as
+--reinject-duplicates.
+"""]]