import --reinject-duplicates

This is the same as running git annex reinject --known, followed by
git-annex import. The advantage of having it in one command is that it
only has to hash each file once; run separately, the two commands
hash the imported files a second time.
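
To make the saving concrete, here is a minimal standalone sketch of the
hash-once flow, assuming a toy key type and an in-memory set of known keys;
none of these names are git-annex's actual API:

```haskell
import Data.Bits (xor)
import qualified Data.ByteString.Lazy as L
import qualified Data.Set as S
import Data.Word (Word8)
import System.Environment (getArgs)

-- Toy stand-in for a real backend key (e.g. SHA256E): file length plus
-- an xor checksum. Computing it requires reading the whole file, which
-- models the expensive hashing step.
type Key = (Int, Word8)

genKey :: FilePath -> IO Key
genKey f = do
    b <- L.readFile f
    return (fromIntegral (L.length b), L.foldl' xor 0 b)

-- The combined command: one genKey call per file, then branch on
-- whether the key is already known.
importOrReinject :: S.Set Key -> FilePath -> IO ()
importOrReinject knownKeys f = do
    k <- genKey f  -- the only hashing pass over the file
    putStrLn $ if k `S.member` knownKeys
        then f ++ ": duplicate; would reinject its content"
        else f ++ ": new; would import it"

main :: IO ()
main = getArgs >>= mapM_ (importOrReinject S.empty)
```

Running the two real commands back to back would instead do the
genKey-equivalent work twice per file.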

This commit was sponsored by Shane-o on Patreon.
Joey Hess 2017-02-09 15:40:44 -04:00
parent eae186e4b9
commit c1ece47ea0
5 changed files with 50 additions and 19 deletions


@@ -46,6 +46,7 @@ git-annex (6.20170102) UNRELEASED; urgency=medium
   * import: --deduplicate and --skip-duplicates were implemented
     inefficiently; they unnecessarily hashed each file twice. They have
     been improved to only hash once.
+  * import: Added --reinject-duplicates.
 
  -- Joey Hess <id@joeyh.name>  Fri, 06 Jan 2017 15:22:06 -0400


@@ -11,6 +11,7 @@ import Command
 import qualified Git
 import qualified Annex
 import qualified Command.Add
+import qualified Command.Reinject
 import Utility.CopyFile
 import Backend
 import Types.KeySource
@@ -28,7 +29,7 @@ cmd = withGlobalOptions (jobsOption : jsonOption : fileMatchingOptions) $ notBar
     "move and add files from outside git working copy"
     paramPaths (seek <$$> optParser)
 
-data DuplicateMode = Default | Duplicate | DeDuplicate | CleanDuplicates | SkipDuplicates
+data DuplicateMode = Default | Duplicate | DeDuplicate | CleanDuplicates | SkipDuplicates | ReinjectDuplicates
     deriving (Eq)
 
 data ImportOptions = ImportOptions
@@ -57,7 +58,11 @@ duplicateModeParser =
         )
     <|> flag' SkipDuplicates
         ( long "skip-duplicates"
-        <> help "import only new files"
+        <> help "import only new files (do not delete source files)"
         )
+    <|> flag' ReinjectDuplicates
+        ( long "reinject-duplicates"
+        <> help "import new files, and reinject the content of files that were imported before"
+        )
 
 seek :: ImportOptions -> CommandSeek
@@ -88,6 +93,9 @@ start largematcher mode (srcfile, destfile) =
             warning "Could not verify that the content is still present in the annex; not removing from the import location."
             stop
         )
+    reinject k = do
+        showNote "reinjecting"
+        Command.Reinject.perform srcfile k
     importfile ld k = checkdestdir $ do
         ignored <- not <$> Annex.getState Annex.force <&&> checkIgnored destfile
         if ignored
@@ -184,6 +192,9 @@ start largematcher mode (srcfile, destfile) =
         SkipDuplicates -> checkdup k
             (skipbecause "duplicate")
             (importfile ld k)
+        ReinjectDuplicates -> checkdup k
+            (reinject k)
+            (importfile ld k)
         _ -> importfile ld k
     skipbecause s = showNote (s ++ "; skipping") >> next (return True)


@@ -43,9 +43,12 @@ startSrcDest (src:dest:[])
     | src == dest = stop
     | otherwise = notAnnexed src $ do
         showStart "reinject" dest
-        next $ ifAnnexed dest
-            (\key -> perform src key (verifyKeyContent DefaultVerify UnVerified key src))
-            stop
+        next $ ifAnnexed dest go stop
+  where
+    go key = ifM (verifyKeyContent DefaultVerify UnVerified key src)
+        ( perform src key
+        , error "failed"
+        )
 startSrcDest _ = giveup "specify a src file and a dest file"
 
 startKnown :: FilePath -> CommandStart
@@ -55,7 +58,7 @@ startKnown src = notAnnexed src $ do
     case mkb of
         Nothing -> error "Failed to generate key"
         Just (key, _) -> ifM (isKnownKey key)
-            ( next $ perform src key (return True)
+            ( next $ perform src key
             , do
                 warning "Not known content; skipping"
                 next $ next $ return True
@@ -65,19 +68,15 @@ notAnnexed :: FilePath -> CommandStart -> CommandStart
 notAnnexed src = ifAnnexed src $
     giveup $ "cannot use annexed file as src: " ++ src
 
-perform :: FilePath -> Key -> Annex Bool -> CommandPerform
-perform src key verify = ifM move
+perform :: FilePath -> Key -> CommandPerform
+perform src key = ifM move
     ( next $ cleanup key
     , error "failed"
     )
   where
-    move = checkDiskSpaceToGet key False $
-        ifM verify
-            ( do
-                moveAnnex key src
-                return True
-            , return False
-            )
+    move = checkDiskSpaceToGet key False $ do
+        moveAnnex key src
+        return True
 
 cleanup :: Key -> CommandCleanup
 cleanup key = do
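
The perform change above is worth pausing on: verification moves from inside
perform out to its callers, so startSrcDest verifies first while startKnown,
which already trusts the content, skips it. A rough sketch of that
refactoring pattern, using plain IO in place of the Annex monad (all names
here are illustrative, not git-annex's):

```haskell
-- Before: the move action received a verifier and ran it itself.
performBefore :: FilePath -> IO Bool -> IO Bool
performBefore src verify = do
    ok <- verify
    if ok
        then move src
        else return False

-- After: perform only moves. Callers that need verification run it
-- before calling; callers with already-known content simply don't.
performAfter :: FilePath -> IO Bool
performAfter = move

move :: FilePath -> IO Bool
move src = do
    putStrLn ("would move " ++ src ++ " into the annex")
    return True

main :: IO ()
main = do
    _ <- performBefore "a.dat" (return True)  -- startSrcDest-style caller
    _ <- performAfter "b.dat"                 -- startKnown-style caller
    return ()
```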


@@ -33,10 +33,9 @@ Several options can be used to adjust handling of duplicate files.
   Do not delete files from the import location.
 
-  This could allow importing the same files repeatedly
-  to different locations in a repository. More likely, it could be used to
-  import the same files to a number of different branches or separate git
-  repositories.
+  Running with this option repeatedly can import the same files into
+  different git repositories, or branches, or different locations in a git
+  repository.
 
 * `--deduplicate`
@@ -53,6 +52,12 @@ Several options can be used to adjust handling of duplicate files.
   Does not import any files, but any files found in the import location
   that are duplicates are deleted.
 
+* `--reinject-duplicates`
+
+  Imports files that are not duplicates. Files that are duplicates have
+  their content reinjected into the annex (similar to
+  [[git-annex-reinject]]).
+
 * `--force`
 
   Allow existing files to be overwritten by newly imported files.
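
Read together, these options form a small decision table. The sketch below
is one reading of the descriptions above (the Action type and decide
function are inventions for illustration; the real code also handles forced
overwrites and error cases):

```haskell
data DuplicateMode = Default | Duplicate | DeDuplicate
    | CleanDuplicates | SkipDuplicates | ReinjectDuplicates
    deriving (Eq, Show)

data Action = ImportIt | SkipIt | DeleteSource | ReinjectContent
    deriving (Eq, Show)

-- What happens to one source file, given the mode and whether its
-- content is already known to the annex.
decide :: DuplicateMode -> Bool -> [Action]
decide Default            _     = [ImportIt, DeleteSource]  -- plain import moves files
decide Duplicate          _     = [ImportIt]                -- sources always kept
decide DeDuplicate        True  = [DeleteSource]
decide DeDuplicate        False = [ImportIt, DeleteSource]
decide CleanDuplicates    True  = [DeleteSource]            -- imports nothing
decide CleanDuplicates    False = [SkipIt]
decide SkipDuplicates     True  = [SkipIt]                  -- sources kept
decide SkipDuplicates     False = [ImportIt]
decide ReinjectDuplicates True  = [ReinjectContent]
decide ReinjectDuplicates False = [ImportIt, DeleteSource]

main :: IO ()
main = print (decide ReinjectDuplicates True)
```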


@@ -0,0 +1,15 @@
+[[!comment format=mdwn
+ username="joey"
+ subject="""comment 4"""
+ date="2017-02-09T19:33:46Z"
+ content="""
+Actually, import --deduplicate, --skip-duplicates, and --clean-duplicates
+are implemented naively and do hash files twice. So it's
+the same efficiency.
+
+But, I just finished a more complicated implementation that avoids
+the second hashing.
+
+That does make the combined action worth adding, I suppose. Done so as
+--reinject-duplicates.
+"""]]