import --reinject-duplicates
This is the same as running git-annex reinject --known, followed by git-annex import. The advantage of having it in one command is that each file only has to be hashed once; run separately, the two commands hash the imported files a second time.

This commit was sponsored by Shane-o on Patreon.
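To illustrate the hash-once idea in isolation, here is a minimal standalone Haskell sketch (not git-annex's actual code; the known-key set and the reinjectContent/importNew actions are hypothetical stand-ins for git-annex's key database and command logic):

```haskell
-- Standalone sketch of the hash-once idea, using cryptonite for SHA256.
-- Hypothetical stand-ins: `known` for git-annex's key lookup,
-- `reinjectContent`/`importNew` for the reinject and import actions.
import qualified Data.ByteString.Lazy as L
import qualified Data.Set as S
import Crypto.Hash (Digest, SHA256, hashlazy)

type Key = Digest SHA256

keyFor :: FilePath -> IO Key
keyFor f = hashlazy <$> L.readFile f  -- the one (expensive) hash

-- Decide per file with a single hash: duplicates are reinjected,
-- new files are imported; nothing is hashed a second time.
importOne :: S.Set Key -> FilePath -> IO ()
importOne known f = do
    k <- keyFor f
    if k `S.member` known
        then reinjectContent f k
        else importNew f k

reinjectContent :: FilePath -> Key -> IO ()
reinjectContent f k = putStrLn ("reinject " ++ f ++ " -> " ++ show k)

importNew :: FilePath -> Key -> IO ()
importNew f k = putStrLn ("import " ++ f ++ " -> " ++ show k)
```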
parent eae186e4b9
commit c1ece47ea0
5 changed files with 50 additions and 19 deletions
@@ -46,6 +46,7 @@ git-annex (6.20170102) UNRELEASED; urgency=medium
   * import: --deduplicate and --skip-duplicates were implemented
     inefficiently; they unnecessarily hashed each file twice. They have
     been improved to only hash once.
+  * import: Added --reinject-duplicates.
 
  -- Joey Hess <id@joeyh.name>  Fri, 06 Jan 2017 15:22:06 -0400
 
@@ -11,6 +11,7 @@ import Command
 import qualified Git
 import qualified Annex
 import qualified Command.Add
+import qualified Command.Reinject
 import Utility.CopyFile
 import Backend
 import Types.KeySource
@@ -28,7 +29,7 @@ cmd = withGlobalOptions (jobsOption : jsonOption : fileMatchingOptions) $ notBar
 	"move and add files from outside git working copy"
 	paramPaths (seek <$$> optParser)
 
-data DuplicateMode = Default | Duplicate | DeDuplicate | CleanDuplicates | SkipDuplicates
+data DuplicateMode = Default | Duplicate | DeDuplicate | CleanDuplicates | SkipDuplicates | ReinjectDuplicates
 	deriving (Eq)
 
 data ImportOptions = ImportOptions
@@ -57,7 +58,11 @@ duplicateModeParser =
 		)
 	<|> flag' SkipDuplicates
 		( long "skip-duplicates"
-		<> help "import only new files"
+		<> help "import only new files (do not delete source files)"
 		)
+	<|> flag' ReinjectDuplicates
+		( long "reinject-duplicates"
+		<> help "import new files, and reinject the content of files that were imported before"
+		)
 
 seek :: ImportOptions -> CommandSeek
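For reference, the option parsing above uses optparse-applicative's flag' combinator, with <|> choosing whichever mode flag was given. A minimal standalone sketch of that pattern (the module layout and the pure Default fallback are assumptions for illustration, not git-annex's real parser):

```haskell
-- Minimal sketch of the flag'/<|> pattern (not git-annex's module).
-- Each flag' yields a constructor; pure Default covers "no flag given".
import Options.Applicative

data DuplicateMode = Default | SkipDuplicates | ReinjectDuplicates
    deriving (Show, Eq)

duplicateModeParser :: Parser DuplicateMode
duplicateModeParser =
    flag' SkipDuplicates
        ( long "skip-duplicates"
        <> help "import only new files (do not delete source files)"
        )
    <|> flag' ReinjectDuplicates
        ( long "reinject-duplicates"
        <> help "import new files, and reinject the content of duplicates"
        )
    <|> pure Default  -- no duplicate-handling flag given

main :: IO ()
main = print =<< execParser (info (duplicateModeParser <**> helper) fullDesc)
```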
@@ -88,6 +93,9 @@ start largematcher mode (srcfile, destfile) =
 			warning "Could not verify that the content is still present in the annex; not removing from the import location."
 			stop
 		)
+	reinject k = do
+		showNote "reinjecting"
+		Command.Reinject.perform srcfile k
 	importfile ld k = checkdestdir $ do
 		ignored <- not <$> Annex.getState Annex.force <&&> checkIgnored destfile
 		if ignored
@@ -184,6 +192,9 @@ start largematcher mode (srcfile, destfile) =
 		SkipDuplicates -> checkdup k
 			(skipbecause "duplicate")
 			(importfile ld k)
+		ReinjectDuplicates -> checkdup k
+			(reinject k)
+			(importfile ld k)
 		_ -> importfile ld k
 	skipbecause s = showNote (s ++ "; skipping") >> next (return True)
 
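The checkdup helper itself is defined outside this hunk; from the call sites it evidently takes a key and two alternative actions. A sketch of that inferred shape, with the known-key test abstracted as a parameter since its real definition is not shown here:

```haskell
-- Assumed shape of checkdup, inferred from the call sites above:
-- test whether the key is already known (once), then continue with
-- either the duplicate action or the non-duplicate action.
checkdup :: Monad m => (key -> m Bool) -> key -> m a -> m a -> m a
checkdup isknown k dupaction notdupaction = do
    dup <- isknown k
    if dup then dupaction else notdupaction
```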
@@ -43,9 +43,12 @@ startSrcDest (src:dest:[])
 	| src == dest = stop
 	| otherwise = notAnnexed src $ do
 		showStart "reinject" dest
-		next $ ifAnnexed dest
-			(\key -> perform src key (verifyKeyContent DefaultVerify UnVerified key src))
-			stop
+		next $ ifAnnexed dest go stop
+  where
+	go key = ifM (verifyKeyContent DefaultVerify UnVerified key src)
+		( perform src key
+		, error "failed"
+		)
 startSrcDest _ = giveup "specify a src file and a dest file"
 
 startKnown :: FilePath -> CommandStart
@@ -55,7 +58,7 @@ startKnown src = notAnnexed src $ do
 	case mkb of
 		Nothing -> error "Failed to generate key"
 		Just (key, _) -> ifM (isKnownKey key)
-			( next $ perform src key (return True)
+			( next $ perform src key
 			, do
 				warning "Not known content; skipping"
 				next $ next $ return True
@@ -65,19 +68,15 @@ notAnnexed :: FilePath -> CommandStart -> CommandStart
 notAnnexed src = ifAnnexed src $
 	giveup $ "cannot used annexed file as src: " ++ src
 
-perform :: FilePath -> Key -> Annex Bool -> CommandPerform
-perform src key verify = ifM move
+perform :: FilePath -> Key -> CommandPerform
+perform src key = ifM move
 	( next $ cleanup key
 	, error "failed"
 	)
   where
-	move = checkDiskSpaceToGet key False $
-		ifM verify
-			( do
-				moveAnnex key src
-				return True
-			, return False
-			)
+	move = checkDiskSpaceToGet key False $ do
+		moveAnnex key src
+		return True
 
 cleanup :: Key -> CommandCleanup
 cleanup key = do
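Note the tuple-argument form ifM move (onsuccess, onfailure) used above: git-annex defines this combinator in its Utility.Monad module, along the lines of this minimal equivalent:

```haskell
-- Equivalent definition of the tuple-form ifM used in the hunk above:
-- run the monadic condition once, then pick one of the two branches.
ifM :: Monad m => m Bool -> (m a, m a) -> m a
ifM cond (thenclause, elseclause) = do
    c <- cond
    if c then thenclause else elseclause
```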
@@ -33,10 +33,9 @@ Several options can be used to adjust handling of duplicate files.
 
   Do not delete files from the import location.
 
-  This could allow importing the same files repeatedly
-  to different locations in a repository. More likely, it could be used to
-  import the same files to a number of different branches or separate git
-  repositories.
+  Running with this option repeatedly can import the same files into
+  different git repositories, or branches, or different locations in a git
+  repository.
 
 * `--deduplicate`
 
@@ -53,6 +52,12 @@ Several options can be used to adjust handling of duplicate files.
   Does not import any files, but any files found in the import location
   that are duplicates are deleted.
 
+* `--reinject-duplicates`
+
+  Imports files that are not duplicates. Files that are duplicates have
+  their content reinjected into the annex (similar to
+  [[git-annex-reinject]]).
+
 * `--force`
 
   Allow existing files to be overwritten by newly imported files.
@@ -0,0 +1,15 @@
+[[!comment format=mdwn
+ username="joey"
+ subject="""comment 4"""
+ date="2017-02-09T19:33:46Z"
+ content="""
+Actually, import --deduplicate, --skip-duplicates, --clean-duplicates
+are implemented naively and do hash files twice. So it's
+the same efficiency.
+
+But, I just finished a more complicated implementation that avoids
+the second hashing.
+
+That does make the combined action worth adding, I suppose. Done so as
+--reinject-duplicates.
+"""]]