import --reinject-duplicates
This is the same as running git-annex reinject --known, followed by git-annex import. The advantage of having it in one command is that it only has to hash each file once; run separately, the two commands have to hash the imported files a second time. This commit was sponsored by Shane-o on Patreon.
This commit is contained in:
parent
eae186e4b9
commit
c1ece47ea0
5 changed files with 50 additions and 19 deletions
|
@ -46,6 +46,7 @@ git-annex (6.20170102) UNRELEASED; urgency=medium
|
||||||
* import: --deduplicate and --skip-duplicates were implemented
|
* import: --deduplicate and --skip-duplicates were implemented
|
||||||
inefficiently; they unnecessarily hashed each file twice. They have
|
inefficiently; they unnecessarily hashed each file twice. They have
|
||||||
been improved to only hash once.
|
been improved to only hash once.
|
||||||
|
* import: Added --reinject-duplicates.
|
||||||
|
|
||||||
-- Joey Hess <id@joeyh.name> Fri, 06 Jan 2017 15:22:06 -0400
|
-- Joey Hess <id@joeyh.name> Fri, 06 Jan 2017 15:22:06 -0400
|
||||||
|
|
||||||
|
|
|
@ -11,6 +11,7 @@ import Command
|
||||||
import qualified Git
|
import qualified Git
|
||||||
import qualified Annex
|
import qualified Annex
|
||||||
import qualified Command.Add
|
import qualified Command.Add
|
||||||
|
import qualified Command.Reinject
|
||||||
import Utility.CopyFile
|
import Utility.CopyFile
|
||||||
import Backend
|
import Backend
|
||||||
import Types.KeySource
|
import Types.KeySource
|
||||||
|
@ -28,7 +29,7 @@ cmd = withGlobalOptions (jobsOption : jsonOption : fileMatchingOptions) $ notBar
|
||||||
"move and add files from outside git working copy"
|
"move and add files from outside git working copy"
|
||||||
paramPaths (seek <$$> optParser)
|
paramPaths (seek <$$> optParser)
|
||||||
|
|
||||||
data DuplicateMode = Default | Duplicate | DeDuplicate | CleanDuplicates | SkipDuplicates
|
data DuplicateMode = Default | Duplicate | DeDuplicate | CleanDuplicates | SkipDuplicates | ReinjectDuplicates
|
||||||
deriving (Eq)
|
deriving (Eq)
|
||||||
|
|
||||||
data ImportOptions = ImportOptions
|
data ImportOptions = ImportOptions
|
||||||
|
@ -57,7 +58,11 @@ duplicateModeParser =
|
||||||
)
|
)
|
||||||
<|> flag' SkipDuplicates
|
<|> flag' SkipDuplicates
|
||||||
( long "skip-duplicates"
|
( long "skip-duplicates"
|
||||||
<> help "import only new files"
|
<> help "import only new files (do not delete source files)"
|
||||||
|
)
|
||||||
|
<|> flag' ReinjectDuplicates
|
||||||
|
( long "reinject-duplicates"
|
||||||
|
<> help "import new files, and reinject the content of files that were imported before"
|
||||||
)
|
)
|
||||||
|
|
||||||
seek :: ImportOptions -> CommandSeek
|
seek :: ImportOptions -> CommandSeek
|
||||||
|
@ -88,6 +93,9 @@ start largematcher mode (srcfile, destfile) =
|
||||||
warning "Could not verify that the content is still present in the annex; not removing from the import location."
|
warning "Could not verify that the content is still present in the annex; not removing from the import location."
|
||||||
stop
|
stop
|
||||||
)
|
)
|
||||||
|
reinject k = do
|
||||||
|
showNote "reinjecting"
|
||||||
|
Command.Reinject.perform srcfile k
|
||||||
importfile ld k = checkdestdir $ do
|
importfile ld k = checkdestdir $ do
|
||||||
ignored <- not <$> Annex.getState Annex.force <&&> checkIgnored destfile
|
ignored <- not <$> Annex.getState Annex.force <&&> checkIgnored destfile
|
||||||
if ignored
|
if ignored
|
||||||
|
@ -184,6 +192,9 @@ start largematcher mode (srcfile, destfile) =
|
||||||
SkipDuplicates -> checkdup k
|
SkipDuplicates -> checkdup k
|
||||||
(skipbecause "duplicate")
|
(skipbecause "duplicate")
|
||||||
(importfile ld k)
|
(importfile ld k)
|
||||||
|
ReinjectDuplicates -> checkdup k
|
||||||
|
(reinject k)
|
||||||
|
(importfile ld k)
|
||||||
_ -> importfile ld k
|
_ -> importfile ld k
|
||||||
skipbecause s = showNote (s ++ "; skipping") >> next (return True)
|
skipbecause s = showNote (s ++ "; skipping") >> next (return True)
|
||||||
|
|
||||||
|
|
|
@ -43,9 +43,12 @@ startSrcDest (src:dest:[])
|
||||||
| src == dest = stop
|
| src == dest = stop
|
||||||
| otherwise = notAnnexed src $ do
|
| otherwise = notAnnexed src $ do
|
||||||
showStart "reinject" dest
|
showStart "reinject" dest
|
||||||
next $ ifAnnexed dest
|
next $ ifAnnexed dest go stop
|
||||||
(\key -> perform src key (verifyKeyContent DefaultVerify UnVerified key src))
|
where
|
||||||
stop
|
go key = ifM (verifyKeyContent DefaultVerify UnVerified key src)
|
||||||
|
( perform src key
|
||||||
|
, error "failed"
|
||||||
|
)
|
||||||
startSrcDest _ = giveup "specify a src file and a dest file"
|
startSrcDest _ = giveup "specify a src file and a dest file"
|
||||||
|
|
||||||
startKnown :: FilePath -> CommandStart
|
startKnown :: FilePath -> CommandStart
|
||||||
|
@ -55,7 +58,7 @@ startKnown src = notAnnexed src $ do
|
||||||
case mkb of
|
case mkb of
|
||||||
Nothing -> error "Failed to generate key"
|
Nothing -> error "Failed to generate key"
|
||||||
Just (key, _) -> ifM (isKnownKey key)
|
Just (key, _) -> ifM (isKnownKey key)
|
||||||
( next $ perform src key (return True)
|
( next $ perform src key
|
||||||
, do
|
, do
|
||||||
warning "Not known content; skipping"
|
warning "Not known content; skipping"
|
||||||
next $ next $ return True
|
next $ next $ return True
|
||||||
|
@ -65,19 +68,15 @@ notAnnexed :: FilePath -> CommandStart -> CommandStart
|
||||||
notAnnexed src = ifAnnexed src $
|
notAnnexed src = ifAnnexed src $
|
||||||
giveup $ "cannot used annexed file as src: " ++ src
|
giveup $ "cannot used annexed file as src: " ++ src
|
||||||
|
|
||||||
perform :: FilePath -> Key -> Annex Bool -> CommandPerform
|
perform :: FilePath -> Key -> CommandPerform
|
||||||
perform src key verify = ifM move
|
perform src key = ifM move
|
||||||
( next $ cleanup key
|
( next $ cleanup key
|
||||||
, error "failed"
|
, error "failed"
|
||||||
)
|
)
|
||||||
where
|
where
|
||||||
move = checkDiskSpaceToGet key False $
|
move = checkDiskSpaceToGet key False $ do
|
||||||
ifM verify
|
moveAnnex key src
|
||||||
( do
|
return True
|
||||||
moveAnnex key src
|
|
||||||
return True
|
|
||||||
, return False
|
|
||||||
)
|
|
||||||
|
|
||||||
cleanup :: Key -> CommandCleanup
|
cleanup :: Key -> CommandCleanup
|
||||||
cleanup key = do
|
cleanup key = do
|
||||||
|
|
|
@ -33,10 +33,9 @@ Several options can be used to adjust handling of duplicate files.
|
||||||
|
|
||||||
Do not delete files from the import location.
|
Do not delete files from the import location.
|
||||||
|
|
||||||
This could allow importing the same files repeatedly
|
Running with this option repeatedly can import the same files into
|
||||||
to different locations in a repository. More likely, it could be used to
|
different git repositories, or branches, or different locations in a git
|
||||||
import the same files to a number of different branches or separate git
|
repository.
|
||||||
repositories.
|
|
||||||
|
|
||||||
* `--deduplicate`
|
* `--deduplicate`
|
||||||
|
|
||||||
|
@ -53,6 +52,12 @@ Several options can be used to adjust handling of duplicate files.
|
||||||
Does not import any files, but any files found in the import location
|
Does not import any files, but any files found in the import location
|
||||||
that are duplicates are deleted.
|
that are duplicates are deleted.
|
||||||
|
|
||||||
|
* `--reinject-duplicates`
|
||||||
|
|
||||||
|
Imports files that are not duplicates. Files that are duplicates have
|
||||||
|
their content reinjected into the annex (similar to
|
||||||
|
[[git-annex-reinject]]).
|
||||||
|
|
||||||
* `--force`
|
* `--force`
|
||||||
|
|
||||||
Allow existing files to be overwritten by newly imported files.
|
Allow existing files to be overwritten by newly imported files.
|
||||||
|
|
|
@ -0,0 +1,15 @@
|
||||||
|
[[!comment format=mdwn
|
||||||
|
username="joey"
|
||||||
|
subject="""comment 4"""
|
||||||
|
date="2017-02-09T19:33:46Z"
|
||||||
|
content="""
|
||||||
|
Actually, import --deduplicate, --skip-duplicates, --clean-duplicates
|
||||||
|
are implemented naively and do hash files twice. So it's
|
||||||
|
the same efficiency.
|
||||||
|
|
||||||
|
But, I just finished a more complicated implementation that avoids
|
||||||
|
the second hashing.
|
||||||
|
|
||||||
|
That does make the combined action worth adding, I suppose. Done so as
|
||||||
|
--reinject-duplicates.
|
||||||
|
"""]]
|
Loading…
Add table
Reference in a new issue