import --reinject-duplicates
This is the same as running git annex reinject --known, followed by git-annex import. The advantage to having it in one command is that it only has to hash each file once; running the two commands separately hashes the imported files a second time.

This commit was sponsored by Shane-o on Patreon.
parent eae186e4b9
commit c1ece47ea0

5 changed files with 50 additions and 19 deletions
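
To make the equivalence concrete, here is a hedged usage sketch (the source directory is hypothetical):

    # Two-step workflow: the files surviving reinject get hashed a
    # second time when git-annex import generates their keys.
    git annex reinject --known /media/usb/photos/*
    git annex import /media/usb/photos/*

    # Combined invocation added by this commit: each file is hashed once.
    git annex import --reinject-duplicates /media/usb/photos/*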
@@ -46,6 +46,7 @@ git-annex (6.20170102) UNRELEASED; urgency=medium
   * import: --deduplicate and --skip-duplicates were implemented
     inneficiently; they unncessarily hashed each file twice. They have
     been improved to only hash once.
+  * import: Added --reinject-duplicates.
 
  -- Joey Hess <id@joeyh.name>  Fri, 06 Jan 2017 15:22:06 -0400
 
@@ -11,6 +11,7 @@ import Command
 import qualified Git
 import qualified Annex
 import qualified Command.Add
+import qualified Command.Reinject
 import Utility.CopyFile
 import Backend
 import Types.KeySource

@@ -28,7 +29,7 @@ cmd = withGlobalOptions (jobsOption : jsonOption : fileMatchingOptions) $ notBar
 	"move and add files from outside git working copy"
 	paramPaths (seek <$$> optParser)
 
-data DuplicateMode = Default | Duplicate | DeDuplicate | CleanDuplicates | SkipDuplicates
+data DuplicateMode = Default | Duplicate | DeDuplicate | CleanDuplicates | SkipDuplicates | ReinjectDuplicates
 	deriving (Eq)
 
 data ImportOptions = ImportOptions

@@ -57,7 +58,11 @@ duplicateModeParser =
 		)
 	<|> flag' SkipDuplicates
 		( long "skip-duplicates"
-		<> help "import only new files"
+		<> help "import only new files (do not delete source files)"
 		)
+	<|> flag' ReinjectDuplicates
+		( long "reinject-duplicates"
+		<> help "import new files, and reinject the content of files that were imported before"
+		)
 
 seek :: ImportOptions -> CommandSeek

@@ -88,6 +93,9 @@ start largematcher mode (srcfile, destfile) =
 				warning "Could not verify that the content is still present in the annex; not removing from the import location."
 				stop
 			)
+	reinject k = do
+		showNote "reinjecting"
+		Command.Reinject.perform srcfile k
 	importfile ld k = checkdestdir $ do
 		ignored <- not <$> Annex.getState Annex.force <&&> checkIgnored destfile
 		if ignored

@@ -184,6 +192,9 @@ start largematcher mode (srcfile, destfile) =
 		SkipDuplicates -> checkdup k
 			(skipbecause "duplicate")
 			(importfile ld k)
+		ReinjectDuplicates -> checkdup k
+			(reinject k)
+			(importfile ld k)
 		_ -> importfile ld k
 	skipbecause s = showNote (s ++ "; skipping") >> next (return True)
 
@@ -43,9 +43,12 @@ startSrcDest (src:dest:[])
 	| src == dest = stop
 	| otherwise = notAnnexed src $ do
 		showStart "reinject" dest
-		next $ ifAnnexed dest
-			(\key -> perform src key (verifyKeyContent DefaultVerify UnVerified key src))
-			stop
+		next $ ifAnnexed dest go stop
+  where
+	go key = ifM (verifyKeyContent DefaultVerify UnVerified key src)
+		( perform src key
+		, error "failed"
+		)
 startSrcDest _ = giveup "specify a src file and a dest file"
 
 startKnown :: FilePath -> CommandStart

@@ -55,7 +58,7 @@ startKnown src = notAnnexed src $ do
 	case mkb of
 		Nothing -> error "Failed to generate key"
 		Just (key, _) -> ifM (isKnownKey key)
-			( next $ perform src key (return True)
+			( next $ perform src key
 			, do
 				warning "Not known content; skipping"
 				next $ next $ return True

@@ -65,19 +68,15 @@ notAnnexed :: FilePath -> CommandStart -> CommandStart
 notAnnexed src = ifAnnexed src $
 	giveup $ "cannot used annexed file as src: " ++ src
 
-perform :: FilePath -> Key -> Annex Bool -> CommandPerform
-perform src key verify = ifM move
+perform :: FilePath -> Key -> CommandPerform
+perform src key = ifM move
 	( next $ cleanup key
 	, error "failed"
 	)
   where
-	move = checkDiskSpaceToGet key False $
-		ifM verify
-			( do
-				moveAnnex key src
-				return True
-			, return False
-			)
+	move = checkDiskSpaceToGet key False $ do
+		moveAnnex key src
+		return True
 
 cleanup :: Key -> CommandCleanup
 cleanup key = do
@@ -33,10 +33,9 @@ Several options can be used to adjust handling of duplicate files.
 
   Do not delete files from the import location.
 
-  This could allow importing the same files repeatedly
-  to different locations in a repository. More likely, it could be used to
-  import the same files to a number of different branches or separate git
-  repositories.
+  Running with this option repeatedly can import the same files into
+  different git repositories, or branches, or different locations in a git
+  repository.
 
 * `--deduplicate`
 

@@ -53,6 +52,12 @@ Several options can be used to adjust handling of duplicate files.
   Does not import any files, but any files found in the import location
   that are duplicates are deleted.
 
+* `--reinject-duplicates`
+
+  Imports files that are not duplicates. Files that are duplicates have
+  their content reinjected into the annex (similar to
+  [[git-annex-reinject]]).
+
 * `--force`
 
   Allow existing files to be overwritten by newly imported files.
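
The man page hunk above describes the full set of duplicate-handling options; as a hedged illustration of how they can combine (the repository and source paths are hypothetical):

    # Keep the source files while importing them into two separate repositories.
    (cd ~/repoA && git annex import --duplicate /media/usb/photos/*)
    (cd ~/repoB && git annex import --duplicate /media/usb/photos/*)

    # Later, delete files from the import location that are duplicates.
    git annex import --clean-duplicates /media/usb/photos/*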
@@ -0,0 +1,15 @@
+[[!comment format=mdwn
+ username="joey"
+ subject="""comment 4"""
+ date="2017-02-09T19:33:46Z"
+ content="""
+Actually, import --deduplicate, --skip-duplicates, --clean-duplicates
+are implemeted naively and do hash files twice. So it's
+the same efficiency..
+
+But, I just finished a more complicated implementation that avoids
+the second hashing.
+
+That does make the combined action worth adding, I suppose. Done so as
+--reinject-duplicates.
+"""]]
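
The efficiency point in this comment is the whole motivation for the flag: generate each file's key once and reuse it for both the duplicate check and the chosen action. A toy shell sketch of that single-hash flow (sha256sum, known-keys.txt, and the source directory are stand-ins, not git-annex internals):

    # Hash each file exactly once; branch on whether the key is known.
    for f in /media/usb/photos/*; do
        key=$(sha256sum "$f" | cut -d' ' -f1)     # the only hashing step
        if grep -qxF "$key" known-keys.txt; then  # known key: a duplicate
            echo "reinjecting duplicate: $f"
        else
            echo "importing new file: $f"
        fi
    done

The naive implementation the comment mentions instead hashes once to detect the duplicate and then a second time when the file is actually imported.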