import: Add --skip-duplicates option.

Note that the hash backends no longer print a (checksum...) message as part
of this change, since it would otherwise show up without a file name while
import is still deciding whether to act on a file. That message should
probably have been removed a while ago anyway.
This commit is contained in:
Joey Hess 2013-12-04 13:13:30 -04:00
parent 9639142df9
commit 64160a9679
4 changed files with 46 additions and 36 deletions

View file

@ -129,9 +129,7 @@ needsUpgrade key = "\\" `isPrefixOf` keyHash key ||
any (not . validExtension) (takeExtensions $ keyName key) any (not . validExtension) (takeExtensions $ keyName key)
hashFile :: Hash -> FilePath -> Integer -> Annex String hashFile :: Hash -> FilePath -> Integer -> Annex String
hashFile hash file filesize = do hashFile hash file filesize = liftIO $ go hash
showAction "checksum"
liftIO $ go hash
where where
go (SHAHash hashsize) = case shaHasher hashsize filesize of go (SHAHash hashsize) = case shaHasher hashsize filesize of
Left sha -> sha <$> L.readFile file Left sha -> sha <$> L.readFile file

View file

@ -28,18 +28,22 @@ opts =
[ duplicateOption [ duplicateOption
, deduplicateOption , deduplicateOption
, cleanDuplicatesOption , cleanDuplicatesOption
, skipDuplicatesOption
] ]
duplicateOption :: Option duplicateOption :: Option
duplicateOption = Option.flag [] "duplicate" "do not delete outside files" duplicateOption = Option.flag [] "duplicate" "do not delete source files"
deduplicateOption :: Option deduplicateOption :: Option
deduplicateOption = Option.flag [] "deduplicate" "do not add files whose content has been seen" deduplicateOption = Option.flag [] "deduplicate" "delete source files whose content was imported before"
cleanDuplicatesOption :: Option cleanDuplicatesOption :: Option
cleanDuplicatesOption = Option.flag [] "clean-duplicates" "delete outside duplicate files (import nothing)" cleanDuplicatesOption = Option.flag [] "clean-duplicates" "delete duplicate source files (import nothing)"
data DuplicateMode = Default | Duplicate | DeDuplicate | CleanDuplicates skipDuplicatesOption :: Option
skipDuplicatesOption = Option.flag [] "skip-duplicates" "import only new files"
data DuplicateMode = Default | Duplicate | DeDuplicate | CleanDuplicates | SkipDuplicates
deriving (Eq) deriving (Eq)
getDuplicateMode :: Annex DuplicateMode getDuplicateMode :: Annex DuplicateMode
@ -47,13 +51,15 @@ getDuplicateMode = gen
<$> getflag duplicateOption <$> getflag duplicateOption
<*> getflag deduplicateOption <*> getflag deduplicateOption
<*> getflag cleanDuplicatesOption <*> getflag cleanDuplicatesOption
<*> getflag skipDuplicatesOption
where where
getflag = Annex.getFlag . Option.name getflag = Annex.getFlag . Option.name
gen False False False = Default gen False False False False = Default
gen True False False = Duplicate gen True False False False = Duplicate
gen False True False = DeDuplicate gen False True False False = DeDuplicate
gen False False True = CleanDuplicates gen False False True False = CleanDuplicates
gen _ _ _ = error "bad combination of --duplicate, --deduplicate, --clean-duplicates" gen False False False True = SkipDuplicates
gen _ _ _ _ = error "bad combination of --duplicate, --deduplicate, --clean-duplicates, --skip-duplicates"
seek :: [CommandSeek] seek :: [CommandSeek]
seek = [withValue getDuplicateMode $ \mode -> withPathContents $ start mode] seek = [withValue getDuplicateMode $ \mode -> withPathContents $ start mode]
@ -62,43 +68,45 @@ start :: DuplicateMode -> (FilePath, FilePath) -> CommandStart
start mode (srcfile, destfile) = start mode (srcfile, destfile) =
ifM (liftIO $ isRegularFile <$> getSymbolicLinkStatus srcfile) ifM (liftIO $ isRegularFile <$> getSymbolicLinkStatus srcfile)
( do ( do
showStart "import" destfile isdup <- do
next $ perform mode srcfile destfile backend <- chooseBackend destfile
let ks = KeySource srcfile srcfile Nothing
v <- genKey ks backend
case v of
Just (k, _) -> not . null <$> keyLocations k
_ -> return False
case pickaction isdup of
Nothing -> stop
Just a -> do
showStart "import" destfile
next a
, stop , stop
) )
perform :: DuplicateMode -> FilePath -> FilePath -> CommandPerform
perform mode srcfile destfile =
case mode of
DeDuplicate -> ifM isdup
( deletedup
, go
)
CleanDuplicates -> ifM isdup
( deletedup
, next $ return True
)
_ -> go
where where
isdup = do
backend <- chooseBackend destfile
let ks = KeySource srcfile srcfile Nothing
v <- genKey ks backend
case v of
Just (k, _) -> not . null <$> keyLocations k
_ -> return False
deletedup = do deletedup = do
showNote "duplicate" showNote "duplicate"
liftIO $ removeFile srcfile liftIO $ removeFile srcfile
next $ return True next $ return True
go = do importfile = do
whenM (liftIO $ doesFileExist destfile) $ whenM (liftIO $ doesFileExist destfile) $
unlessM (Annex.getState Annex.force) $ unlessM (Annex.getState Annex.force) $
error $ "not overwriting existing " ++ destfile ++ error $ "not overwriting existing " ++ destfile ++
" (use --force to override)" " (use --force to override)"
liftIO $ createDirectoryIfMissing True (parentDir destfile) liftIO $ createDirectoryIfMissing True (parentDir destfile)
liftIO $ if mode == Duplicate liftIO $ if mode == Duplicate || mode == SkipDuplicates
then void $ copyFileExternal srcfile destfile then void $ copyFileExternal srcfile destfile
else moveFile srcfile destfile else moveFile srcfile destfile
Command.Add.perform destfile Command.Add.perform destfile
pickaction isdup = case mode of
DeDuplicate
| isdup -> Just deletedup
| otherwise -> Just importfile
CleanDuplicates
| isdup -> Just deletedup
| otherwise -> Nothing
SkipDuplicates
| isdup -> Nothing
| otherwise -> Just importfile
_ -> Just importfile

1
debian/changelog vendored
View file

@ -14,6 +14,7 @@ git-annex (5.20131131) UNRELEASED; urgency=low
* Deal with box.com changing the url of their webdav endpoint. * Deal with box.com changing the url of their webdav endpoint.
* Android: Fix SRV record lookups for XMPP to use android getprop * Android: Fix SRV record lookups for XMPP to use android getprop
command to find DNS server, since there is no resolv.conf. command to find DNS server, since there is no resolv.conf.
* import: Add --skip-duplicates option.
-- Joey Hess <joeyh@debian.org> Sun, 01 Dec 2013 13:57:58 -0400 -- Joey Hess <joeyh@debian.org> Sun, 01 Dec 2013 13:57:58 -0400

View file

@ -236,6 +236,9 @@ subdirectories).
use the `--deduplicate` option. Duplicate files will be deleted from the use the `--deduplicate` option. Duplicate files will be deleted from the
import location. import location.
To only import files whose content has not been seen before by git-annex,
but avoid deleting duplicate files, use the `--skip-duplicates` option.
The `--clean-duplicates` option does not import any new files, but any files The `--clean-duplicates` option does not import any new files, but any files
found in the import location that are duplicates of content in the annex found in the import location that are duplicates of content in the annex
are deleted. are deleted.