get --incomplete: New option to resume any interrupted downloads.

This commit is contained in:
Joey Hess 2015-06-02 14:20:38 -04:00
parent 1f33822eb5
commit d28e8fbfd5
8 changed files with 73 additions and 52 deletions

View file

@ -35,9 +35,11 @@ module Annex.Content (
thawContent,
dirKeys,
withObjectLoc,
staleKeysPrune,
) where
import System.IO.Unsafe (unsafeInterleaveIO)
import Data.List (foldl')
import qualified Data.Set as S

import Common.Annex
import Logs.Location
@ -663,3 +665,37 @@ dirKeys dirspec = do
, return []
)
{- Examines the keys found in the directory selected by dirspec
 - (bad/tmp keys).
 -
 - Keys whose content is already present in the annex have no value
 - left in that directory, so their files there are deleted.
 -
 - The keys that remain are returned; they might still have value, or
 - might be stale and removable. When nottransferred is True, keys that
 - have a transfer listed by getTransfers are omitted from the result.
 -}
staleKeysPrune :: (Git.Repo -> FilePath) -> Bool -> Annex [Key]
staleKeysPrune dirspec nottransferred = do
    found <- dirKeys dirspec
    present <- filterM inAnnex found
    dir <- fromRepo dirspec
    -- Content already in the annex makes the directory's copy redundant.
    liftIO $ mapM_ (removeFile . (dir </>) . keyFile) present
    let candidates = found `exclude` present
    if nottransferred
        then do
            active <- S.fromList . map (transferKey . fst) <$> getTransfers
            return [k | k <- candidates, k `S.notMember` active]
        else return candidates
{- Finds items in the first, smaller list, that are not
 - present in the second, larger list.
 -
 - The result is produced by S.toList, so each remaining item appears
 - in it once.
 -
 - Only the smaller list is turned into a set; the items of the larger
 - list are then deleted from it one at a time. This appears more
 - efficient in both memory and CPU than constructing two sets and
 - taking their S.difference.
 -
 - The deletions are folded with the strict foldl', so each
 - intermediate set is evaluated as the larger list is walked, instead
 - of accumulating a chain of pending S.delete calls as long as that
 - list. -}
exclude :: Ord a => [a] -> [a] -> [a]
exclude [] _ = [] -- optimisation
exclude smaller larger = S.toList $ remove larger $ S.fromList smaller
  where
    remove items set = foldl' (flip S.delete) set items

View file

@ -62,6 +62,9 @@ keyOptions =
"operate on specified key"
]
-- The "incomplete" flag, with help text "resume previous downloads".
-- withKeyOptions reads it back with Annex.getFlag "incomplete".
incompleteOption :: Option
incompleteOption = flagOption [] "incomplete" "resume previous downloads"
-- Options to match properties of annexed files.
annexedMatchingOptions :: [Option]
annexedMatchingOptions = concat

View file

@ -27,6 +27,7 @@ import CmdLine.Action
import Logs.Location
import Logs.Unused
import Annex.CatFile
import Annex.Content
withFilesInGit :: (FilePath -> CommandStart) -> CommandSeek
withFilesInGit a params = seekActions $ prepFiltered a $
@ -163,42 +164,42 @@ withNothing :: CommandStart -> CommandSeek
-- Seeks the single action when no parameters were passed;
-- any parameter at all is an error.
withNothing a params
    | null params = seekActions $ return [a]
    | otherwise = error "This command takes no parameters."
{- If --all is specified, or in a bare repo, runs an action on all
- known keys.
{- Handles the --all, --unused, --key, and --incomplete options,
- which specify particular keys to run an action on.
-
- If --unused is specified, runs an action on all keys found by
- the last git annex unused scan.
- In a bare repo, --all is the default.
-
- If --key is specified, operates only on that key.
-
- Otherwise, fall back to a regular CommandSeek action on
- Otherwise falls back to a regular CommandSeek action on
- whatever params were passed. -}
-- NOTE(review): this span shows the pre-change and post-change lines of
-- the commit interleaved (two `case` heads, duplicated alternatives and
-- `go True` clauses); the five-element tuple form is the newer one.
withKeyOptions :: Bool -> (Key -> CommandStart) -> CommandSeek -> CommandSeek
withKeyOptions auto keyop fallbackop params = do
-- Gather everything that can select a key-based mode of operation.
bare <- fromRepo Git.repoIsLocalBare
allkeys <- Annex.getFlag "all"
unused <- Annex.getFlag "unused"
incomplete <- Annex.getFlag "incomplete"
specifickey <- Annex.getField "key"
-- --auto is rejected up front in a bare repository.
when (auto && bare) $
error "Cannot use --auto in a bare repository"
-- Dispatch on the combination of selectors; anything not listed
-- explicitly falls through to the "Can only specify one of" error.
case (allkeys, unused, null params, specifickey) of
(False , False , True , Nothing)
case (allkeys, unused, incomplete, null params, specifickey) of
-- Nothing selected and no params: a bare repo acts on all logged keys.
(False , False , False , True , Nothing)
| bare -> go auto loggedKeys
| otherwise -> fallbackop params
(False , False , _ , Nothing) -> fallbackop params
(True , False , True , Nothing) -> go auto loggedKeys
(False , True , True , Nothing) -> go auto unusedKeys'
(False , False , True , Just ks) -> case file2key ks of
(False , False , False , _ , Nothing) -> fallbackop params
(True , False , False , True , Nothing) -> go auto loggedKeys
(False , True , False , True , Nothing) -> go auto unusedKeys'
(False , False , True , True , Nothing) -> go auto incompletekeys
(False , False , False , True , Just ks) -> case file2key ks of
Nothing -> error "Invalid key"
Just k -> go auto $ return [k]
_ -> error "Can only specify one of file names, --all, --unused, or --key"
_ -> error "Can only specify one of file names, --all, --unused, --key, or --incomplete"
where
-- Key-based modes are never combined with --auto.
go True _ = error "Cannot use --auto with --all or --unused or --key"
go True _ = error "Cannot use --auto with --all or --unused or --key or --incomplete"
go False a = do
matcher <- Limit.getMatcher
seekActions $ map (process matcher) <$> a
-- keyop runs only for keys the matcher accepts; others yield Nothing.
process matcher k = ifM (matcher $ MatchingKey k)
( keyop k , return Nothing)
-- Keys from gitAnnexTmpObjectDir, obtained through staleKeysPrune
-- with its Bool argument set to True.
incompletekeys = staleKeysPrune gitAnnexTmpObjectDir True
prepFiltered :: (FilePath -> CommandStart) -> Annex [FilePath] -> Annex [CommandStart]
prepFiltered a fs = do

View file

@ -21,7 +21,8 @@ cmd = [withOptions getOptions $ command "get" paramPaths seek
SectionCommon "make content of annexed files available"]
-- Options handed to withOptions for the "get" command.
-- NOTE(review): the first definition line below is the pre-change
-- version; the two lines after it are its replacement, which adds
-- incompleteOption ahead of keyOptions.
getOptions :: [Option]
getOptions = fromOption : autoOption : jobsOption : annexedMatchingOptions ++ keyOptions
getOptions = fromOption : autoOption : jobsOption : annexedMatchingOptions
++ incompleteOption : keyOptions
seek :: CommandSeek
seek ps = do

View file

@ -9,7 +9,6 @@
module Command.Unused where
import qualified Data.Set as S
import Control.Monad.ST
import qualified Data.Map as M
@ -18,7 +17,6 @@ import Command
import Logs.Unused
import Annex.Content
import Logs.Location
import Logs.Transfer
import qualified Annex
import qualified Git
import qualified Git.Command
@ -174,18 +172,6 @@ excludeReferenced refspec ks = runfilter firstlevel ks >>= runfilter secondlevel
firstlevel = withKeysReferencedM
secondlevel = withKeysReferencedInGit refspec
{- Finds items in the first, smaller list, that are not
- present in the second, larger list.
-
- The result comes from S.toList on what is left of the set built
- from the first list.
-
- Constructing a single set, of the list that tends to be
- smaller, appears more efficient in both memory and CPU
- than constructing and taking the S.difference of two sets.
-
- NOTE(review): the fold over the larger list uses the lazy foldl;
- the strict foldl' may be preferable here -- confirm it is in scope. -}
exclude :: Ord a => [a] -> [a] -> [a]
exclude [] _ = [] -- optimisation
exclude smaller larger = S.toList $ remove larger $ S.fromList smaller
where
-- Deletes every item of list a from set b.
remove a b = foldl (flip S.delete) b a
{- A bloom filter capable of holding half a million keys with a
- false positive rate of 1 in 1000 uses around 8 mb of memory,
- so will easily fit on even my lowest memory systems.
@ -313,28 +299,6 @@ withKeysReferencedInGitRef a ref = do
tKey False = fileKey . takeFileName . decodeBS <$$>
catFile ref . getTopFilePath . DiffTree.file
{- Looks in the specified directory for bad/tmp keys, and returns a list
- of those that might still have value, or might be stale and removable.
-
- Also, stale keys that can be proven to have no value are deleted:
- these are the keys for which inAnnex holds.
-
- When nottransferred is True, keys belonging to a transfer listed by
- getTransfers are left out of the returned list.
-}
staleKeysPrune :: (Git.Repo -> FilePath) -> Bool -> Annex [Key]
staleKeysPrune dirspec nottransferred = do
contents <- dirKeys dirspec
-- dups: keys from the directory that inAnnex also reports.
dups <- filterM inAnnex contents
let stale = contents `exclude` dups
dir <- fromRepo dirspec
-- Remove the directory's file for each such key.
liftIO $ forM_ dups $ \t -> removeFile $ dir </> keyFile t
if nottransferred
then do
-- Set of keys taken from the transfers getTransfers returns.
inprogress <- S.fromList . map (transferKey . fst)
<$> getTransfers
return $ filter (`S.notMember` inprogress) stale
else return stale
data UnusedMaps = UnusedMaps
{ unusedMap :: UnusedMap
, unusedBadMap :: UnusedMap

1
debian/changelog vendored
View file

@ -8,6 +8,7 @@ git-annex (5.20150529) UNRELEASED; urgency=medium
changes to network connections, as was already done with
network-manager and wicd.
Thanks to Sebastian Reuße for the patches.
* get --incomplete: New option to resume any interrupted downloads.
-- Joey Hess <id@joeyh.name> Sat, 30 May 2015 02:07:18 -0400

View file

@ -32,6 +32,19 @@ or transferring them from some kind of key-value store.
Enables parallel download with up to the specified number of jobs
running at once. For example: `-J10`
* `--incomplete`
Resume any incomplete downloads of files that were started and
interrupted at some point previously. Useful to pick up where you left
off ... when you don't quite remember where that was.
These incomplete files are the same ones that are
listed as unused temp files by [[git-annex-unused]](1).
Note that the git-annex key will be displayed when downloading,
as git-annex does not know the associated file, and the associated file
may not even be in the current git working directory.
* `--all`
Rather than specifying a filename or path to get, this option can be

View file

@ -4,3 +4,5 @@ download.
`git annex get --incomplete` could do this. (With or without --from to
specify which remote to get from.) --[[Joey]]
> [[done]] --[[Joey]]