addurl: Add --relaxed option.

This commit is contained in:
Joey Hess 2013-03-11 19:55:01 -04:00
parent 283dd11e7d
commit de6f74ac88
4 changed files with 24 additions and 12 deletions

View file

@ -25,7 +25,7 @@ import Config
import Annex.Content.Direct
-- | The commands defined here: a single "addurl" command, described as
-- "add urls to annex", that takes repeating url parameters, is wrapped in
-- notBareRepo, and is given the listed options via withOptions.
-- NOTE(review): two `def =` lines are shown (without and with
-- relaxedOption); this looks like the before/after pair of a diff
-- rendering -- confirm which one the real file keeps.
def :: [Command]
def = [notBareRepo $ withOptions [fileOption, pathdepthOption] $
def = [notBareRepo $ withOptions [fileOption, pathdepthOption, relaxedOption] $
command "addurl" (paramRepeating paramUrl) seek "add urls to annex"]
fileOption :: Option
@ -34,28 +34,32 @@ fileOption = Option.field [] "file" paramFile "specify what file the url is adde
-- | The "pathdepth" field option. It takes a number (paramNumber) and is
-- described to the user as "path components to use in filename".
pathdepthOption :: Option
pathdepthOption = Option.field [] "pathdepth" paramNumber "path components to use in filename"
-- | The "relaxed" flag option, described to the user as "skip size check".
relaxedOption :: Option
relaxedOption = Option.flag [] "relaxed" "skip size check"
-- | Seek stage for addurl. Reads the value of fileOption (as-is), the state
-- of the relaxedOption flag, and the value of pathdepthOption (passed
-- through readish when present), then hands `start` with those values to
-- withStrings.
-- NOTE(review): two `withStrings` lines are shown (start without and with
-- the relaxed argument); this looks like the before/after pair of a diff
-- rendering -- confirm which one the real file keeps.
seek :: [CommandSeek]
seek = [withField fileOption return $ \f ->
withFlag relaxedOption $ \relaxed ->
withField pathdepthOption (return . maybe Nothing readish) $ \d ->
withStrings $ start f d]
withStrings $ start relaxed f d]
-- | Start stage for a single url string s.
--
-- The string is parsed with parseURI; when that fails, `bad` is used
-- instead. The file to add is optfile when one was given, otherwise it is
-- derived with url2file from the parsed url and pathdepth. After showStart,
-- processing continues with perform on the original string s and the file.
-- NOTE(review): the signature, the first equation and the `next $ perform`
-- line each appear twice (without and with the relaxed argument); this
-- looks like the before/after pairs of a diff rendering -- confirm which
-- ones the real file keeps.
start :: Maybe FilePath -> Maybe Int -> String -> CommandStart
start optfile pathdepth s = go $ fromMaybe bad $ parseURI s
start :: Bool -> Maybe FilePath -> Maybe Int -> String -> CommandStart
start relaxed optfile pathdepth s = go $ fromMaybe bad $ parseURI s
where
-- Fallback: escape s with escapeURIString isUnescapedInURI and parse
-- again; calls error with "bad url " ++ s if that still does not parse.
bad = fromMaybe (error $ "bad url " ++ s) $
parseURI $ escapeURIString isUnescapedInURI s
go url = do
let file = fromMaybe (url2file url pathdepth) optfile
showStart "addurl" file
next $ perform s file
next $ perform relaxed s file
perform :: String -> FilePath -> CommandPerform
perform url file = ifAnnexed file addurl geturl
perform :: Bool -> String -> FilePath -> CommandPerform
perform relaxed url file = ifAnnexed file addurl geturl
where
geturl = do
liftIO $ createDirectoryIfMissing True (parentDir file)
ifM (Annex.getState Annex.fast)
( nodownload url file , download url file )
ifM (Annex.getState Annex.fast <||> pure relaxed)
( nodownload relaxed url file , download url file )
addurl (key, _backend) = do
headers <- getHttpHeaders
ifM (liftIO $ Url.check url headers $ keySize key)
@ -90,10 +94,12 @@ download url file = do
setUrlPresent key url
next $ Command.Add.cleanup file key True
nodownload :: String -> FilePath -> CommandPerform
nodownload url file = do
nodownload :: Bool -> String -> FilePath -> CommandPerform
nodownload relaxed url file = do
headers <- getHttpHeaders
(exists, size) <- liftIO $ Url.exists url headers
(exists, size) <- if relaxed
then pure (True, Nothing)
else liftIO $ Url.exists url headers
if exists
then do
let key = Backend.URL.fromUrl url size

1
debian/changelog vendored
View file

@ -54,6 +54,7 @@ git-annex (4.20130228) UNRELEASED; urgency=low
avoiding re-checksumming.
* assistant: Detects most renames, including directory renames, and
combines all their changes into a single commit.
* addurl: Add --relaxed option.
-- Joey Hess <joeyh@debian.org> Wed, 27 Feb 2013 23:20:40 -0400

View file

@ -154,6 +154,9 @@ subdirectories).
To avoid immediately downloading the url, specify --fast.
To avoid storing the size of the url's content, and accept whatever
is there at a future point, specify --relaxed. (Implies --fast.)
Normally the filename is based on the full url, so will look like
"www.example.com_dir_subdir_bigfile". For a shorter filename, specify
--pathdepth=N. For example, --pathdepth=1 will use "dir/subdir/bigfile",

View file

@ -5,3 +5,5 @@ c.f. [http://git-annex.branchable.com/tips/How_to_retroactively_annex_a_file_alr
The bottleneck I'm hitting here seems to be the fact that `git annex addurl` diligently checks each url to see that it is accessible, which adds up quickly if many files are to be processed.
It would be great if addurl had an option to disable checking the url, in order to speed up large batch jobs like this.
> --relaxed added [[done]] --[[Joey]]