addurl: Add --relaxed option.

This commit is contained in:
Joey Hess 2013-03-11 19:55:01 -04:00
parent 283dd11e7d
commit de6f74ac88
4 changed files with 24 additions and 12 deletions

View file

@ -25,7 +25,7 @@ import Config
import Annex.Content.Direct import Annex.Content.Direct
def :: [Command] def :: [Command]
def = [notBareRepo $ withOptions [fileOption, pathdepthOption] $ def = [notBareRepo $ withOptions [fileOption, pathdepthOption, relaxedOption] $
command "addurl" (paramRepeating paramUrl) seek "add urls to annex"] command "addurl" (paramRepeating paramUrl) seek "add urls to annex"]
fileOption :: Option fileOption :: Option
@ -34,28 +34,32 @@ fileOption = Option.field [] "file" paramFile "specify what file the url is adde
pathdepthOption :: Option pathdepthOption :: Option
pathdepthOption = Option.field [] "pathdepth" paramNumber "path components to use in filename" pathdepthOption = Option.field [] "pathdepth" paramNumber "path components to use in filename"
relaxedOption :: Option
relaxedOption = Option.flag [] "relaxed" "skip size check"
seek :: [CommandSeek] seek :: [CommandSeek]
seek = [withField fileOption return $ \f -> seek = [withField fileOption return $ \f ->
withFlag relaxedOption $ \relaxed ->
withField pathdepthOption (return . maybe Nothing readish) $ \d -> withField pathdepthOption (return . maybe Nothing readish) $ \d ->
withStrings $ start f d] withStrings $ start relaxed f d]
start :: Maybe FilePath -> Maybe Int -> String -> CommandStart start :: Bool -> Maybe FilePath -> Maybe Int -> String -> CommandStart
start optfile pathdepth s = go $ fromMaybe bad $ parseURI s start relaxed optfile pathdepth s = go $ fromMaybe bad $ parseURI s
where where
bad = fromMaybe (error $ "bad url " ++ s) $ bad = fromMaybe (error $ "bad url " ++ s) $
parseURI $ escapeURIString isUnescapedInURI s parseURI $ escapeURIString isUnescapedInURI s
go url = do go url = do
let file = fromMaybe (url2file url pathdepth) optfile let file = fromMaybe (url2file url pathdepth) optfile
showStart "addurl" file showStart "addurl" file
next $ perform s file next $ perform relaxed s file
perform :: String -> FilePath -> CommandPerform perform :: Bool -> String -> FilePath -> CommandPerform
perform url file = ifAnnexed file addurl geturl perform relaxed url file = ifAnnexed file addurl geturl
where where
geturl = do geturl = do
liftIO $ createDirectoryIfMissing True (parentDir file) liftIO $ createDirectoryIfMissing True (parentDir file)
ifM (Annex.getState Annex.fast) ifM (Annex.getState Annex.fast <||> pure relaxed)
( nodownload url file , download url file ) ( nodownload relaxed url file , download url file )
addurl (key, _backend) = do addurl (key, _backend) = do
headers <- getHttpHeaders headers <- getHttpHeaders
ifM (liftIO $ Url.check url headers $ keySize key) ifM (liftIO $ Url.check url headers $ keySize key)
@ -90,10 +94,12 @@ download url file = do
setUrlPresent key url setUrlPresent key url
next $ Command.Add.cleanup file key True next $ Command.Add.cleanup file key True
nodownload :: String -> FilePath -> CommandPerform nodownload :: Bool -> String -> FilePath -> CommandPerform
nodownload url file = do nodownload relaxed url file = do
headers <- getHttpHeaders headers <- getHttpHeaders
(exists, size) <- liftIO $ Url.exists url headers (exists, size) <- if relaxed
then pure (True, Nothing)
else liftIO $ Url.exists url headers
if exists if exists
then do then do
let key = Backend.URL.fromUrl url size let key = Backend.URL.fromUrl url size

1
debian/changelog vendored
View file

@ -54,6 +54,7 @@ git-annex (4.20130228) UNRELEASED; urgency=low
avoiding re-checksumming. avoiding re-checksumming.
* assistant: Detects most renames, including directory renames, and * assistant: Detects most renames, including directory renames, and
combines all their changes into a single commit. combines all their changes into a single commit.
* addurl: Add --relaxed option.
-- Joey Hess <joeyh@debian.org> Wed, 27 Feb 2013 23:20:40 -0400 -- Joey Hess <joeyh@debian.org> Wed, 27 Feb 2013 23:20:40 -0400

View file

@ -154,6 +154,9 @@ subdirectories).
To avoid immediately downloading the url, specify --fast. To avoid immediately downloading the url, specify --fast.
To avoid storing the size of the url's content, and accept whatever
is there at a future point, specify --relaxed. (Implies --fast.)
Normally the filename is based on the full url, so will look like Normally the filename is based on the full url, so will look like
"www.example.com_dir_subdir_bigfile". For a shorter filename, specify "www.example.com_dir_subdir_bigfile". For a shorter filename, specify
--pathdepth=N. For example, --pathdepth=1 will use "dir/subdir/bigfile", --pathdepth=N. For example, --pathdepth=1 will use "dir/subdir/bigfile",

View file

@ -5,3 +5,5 @@ c.f. [http://git-annex.branchable.com/tips/How_to_retroactively_annex_a_file_alr
The bottleneck I'm hitting here seems to be the fact that `git annex addurl` diligently checks each url to see that it is accessible, which adds up quickly if many files are to be processed. The bottleneck I'm hitting here seems to be the fact that `git annex addurl` diligently checks each url to see that it is accessible, which adds up quickly if many files are to be processed.
It would be great if addurl had an option to disable checking the url, in order to speed up large batch jobs like this. It would be great if addurl had an option to disable checking the url, in order to speed up large batch jobs like this.
> --relaxed added [[done]] --[[Joey]]