Optimise non-bare http remotes; git-annex no longer requests the wrong url (and gets a 404) every time before trying the right url. This needs annex-bare to be set to false, which is done when initially probing the uuid of an http remote.

This commit is contained in:
Joey Hess 2014-01-26 13:03:25 -04:00
parent b93e485ef1
commit 5fc2d760ea
5 changed files with 46 additions and 16 deletions

View file

@ -111,7 +111,7 @@ gen r u c gc
, retrieveKeyFile = copyFromRemote new , retrieveKeyFile = copyFromRemote new
, retrieveKeyFileCheap = copyFromRemoteCheap new , retrieveKeyFileCheap = copyFromRemoteCheap new
, removeKey = dropKey new , removeKey = dropKey new
, hasKey = inAnnex r , hasKey = inAnnex new
, hasKeyCheap = repoCheap r , hasKeyCheap = repoCheap r
, whereisKey = Nothing , whereisKey = Nothing
, remoteFsck = if Git.repoIsUrl r , remoteFsck = if Git.repoIsUrl r
@ -197,7 +197,12 @@ tryGitConfigRead r
Left _ -> do Left _ -> do
set_ignore "not usable by git-annex" set_ignore "not usable by git-annex"
return r return r
Right r' -> return r' Right r' -> do
-- Cache when http remote is not bare for
-- optimisation.
unless (Git.Config.isBare r') $
setremote "annex-bare" (Git.Config.boolConfig False)
return r'
store = observe $ \r' -> do store = observe $ \r' -> do
g <- gitRepo g <- gitRepo
@ -222,12 +227,18 @@ tryGitConfigRead r
set_ignore "does not have git-annex installed" set_ignore "does not have git-annex installed"
return r return r
set_ignore msg = case Git.remoteName r of set_ignore msg = do
let k = "annex-ignore"
case Git.remoteName r of
Nothing -> noop
Just n -> warning $ "Remote " ++ n ++ " " ++ msg ++ "; setting " ++ k
setremote k (Git.Config.boolConfig True)
setremote k v = case Git.remoteName r of
Nothing -> noop Nothing -> noop
Just n -> do Just n -> do
let k = "remote." ++ n ++ ".annex-ignore" let k' = "remote." ++ n ++ "." ++ k
warning $ "Remote " ++ n ++ " " ++ msg ++ "; setting " ++ k inRepo $ Git.Command.run [Param "config", Param k', Param v]
inRepo $ Git.Command.run [Param "config", Param k, Param "true"]
handlegcrypt Nothing = return r handlegcrypt Nothing = return r
handlegcrypt (Just _cacheduuid) = do handlegcrypt (Just _cacheduuid) = do
@ -242,15 +253,16 @@ tryGitConfigRead r
- If the remote cannot be accessed, or if it cannot determine - If the remote cannot be accessed, or if it cannot determine
- whether it has the content, returns a Left error message. - whether it has the content, returns a Left error message.
-} -}
inAnnex :: Git.Repo -> Key -> Annex (Either String Bool) inAnnex :: Remote -> Key -> Annex (Either String Bool)
inAnnex r key inAnnex rmt key
| Git.repoIsHttp r = checkhttp =<< getHttpHeaders | Git.repoIsHttp r = checkhttp =<< getHttpHeaders
| Git.repoIsUrl r = checkremote | Git.repoIsUrl r = checkremote
| otherwise = checklocal | otherwise = checklocal
where where
r = repo rmt
checkhttp headers = do checkhttp headers = do
showChecking r showChecking r
ifM (anyM (\u -> Url.withUserAgent $ Url.checkBoth u headers (keySize key)) (keyUrls r key)) ifM (anyM (\u -> Url.withUserAgent $ Url.checkBoth u headers (keySize key)) (keyUrls rmt key))
( return $ Right True ( return $ Right True
, return $ Left "not found" , return $ Left "not found"
) )
@ -263,14 +275,19 @@ inAnnex r key
dispatch (Right (Just b)) = Right b dispatch (Right (Just b)) = Right b
dispatch (Right Nothing) = cantCheck r dispatch (Right Nothing) = cantCheck r
keyUrls :: Git.Repo -> Key -> [String] keyUrls :: Remote -> Key -> [String]
keyUrls r key = map tourl locs keyUrls r key = map tourl locs'
where where
tourl l = Git.repoLocation r ++ "/" ++ l tourl l = Git.repoLocation (repo r) ++ "/" ++ l
-- If the remote is known to not be bare, try the hash locations
-- used for non-bare repos first, as an optimisation.
locs
| remoteAnnexBare (gitconfig r) == Just False = reverse (annexLocations key)
| otherwise = annexLocations key
#ifndef mingw32_HOST_OS #ifndef mingw32_HOST_OS
locs = annexLocations key locs' = locs
#else #else
locs = map (replace "\\" "/") (annexLocations key) locs' = map (replace "\\" "/") (annexLocations key)
#endif #endif
dropKey :: Remote -> Key -> Annex Bool dropKey :: Remote -> Key -> Annex Bool
@ -309,7 +326,7 @@ copyFromRemote' r key file dest
direct <- isDirect direct <- isDirect
Ssh.rsyncHelper (Just feeder) Ssh.rsyncHelper (Just feeder)
=<< Ssh.rsyncParamsRemote direct r Download key dest file =<< Ssh.rsyncParamsRemote direct r Download key dest file
| Git.repoIsHttp (repo r) = Annex.Content.downloadUrl (keyUrls (repo r) key) dest | Git.repoIsHttp (repo r) = Annex.Content.downloadUrl (keyUrls r key) dest
| otherwise = error "copying from non-ssh, non-http remote not supported" | otherwise = error "copying from non-ssh, non-http remote not supported"
where where
{- Feed local rsync's progress info back to the remote, {- Feed local rsync's progress info back to the remote,

View file

@ -109,6 +109,7 @@ data RemoteGitConfig = RemoteGitConfig
, remoteAnnexStartCommand :: Maybe String , remoteAnnexStartCommand :: Maybe String
, remoteAnnexStopCommand :: Maybe String , remoteAnnexStopCommand :: Maybe String
, remoteAnnexAvailability :: Maybe Availability , remoteAnnexAvailability :: Maybe Availability
, remoteAnnexBare :: Maybe Bool
{- These settings are specific to particular types of remotes {- These settings are specific to particular types of remotes
- including special remotes. -} - including special remotes. -}
@ -139,6 +140,7 @@ extractRemoteGitConfig r remotename = RemoteGitConfig
, remoteAnnexStartCommand = notempty $ getmaybe "start-command" , remoteAnnexStartCommand = notempty $ getmaybe "start-command"
, remoteAnnexStopCommand = notempty $ getmaybe "stop-command" , remoteAnnexStopCommand = notempty $ getmaybe "stop-command"
, remoteAnnexAvailability = getmayberead "availability" , remoteAnnexAvailability = getmayberead "availability"
, remoteAnnexBare = getmaybebool "bare"
, remoteAnnexSshOptions = getoptions "ssh-options" , remoteAnnexSshOptions = getoptions "ssh-options"
, remoteAnnexRsyncOptions = getoptions "rsync-options" , remoteAnnexRsyncOptions = getoptions "rsync-options"

4
debian/changelog vendored
View file

@ -31,6 +31,10 @@ git-annex (5.20140118) UNRELEASED; urgency=medium
* assistant: Run the periodic git gc in batch mode. * assistant: Run the periodic git gc in batch mode.
* added annex.secure-erase-command config option. * added annex.secure-erase-command config option.
* test suite: Use tasty-rerun, and expose tasty command-line options. * test suite: Use tasty-rerun, and expose tasty command-line options.
* Optimise non-bare http remotes; git-annex no longer requests the
wrong url (and gets a 404) every time before trying the right url.
This needs annex-bare to be set to false, which is done when
initially probing the uuid of an http remote.
-- Joey Hess <joeyh@debian.org> Sat, 18 Jan 2014 11:54:17 -0400 -- Joey Hess <joeyh@debian.org> Sat, 18 Jan 2014 11:54:17 -0400

View file

@ -1355,11 +1355,16 @@ Here are all the supported configuration settings.
configured by the trust and untrust commands. The value can be any of configured by the trust and untrust commands. The value can be any of
"trusted", "semitrusted" or "untrusted". "trusted", "semitrusted" or "untrusted".
* `remote.<name>.availability` * `remote.<name>.annex-availability`
Can be used to tell git-annex whether a remote is LocallyAvailable Can be used to tell git-annex whether a remote is LocallyAvailable
or GloballyAvailable. Normally, git-annex determines this automatically. or GloballyAvailable. Normally, git-annex determines this automatically.
* `remote.<name>.annex-bare`
Can be used to tell git-annex if a remote is a bare repository
or not. Normally, git-annex determines this automatically.
* `remote.<name>.annex-ssh-options` * `remote.<name>.annex-ssh-options`
Options to use when using ssh to talk to this remote. Options to use when using ssh to talk to this remote.

View file

@ -14,3 +14,5 @@ always avoid this 404 problem.
directory hashing, but that's been discussed elsewhere.) directory hashing, but that's been discussed elsewhere.)
--[[Joey]] --[[Joey]]
[[done]]