other 80% of avoding verification when hard linking to objects in shared repo

In c6632ee5c8, it actually only handled
uploading objects to a shared repository. To avoid verification when
downloading objects from a shared repository, was a lot harder.

On the plus side, if the process of downloading a file from a remote
is able to verify its content on the side, the remote can indicate this
now, and avoid the extra post-download verification.

As of yet, I don't have any remotes (except Git) using this ability.
Some more work would be needed to support it in special remotes.

It would make sense for tahoe to implicitly verify things downloaded from it;
as long as you trust your tahoe server (which typically runs locally),
there's cryptographic integrity. OTOH, despite bup being based on shas,
a bup repo under an attacker's control could have the git ref used for an
object changed, and so a bup repo shouldn't implicitly verify. Indeed,
tahoe seems unique in being trustworthy enough to implicitly verify.
This commit is contained in:
Joey Hess 2015-10-02 13:56:42 -04:00
parent 7c7fe895f9
commit 2def1d0a23
15 changed files with 100 additions and 77 deletions

View file

@ -16,7 +16,8 @@ module Annex.Content (
getViaTmp,
getViaTmp',
checkDiskSpaceToGet,
Verify(..),
VerifyConfig(..),
Types.Remote.unVerified,
prepTmp,
withTmp,
checkDiskSpace,
@ -218,18 +219,19 @@ lockContent key a = do
{- Runs an action, passing it the temp file to get,
- and if the action succeeds, verifies the file matches
- the key and moves the file into the annex as a key's content. -}
getViaTmp :: Verify -> Key -> (FilePath -> Annex Bool) -> Annex Bool
getViaTmp :: VerifyConfig -> Key -> (FilePath -> Annex (Bool, Types.Remote.Verification)) -> Annex Bool
getViaTmp v key action = checkDiskSpaceToGet key False $
getViaTmp' v key action
{- Like getViaTmp, but does not check that there is enough disk space
- for the incoming key. For use when the key content is already on disk
- and not being copied into place. -}
getViaTmp' :: Verify -> Key -> (FilePath -> Annex Bool) -> Annex Bool
getViaTmp' :: VerifyConfig -> Key -> (FilePath -> Annex (Bool, Types.Remote.Verification)) -> Annex Bool
getViaTmp' v key action = do
tmpfile <- prepTmp key
ifM (action tmpfile)
( ifM (verifyKeyContent v key tmpfile)
(ok, verification) <- action tmpfile
if ok
then ifM (verifyKeyContent v verification key tmpfile)
( do
moveAnnex key tmpfile
logStatus key InfoPresent
@ -241,8 +243,7 @@ getViaTmp' v key action = do
)
-- On transfer failure, the tmp file is left behind, in case
-- caller wants to resume its transfer
, return False
)
else return False
{- Verifies that a file is the expected content of a key.
- Configuration can prevent verification, for either a
@ -253,8 +254,9 @@ getViaTmp' v key action = do
- When the key's backend allows verifying the content (eg via checksum),
- it is checked.
-}
verifyKeyContent :: Verify -> Key -> FilePath -> Annex Bool
verifyKeyContent v k f = ifM (shouldVerify v)
verifyKeyContent :: VerifyConfig -> Types.Remote.Verification -> Key -> FilePath -> Annex Bool
verifyKeyContent _ Types.Remote.Verified _ _ = return True
verifyKeyContent v Types.Remote.UnVerified k f = ifM (shouldVerify v)
( verifysize <&&> verifycontent
, return True
)
@ -268,9 +270,9 @@ verifyKeyContent v k f = ifM (shouldVerify v)
Nothing -> return True
Just verifier -> verifier k f
data Verify = AlwaysVerify | NoVerify | RemoteVerify Remote | DefaultVerify
data VerifyConfig = AlwaysVerify | NoVerify | RemoteVerify Remote | DefaultVerify
shouldVerify :: Verify -> Annex Bool
shouldVerify :: VerifyConfig -> Annex Bool
shouldVerify AlwaysVerify = return True
shouldVerify NoVerify = return False
shouldVerify DefaultVerify = annexVerify <$> Annex.getGitConfig

View file

@ -5,7 +5,7 @@
- Licensed under the GNU GPL version 3 or higher.
-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE CPP, FlexibleInstances #-}
module Annex.Transfer (
module X,
@ -24,20 +24,31 @@ import Annex.Notification as X
import Annex.Perms
import Utility.Metered
import Utility.LockPool
import Types.Remote (Verification(..))
import Control.Concurrent
type TransferAction = MeterUpdate -> Annex Bool
type TransferObserver = Bool -> Transfer -> TransferInfo -> Annex ()
class Observable a where
observeBool :: a -> Bool
observeFailure :: a
instance Observable Bool where
observeBool = id
observeFailure = False
instance Observable (Bool, Verification) where
observeBool = fst
observeFailure = (False, UnVerified)
noObserver :: TransferObserver
noObserver _ _ _ = noop
upload :: UUID -> Key -> AssociatedFile -> RetryDecider -> TransferObserver -> TransferAction -> NotifyWitness -> Annex Bool
upload :: Observable v => UUID -> Key -> AssociatedFile -> RetryDecider -> TransferObserver -> (MeterUpdate -> Annex v) -> NotifyWitness -> Annex v
upload u key f d o a _witness = runTransfer (Transfer Upload u key) f d o a
download :: UUID -> Key -> AssociatedFile -> RetryDecider -> TransferObserver -> TransferAction -> NotifyWitness -> Annex Bool
download :: Observable v => UUID -> Key -> AssociatedFile -> RetryDecider -> TransferObserver -> (MeterUpdate -> Annex v) -> NotifyWitness -> Annex v
download u key f d o a _witness = runTransfer (Transfer Download u key) f d o a
{- Runs a transfer action. Creates and locks the lock file while the
@ -52,7 +63,7 @@ download u key f d o a _witness = runTransfer (Transfer Download u key) f d o a
- An upload can be run from a read-only filesystem, and in this case
- no transfer information or lock file is used.
-}
runTransfer :: Transfer -> Maybe FilePath -> RetryDecider -> TransferObserver -> TransferAction -> Annex Bool
runTransfer :: Observable v => Transfer -> Maybe FilePath -> RetryDecider -> TransferObserver -> (MeterUpdate -> Annex v) -> Annex v
runTransfer = runTransfer' False
{- Like runTransfer, but ignores any existing transfer lock file for the
@ -60,10 +71,10 @@ runTransfer = runTransfer' False
-
- Note that this may result in confusing progress meter display in the
- webapp, if multiple processes are writing to the transfer info file. -}
alwaysRunTransfer :: Transfer -> Maybe FilePath -> RetryDecider -> TransferObserver -> TransferAction -> Annex Bool
alwaysRunTransfer :: Observable v => Transfer -> Maybe FilePath -> RetryDecider -> TransferObserver -> (MeterUpdate -> Annex v) -> Annex v
alwaysRunTransfer = runTransfer' True
runTransfer' :: Bool -> Transfer -> Maybe FilePath -> RetryDecider -> TransferObserver -> TransferAction -> Annex Bool
runTransfer' :: Observable v => Bool -> Transfer -> Maybe FilePath -> RetryDecider -> TransferObserver -> (MeterUpdate -> Annex v) -> Annex v
runTransfer' ignorelock t file shouldretry transferobserver transferaction = do
info <- liftIO $ startTransferInfo file
(meter, tfile, metervar) <- mkProgressUpdater t info
@ -72,12 +83,12 @@ runTransfer' ignorelock t file shouldretry transferobserver transferaction = do
if inprogress && not ignorelock
then do
showNote "transfer already in progress, or unable to take transfer lock"
return False
return observeFailure
else do
ok <- retry info metervar $ transferaction meter
v <- retry info metervar $ transferaction meter
liftIO $ cleanup tfile lck
transferobserver ok t info
return ok
transferobserver (observeBool v) t info
return v
where
#ifndef mingw32_HOST_OS
prep tfile mode info = do
@ -128,7 +139,7 @@ runTransfer' ignorelock t file shouldretry transferobserver transferaction = do
let newinfo = oldinfo { bytesComplete = Just b }
if shouldretry oldinfo newinfo
then retry newinfo metervar run
else return False
else return observeFailure
getbytescomplete metervar
| transferDirection t == Upload =
liftIO $ readMVar metervar

View file

@ -154,7 +154,7 @@ downloadRemoteFile r relaxed uri file sz = do
-- so that the remote knows what url it
-- should use to download it.
setTempUrl urlkey loguri
let downloader = Remote.retrieveKeyFile r urlkey (Just file)
let downloader = \dest p -> fst <$> Remote.retrieveKeyFile r urlkey (Just file) dest p
ret <- downloadWith downloader urlkey (Remote.uuid r) loguri file
removeTempUrl urlkey
return ret

View file

@ -161,7 +161,7 @@ performRemote key file backend numcopies remote =
( return (Just True)
, ifM (Annex.getState Annex.fast)
( return Nothing
, Just <$>
, Just . fst <$>
Remote.retrieveKeyFile remote key Nothing tmp dummymeter
)
)

View file

@ -52,7 +52,7 @@ perform file oldkey newkey = do
{- Make a hard link to the old key content (when supported),
- to avoid wasting disk space. -}
linkKey :: Key -> Key -> Annex Bool
linkKey oldkey newkey = getViaTmp' DefaultVerify newkey $ \tmp -> do
linkKey oldkey newkey = getViaTmp' DefaultVerify newkey $ \tmp -> unVerified $ do
src <- calcRepo $ gitAnnexLocation oldkey
liftIO $ ifM (doesFileExist tmp)
( return True

View file

@ -40,7 +40,7 @@ start key = fieldTransfer Download key $ \_p -> do
, return False
)
where
go tmp = do
go tmp = unVerified $ do
opts <- filterRsyncSafeOptions . maybe [] words
<$> getField "RsyncOptions"
liftIO $ rsyncServerReceive (map Param opts) tmp

View file

@ -43,7 +43,7 @@ perform src _dest key = ifM move
-- so moveFile is used rather than simply calling
-- moveToObjectDir; disk space is also checked this way,
-- and the file's content is verified to match the key.
move = getViaTmp DefaultVerify key $ \tmp ->
move = getViaTmp DefaultVerify key $ \tmp -> unVerified $
liftIO $ catchBoolIO $ do
moveFile src tmp
return True

View file

@ -35,7 +35,7 @@ perform file key = do
-- the file might be on a different filesystem, so moveFile is used
-- rather than simply calling moveAnnex; disk space is also
-- checked this way.
ok <- getViaTmp DefaultVerify key $ \dest ->
ok <- getViaTmp DefaultVerify key $ \dest -> unVerified $
if dest /= file
then liftIO $ catchBoolIO $ do
moveFile file dest

View file

@ -176,7 +176,7 @@ testUnavailable st r k =
getViaTmp (RemoteVerify r) k $ \dest ->
Remote.retrieveKeyFile r k Nothing dest nullMeterUpdate
, check (== Right False) "retrieveKeyFileCheap" $
getViaTmp (RemoteVerify r) k $ \dest ->
getViaTmp (RemoteVerify r) k $ \dest -> unVerified $
Remote.retrieveKeyFileCheap r k Nothing dest
]
where

View file

@ -76,8 +76,8 @@ gen r _ c gc =
, checkUrl = Just checkTorrentUrl
}
downloadKey :: Key -> AssociatedFile -> FilePath -> MeterUpdate -> Annex Bool
downloadKey key _file dest p =
downloadKey :: Key -> AssociatedFile -> FilePath -> MeterUpdate -> Annex (Bool, Verification)
downloadKey key _file dest p = unVerified $
get . map (torrentUrlNum . fst . getDownloader) =<< getBitTorrentUrls key
where
get [] = do

View file

@ -359,13 +359,13 @@ dropKey r key
| otherwise = commitOnCleanup r $ Ssh.dropKey (repo r) key
{- Tries to copy a key's content from a remote's annex to a file. -}
copyFromRemote :: Remote -> Key -> AssociatedFile -> FilePath -> MeterUpdate -> Annex Bool
copyFromRemote :: Remote -> Key -> AssociatedFile -> FilePath -> MeterUpdate -> Annex (Bool, Verification)
copyFromRemote r key file dest p = parallelMetered (Just p) key file $
copyFromRemote' r key file dest
copyFromRemote' :: Remote -> Key -> AssociatedFile -> FilePath -> MeterUpdate -> Annex Bool
copyFromRemote' :: Remote -> Key -> AssociatedFile -> FilePath -> MeterUpdate -> Annex (Bool, Verification)
copyFromRemote' r key file dest meterupdate
| not $ Git.repoIsUrl (repo r) = guardUsable (repo r) (return False) $ do
| not $ Git.repoIsUrl (repo r) = guardUsable (repo r) (unVerified (return False)) $ do
params <- Ssh.rsyncParams r Download
u <- getUUID
hardlink <- wantHardLink
@ -374,18 +374,17 @@ copyFromRemote' r key file dest meterupdate
ensureInitialized
v <- Annex.Content.prepSendAnnex key
case v of
Nothing -> return False
Nothing -> return (False, UnVerified)
Just (object, checksuccess) -> do
copier <- mkCopier hardlink params
runTransfer (Transfer Download u key)
file noRetry noObserver
(callCopier copier object dest)
<&&> checksuccess
| Git.repoIsSsh (repo r) = feedprogressback $ \feeder -> do
(\p -> copier object dest p checksuccess)
| Git.repoIsSsh (repo r) = unVerified $ feedprogressback $ \feeder -> do
direct <- isDirect
Ssh.rsyncHelper (Just feeder)
=<< Ssh.rsyncParamsRemote direct r Download key dest file
| Git.repoIsHttp (repo r) = Annex.Content.downloadUrl (keyUrls r key) dest
| Git.repoIsHttp (repo r) = unVerified $ Annex.Content.downloadUrl (keyUrls r key) dest
| otherwise = error "copying from non-ssh, non-http remote not supported"
where
{- Feed local rsync's progress info back to the remote,
@ -462,8 +461,8 @@ copyFromRemoteCheap r key af file
)
| Git.repoIsSsh (repo r) =
ifM (Annex.Content.preseedTmp key file)
( parallelMetered Nothing key af $
copyFromRemote' r key af file
( fst <$> parallelMetered Nothing key af
(copyFromRemote' r key af file)
, return False
)
| otherwise = return False
@ -502,13 +501,11 @@ copyToRemote' r key file p
, do
ensureInitialized
copier <- mkCopier hardlink params
let verify = if isHardLinker copier
then Annex.Content.NoVerify
else Annex.Content.RemoteVerify r
let verify = Annex.Content.RemoteVerify r
runTransfer (Transfer Download u key) file noRetry noObserver $ const $
Annex.Content.saveState True `after`
Annex.Content.getViaTmp verify key
(\dest -> callCopier copier object dest p <&&> liftIO checksuccessio)
(\dest -> copier object dest p (liftIO checksuccessio))
)
fsckOnRemote :: Git.Repo -> [CommandParam] -> Annex (IO Bool)
@ -620,32 +617,34 @@ commitOnCleanup r a = go `after` a
wantHardLink :: Annex Bool
wantHardLink = (annexHardLink <$> Annex.getGitConfig) <&&> (not <$> isDirect)
data Copier
= Copier (FilePath -> FilePath -> MeterUpdate -> Annex Bool)
| HardLinker (FilePath -> FilePath -> MeterUpdate -> Annex Bool)
isHardLinker :: Copier -> Bool
isHardLinker (Copier _) = False
isHardLinker (HardLinker _) = True
callCopier :: Copier -> FilePath -> FilePath -> MeterUpdate -> Annex Bool
callCopier (Copier a) = a
callCopier (HardLinker a) = a
-- Copies from src to dest, updating a meter. If the copy finishes
-- successfully, calls a final check action, which must also success, or
-- returns false.
--
-- If either the remote or local repository wants to use hard links,
-- the copier will do so, falling back to copying.
-- the copier will do so (falling back to copying if a hard link cannot be
-- made).
--
-- When a hard link is created, returns Verified; the repo being linked
-- from is implicitly trusted, so no expensive verification needs to be
-- done.
type Copier = FilePath -> FilePath -> MeterUpdate -> Annex Bool -> Annex (Bool, Verification)
mkCopier :: Bool -> [CommandParam] -> Annex Copier
mkCopier remotewanthardlink rsyncparams = do
let copier = rsyncOrCopyFile rsyncparams
let copier = \src dest p check -> unVerified $
rsyncOrCopyFile rsyncparams src dest p <&&> check
#ifndef mingw32_HOST_OS
localwanthardlink <- wantHardLink
let linker = \object dest -> createLink object dest >> return True
let linker = \src dest -> createLink src dest >> return True
ifM (pure (remotewanthardlink || localwanthardlink) <&&> not <$> isDirect)
( return $ HardLinker $ \object dest p ->
liftIO (catchBoolIO (linker object dest))
<||> copier object dest p
, return $ Copier copier
( return $ \src dest p check ->
ifM (liftIO (catchBoolIO (linker src dest)))
( return (True, Verified)
, copier src dest p check
)
, return copier
)
#else
return $ Copier copier
return copier
#endif

View file

@ -122,8 +122,8 @@ byteRetriever a k _m callback = a k (callback . ByteContent)
-}
storeKeyDummy :: Key -> AssociatedFile -> MeterUpdate -> Annex Bool
storeKeyDummy _ _ _ = return False
retreiveKeyFileDummy :: Key -> AssociatedFile -> FilePath -> MeterUpdate -> Annex Bool
retreiveKeyFileDummy _ _ _ _ = return False
retreiveKeyFileDummy :: Key -> AssociatedFile -> FilePath -> MeterUpdate -> Annex (Bool, Verification)
retreiveKeyFileDummy _ _ _ _ = unVerified (return False)
removeKeyDummy :: Key -> Annex Bool
removeKeyDummy _ = return False
checkPresentDummy :: Key -> Annex Bool
@ -156,7 +156,7 @@ specialRemote' cfg c preparestorer prepareretriever prepareremover preparecheckp
where
encr = baser
{ storeKey = \k f p -> cip >>= storeKeyGen k f p
, retrieveKeyFile = \k f d p -> cip >>= retrieveKeyFileGen k f d p
, retrieveKeyFile = \k f d p -> cip >>= unVerified . retrieveKeyFileGen k f d p
, retrieveKeyFileCheap = \k f d -> cip >>= maybe
(retrieveKeyFileCheap baser k f d)
-- retrieval of encrypted keys is never cheap

View file

@ -116,8 +116,8 @@ store u hdl k _f _p = sendAnnex k noop $ \src ->
(return False)
(\cap -> storeCapability u k cap >> return True)
retrieve :: UUID -> TahoeHandle -> Key -> AssociatedFile -> FilePath -> MeterUpdate -> Annex Bool
retrieve u hdl k _f d _p = go =<< getCapability u k
retrieve :: UUID -> TahoeHandle -> Key -> AssociatedFile -> FilePath -> MeterUpdate -> Annex (Bool, Verification)
retrieve u hdl k _f d _p = unVerified $ go =<< getCapability u k
where
go Nothing = return False
go (Just cap) = liftIO $ requestTahoe hdl "get" [Param cap, File d]

View file

@ -70,8 +70,8 @@ gen r _ c gc =
, checkUrl = Nothing
}
downloadKey :: Key -> AssociatedFile -> FilePath -> MeterUpdate -> Annex Bool
downloadKey key _file dest _p = get =<< getWebUrls key
downloadKey :: Key -> AssociatedFile -> FilePath -> MeterUpdate -> Annex (Bool, Verification)
downloadKey key _file dest _p = unVerified $ get =<< getWebUrls key
where
get [] = do
warning "no known url"

View file

@ -13,6 +13,8 @@ module Types.Remote
, RemoteTypeA(..)
, RemoteA(..)
, Availability(..)
, Verification(..)
, unVerified
)
where
@ -64,9 +66,9 @@ data RemoteA a = Remote {
-- all of its contents have been transferred.
storeKey :: Key -> AssociatedFile -> MeterUpdate -> a Bool,
-- Retrieves a key's contents to a file.
-- (The MeterUpdate does not need to be used if it retrieves
-- directly to the file, and not to an intermediate file.)
retrieveKeyFile :: Key -> AssociatedFile -> FilePath -> MeterUpdate -> a Bool,
-- (The MeterUpdate does not need to be used if it writes
-- sequentially to the file.)
retrieveKeyFile :: Key -> AssociatedFile -> FilePath -> MeterUpdate -> a (Bool, Verification),
-- Retrieves a key's contents to a tmp file, if it can be done cheaply.
-- It's ok to create a symlink or hardlink.
retrieveKeyFileCheap :: Key -> AssociatedFile -> FilePath -> a Bool,
@ -122,3 +124,12 @@ instance Eq (RemoteA a) where
instance Ord (RemoteA a) where
compare = comparing uuid
-- Use Verified when the content of a key is verified as part of a
-- transfer, and so a separate verification step is not needed.
data Verification = UnVerified | Verified
unVerified :: Monad m => m Bool -> m (Bool, Verification)
unVerified a = do
ok <- a
return (ok, UnVerified)