Merge branch 'newchunks'

I am happy enough with this to make it live!
Joey Hess 2014-08-01 18:00:47 -04:00
commit 5aa2286e7b
44 changed files with 1357 additions and 389 deletions

View file

@ -16,6 +16,7 @@ module Annex.Content (
getViaTmpChecked,
getViaTmpUnchecked,
prepGetViaTmpChecked,
prepTmp,
withTmp,
checkDiskSpace,
moveAnnex,
@ -264,7 +265,10 @@ prepTmp key = do
createAnnexDirectory (parentDir tmp)
return tmp
{- Creates a temp file, runs an action on it, and cleans up the temp file. -}
{- Creates a temp file for a key, runs an action on it, and cleans up
- the temp file. If the action throws an exception, the temp file is
- left behind, which allows for resuming.
-}
withTmp :: Key -> (FilePath -> Annex a) -> Annex a
withTmp key action = do
tmp <- prepTmp key

View file

@ -5,12 +5,13 @@
- AnnexState are retained. This works because the Annex monad
- internally stores the AnnexState in a MVar.
-
- Copyright 2011-2013 Joey Hess <joey@kitenet.net>
- Copyright 2011-2014 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
{-# LANGUAGE PackageImports #-}
{-# LANGUAGE ScopedTypeVariables #-}
module Annex.Exception (
bracketIO,
@ -19,6 +20,8 @@ module Annex.Exception (
tryAnnexIO,
throwAnnex,
catchAnnex,
catchNonAsyncAnnex,
tryNonAsyncAnnex,
) where
import qualified Control.Monad.Catch as M
@ -48,3 +51,13 @@ throwAnnex = M.throwM
{- catch in the Annex monad -}
catchAnnex :: Exception e => Annex a -> (e -> Annex a) -> Annex a
catchAnnex = M.catch
{- catches all exceptions except for async exceptions -}
catchNonAsyncAnnex :: Annex a -> (SomeException -> Annex a) -> Annex a
catchNonAsyncAnnex a onerr = a `M.catches`
[ M.Handler (\ (e :: AsyncException) -> throwAnnex e)
, M.Handler (\ (e :: SomeException) -> onerr e)
]
tryNonAsyncAnnex :: Annex a -> Annex (Either SomeException a)
tryNonAsyncAnnex a = (Right <$> a) `catchNonAsyncAnnex` (return . Left)
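
For illustration, here is the same async-preserving pattern in plain IO, using the exceptions package that Annex.Exception wraps. A minimal sketch, not from the commit; the point is that AsyncException (e.g. UserInterrupt) is matched first and rethrown, so only ordinary failures reach the handler.

{-# LANGUAGE ScopedTypeVariables #-}
import Control.Exception (AsyncException, SomeException)
import qualified Control.Monad.Catch as M

-- Rethrow async exceptions so Ctrl-C still aborts; handle the rest.
catchNonAsync :: IO a -> (SomeException -> IO a) -> IO a
catchNonAsync a onerr = a `M.catches`
    [ M.Handler (\ (e :: AsyncException) -> M.throwM e)
    , M.Handler (\ (e :: SomeException) -> onerr e)
    ]

main :: IO ()
main = do
    n <- fmap length (readFile "/does/not/exist") `catchNonAsync` \_ -> return 0
    print n -- prints 0; an interrupt would still propagate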

View file

@ -184,7 +184,7 @@ runHandler :: Handler -> FilePath -> Maybe FileStatus -> Assistant ()
runHandler handler file filestatus = void $ do
r <- tryIO <~> handler (normalize file) filestatus
case r of
Left e -> liftIO $ print e
Left e -> liftIO $ warningIO $ show e
Right Nothing -> noop
Right (Just change) -> do
-- Just in case the commit thread is not

View file

@ -14,7 +14,8 @@ module Backend (
isAnnexLink,
chooseBackend,
lookupBackendName,
maybeLookupBackendName
maybeLookupBackendName,
isStableKey,
) where
import Common.Annex
@ -32,6 +33,8 @@ import qualified Backend.Hash
import qualified Backend.WORM
import qualified Backend.URL
import qualified Data.Map as M
list :: [Backend]
list = Backend.Hash.backends ++ Backend.WORM.backends ++ Backend.URL.backends
@ -116,7 +119,13 @@ lookupBackendName :: String -> Backend
lookupBackendName s = fromMaybe unknown $ maybeLookupBackendName s
where
unknown = error $ "unknown backend " ++ s
maybeLookupBackendName :: String -> Maybe Backend
maybeLookupBackendName s = headMaybe matches
where
matches = filter (\b -> s == B.name b) list
maybeLookupBackendName s = M.lookup s nameMap
nameMap :: M.Map String Backend
nameMap = M.fromList $ zip (map B.name list) list
isStableKey :: Key -> Bool
isStableKey k = maybe False (`B.isStableKey` k)
(maybeLookupBackendName (keyBackendName k))
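
The switch from a linear filter to a Data.Map makes each backend lookup O(log n), and since nameMap is a top-level value it is built only once and shared. A toy sketch of the idea, with Backend shrunk to just a name (not from the commit):

import qualified Data.Map as M
import Data.Maybe (isJust)

newtype Backend = Backend { name :: String }

list :: [Backend]
list = map Backend ["SHA256E", "SHA256", "WORM", "URL"] -- stand-in names

-- Built once and shared by all lookups.
nameMap :: M.Map String Backend
nameMap = M.fromList $ zip (map name list) list

main :: IO ()
main = print (isJust (M.lookup "WORM" nameMap)) -- True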

View file

@ -7,7 +7,10 @@
{-# LANGUAGE CPP #-}
module Backend.Hash (backends) where
module Backend.Hash (
backends,
testKeyBackend,
) where
import Common.Annex
import qualified Annex
@ -36,24 +39,23 @@ hashes = concat
{- The SHA256E backend is the default, so genBackendE comes first. -}
backends :: [Backend]
backends = catMaybes $ map genBackendE hashes ++ map genBackend hashes
backends = map genBackendE hashes ++ map genBackend hashes
genBackend :: Hash -> Maybe Backend
genBackend hash = Just Backend
genBackend :: Hash -> Backend
genBackend hash = Backend
{ name = hashName hash
, getKey = keyValue hash
, fsckKey = Just $ checkKeyChecksum hash
, canUpgradeKey = Just needsUpgrade
, fastMigrate = Just trivialMigrate
, isStableKey = const True
}
genBackendE :: Hash -> Maybe Backend
genBackendE hash = do
b <- genBackend hash
return $ b
{ name = hashNameE hash
, getKey = keyValueE hash
}
genBackendE :: Hash -> Backend
genBackendE hash = (genBackend hash)
{ name = hashNameE hash
, getKey = keyValueE hash
}
hashName :: Hash -> String
hashName (SHAHash size) = "SHA" ++ show size
@ -175,3 +177,18 @@ skeinHasher hashsize
| hashsize == 512 = show . skein512
#endif
| otherwise = error $ "unsupported skein size " ++ show hashsize
{- A variant of the SHA256E backend, for testing that needs special keys
- that cannot collide with legitimate keys in the repository.
-
- This is accomplished by appending a special extension to the key,
- that is not one that selectExtension would select (due to being too
- long).
-}
testKeyBackend :: Backend
testKeyBackend =
let b = genBackendE (SHAHash 256)
in b { getKey = (fmap addE) <$$> getKey b }
where
addE k = k { keyName = keyName k ++ longext }
longext = ".this-is-a-test-key"

View file

@ -25,6 +25,9 @@ backend = Backend
, fsckKey = Nothing
, canUpgradeKey = Nothing
, fastMigrate = Nothing
-- The content of a url can change at any time, so URL keys are
-- not stable.
, isStableKey = const False
}
{- Every unique url has a corresponding key. -}

View file

@ -23,6 +23,7 @@ backend = Backend
, fsckKey = Nothing
, canUpgradeKey = Nothing
, fastMigrate = Nothing
, isStableKey = const True
}
{- The key includes the file size, modification time, and the

View file

@ -96,9 +96,10 @@ import qualified Command.XMPPGit
#endif
import qualified Command.RemoteDaemon
#endif
import qualified Command.Test
#ifdef WITH_TESTSUITE
import qualified Command.Test
import qualified Command.FuzzTest
import qualified Command.TestRemote
#endif
#ifdef WITH_EKG
import System.Remote.Monitoring
@ -187,9 +188,10 @@ cmds = concat
#endif
, Command.RemoteDaemon.def
#endif
, Command.Test.def
#ifdef WITH_TESTSUITE
, Command.Test.def
, Command.FuzzTest.def
, Command.TestRemote.def
#endif
]

View file

@ -22,7 +22,7 @@ import Test.QuickCheck
import Control.Concurrent
def :: [Command]
def = [ notBareRepo $ command "fuzztest" paramNothing seek SectionPlumbing
def = [ notBareRepo $ command "fuzztest" paramNothing seek SectionTesting
"generates fuzz test files"]
seek :: CommandSeek

View file

@ -13,7 +13,7 @@ import Messages
def :: [Command]
def = [ noRepo startIO $ dontCheck repoExists $
command "test" paramNothing seek SectionPlumbing
command "test" paramNothing seek SectionTesting
"run built-in test suite"]
seek :: CommandSeek

Command/TestRemote.hs Normal file
View file

@ -0,0 +1,196 @@
{- git-annex command
-
- Copyright 2014 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Command.TestRemote where
import Common
import Command
import qualified Annex
import qualified Remote
import qualified Types.Remote as Remote
import Types
import Types.Key (key2file, keyBackendName, keySize)
import Types.Backend (getKey, fsckKey)
import Types.KeySource
import Annex.Content
import Backend
import qualified Backend.Hash
import Utility.Tmp
import Utility.Metered
import Utility.DataUnits
import Utility.CopyFile
import Messages
import Types.Messages
import Remote.Helper.Chunked
import Locations
import Test.Tasty
import Test.Tasty.Runners
import Test.Tasty.HUnit
import Control.Exception
import "crypto-api" Crypto.Random
import qualified Data.ByteString as B
import qualified Data.ByteString.Lazy as L
import qualified Data.Map as M
def :: [Command]
def = [ withOptions [sizeOption] $
command "testremote" paramRemote seek SectionTesting
"test transfers to/from a remote"]
sizeOption :: Option
sizeOption = fieldOption [] "size" paramSize "base key size (default 1MiB)"
seek :: CommandSeek
seek ps = do
basesz <- fromInteger . fromMaybe (1024 * 1024)
<$> getOptionField sizeOption (pure . getsize)
withWords (start basesz) ps
where
getsize v = v >>= readSize dataUnits
start :: Int -> [String] -> CommandStart
start basesz ws = do
let name = unwords ws
showStart "testremote" name
r <- either error id <$> Remote.byName' name
showSideAction "generating test keys"
ks <- mapM randKey (keySizes basesz)
rs <- catMaybes <$> mapM (adjustChunkSize r) (chunkSizes basesz)
rs' <- concat <$> mapM encryptionVariants rs
next $ perform rs' ks
perform :: [Remote] -> [Key] -> CommandPerform
perform rs ks = do
st <- Annex.getState id
let tests = testGroup "Remote Tests" $
[ testGroup (desc r k) (test st r k) | k <- ks, r <- rs ]
ok <- case tryIngredients [consoleTestReporter] mempty tests of
Nothing -> error "No tests found!?"
Just act -> liftIO act
next $ cleanup rs ks ok
where
desc r' k = intercalate "; " $ map unwords
[ [ "key size", show (keySize k) ]
, [ show (chunkConfig (Remote.config r')) ]
, ["encryption", fromMaybe "none" (M.lookup "encryption" (Remote.config r'))]
]
adjustChunkSize :: Remote -> Int -> Annex (Maybe Remote)
adjustChunkSize r chunksize = adjustRemoteConfig r
(M.insert "chunk" (show chunksize))
-- Variants of a remote with no encryption, and with simple shared
-- encryption. Gpg key based encryption is not tested.
encryptionVariants :: Remote -> Annex [Remote]
encryptionVariants r = do
noenc <- adjustRemoteConfig r (M.insert "encryption" "none")
sharedenc <- adjustRemoteConfig r $
M.insert "encryption" "shared" .
M.insert "highRandomQuality" "false"
return $ catMaybes [noenc, sharedenc]
-- Regenerate a remote with a modified config.
adjustRemoteConfig :: Remote -> (Remote.RemoteConfig -> Remote.RemoteConfig) -> Annex (Maybe Remote)
adjustRemoteConfig r adjustconfig = Remote.generate (Remote.remotetype r)
(Remote.repo r)
(Remote.uuid r)
(adjustconfig (Remote.config r))
(Remote.gitconfig r)
test :: Annex.AnnexState -> Remote -> Key -> [TestTree]
test st r k =
[ check "removeKey when not present" remove
, present False
, check "storeKey" store
, present True
, check "storeKey when already present" store
, present True
, check "retrieveKeyFile" $ do
removeAnnex k
get
, check "fsck downloaded object" fsck
, check "retrieveKeyFile resume from 33%" $ do
loc <- Annex.calcRepo (gitAnnexLocation k)
tmp <- prepTmp k
partial <- liftIO $ bracket (openBinaryFile loc ReadMode) hClose $ \h -> do
sz <- hFileSize h
L.hGet h $ fromInteger $ sz `div` 3
liftIO $ L.writeFile tmp partial
removeAnnex k
get
, check "fsck downloaded object" fsck
, check "retrieveKeyFile resume from 0" $ do
tmp <- prepTmp k
liftIO $ writeFile tmp ""
removeAnnex k
get
, check "fsck downloaded object" fsck
, check "retrieveKeyFile resume from end" $ do
loc <- Annex.calcRepo (gitAnnexLocation k)
tmp <- prepTmp k
void $ liftIO $ copyFileExternal loc tmp
removeAnnex k
get
, check "fsck downloaded object" fsck
, check "removeKey when present" remove
, present False
]
where
check desc a = testCase desc $
Annex.eval st (Annex.setOutput QuietOutput >> a) @? "failed"
present b = check ("present " ++ show b) $
(== Right b) <$> Remote.hasKey r k
fsck = case maybeLookupBackendName (keyBackendName k) of
Nothing -> return True
Just b -> case fsckKey b of
Nothing -> return True
Just fscker -> fscker k (key2file k)
get = getViaTmp k $ \dest ->
Remote.retrieveKeyFile r k Nothing dest nullMeterUpdate
store = Remote.storeKey r k Nothing nullMeterUpdate
remove = Remote.removeKey r k
cleanup :: [Remote] -> [Key] -> Bool -> CommandCleanup
cleanup rs ks ok = do
forM_ rs $ \r -> forM_ ks (Remote.removeKey r)
forM_ ks removeAnnex
return ok
chunkSizes :: Int -> [Int]
chunkSizes base =
[ 0 -- no chunking
, base `div` 100
, base `div` 1000
, base
]
keySizes :: Int -> [Int]
keySizes base = filter (>= 0)
[ 0 -- empty key is a special case when chunking
, base
, base + 1
, base - 1
, base * 2
]
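
Together these two generators form the test matrix: every key size is tried against every chunk size. A quick sketch (not from the commit) of what they evaluate to for the default 1MiB base; note the off-by-one sizes around the base, plus the 0 entries for an empty key and a no-chunking case:

main :: IO ()
main = do
    let base = 1024 * 1024 :: Int
    print (filter (>= 0) [0, base, base + 1, base - 1, base * 2])
    -- [0,1048576,1048577,1048575,2097152]
    print [0, base `div` 100, base `div` 1000, base]
    -- [0,10485,1048,1048576]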
randKey :: Int -> Annex Key
randKey sz = withTmpFile "randkey" $ \f h -> do
gen <- liftIO (newGenIO :: IO SystemRandom)
case genBytes sz gen of
Left e -> error $ "failed to generate random key: " ++ show e
Right (rand, _) -> liftIO $ B.hPut h rand
liftIO $ hClose h
let ks = KeySource
{ keyFilename = f
, contentLocation = f
, inodeCache = Nothing
}
k <- fromMaybe (error "failed to generate random key")
<$> getKey Backend.Hash.testKeyBackend ks
moveAnnex k f
return k

View file

@ -3,16 +3,18 @@
- Currently using gpg; could later be modified to support different
- crypto backends if necessary.
-
- Copyright 2011-2012 Joey Hess <joey@kitenet.net>
- Copyright 2011-2014 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE Rank2Types #-}
module Crypto (
Cipher,
KeyIds(..),
EncKey,
StorableCipher(..),
genEncryptedCipher,
genSharedCipher,
@ -34,6 +36,8 @@ import qualified Data.ByteString.Lazy as L
import Data.ByteString.Lazy.UTF8 (fromString)
import Control.Applicative
import qualified Data.Map as M
import Control.Monad.IO.Class
import Control.Monad.Catch (MonadMask)
import Common.Annex
import qualified Utility.Gpg as Gpg
@ -138,17 +142,19 @@ decryptCipher (EncryptedCipher t variant _) =
Hybrid -> Cipher
PubKey -> MacOnlyCipher
type EncKey = Key -> Key
{- Generates an encrypted form of a Key. The encryption does not need to be
- reversable, nor does it need to be the same type of encryption used
- on content. It does need to be repeatable. -}
encryptKey :: Mac -> Cipher -> Key -> Key
encryptKey :: Mac -> Cipher -> EncKey
encryptKey mac c k = stubKey
{ keyName = macWithCipher mac c (key2file k)
, keyBackendName = "GPG" ++ showMac mac
}
type Feeder = Handle -> IO ()
type Reader a = Handle -> IO a
type Reader m a = Handle -> m a
feedFile :: FilePath -> Feeder
feedFile f h = L.hPut h =<< L.readFile f
@ -156,8 +162,8 @@ feedFile f h = L.hPut h =<< L.readFile f
feedBytes :: L.ByteString -> Feeder
feedBytes = flip L.hPut
readBytes :: (L.ByteString -> IO a) -> Reader a
readBytes a h = L.hGetContents h >>= a
readBytes :: (MonadIO m) => (L.ByteString -> m a) -> Reader m a
readBytes a h = liftIO (L.hGetContents h) >>= a
{- Runs a Feeder action, that generates content that is symmetrically
- encrypted with the Cipher (unless it is empty, in which case
@ -165,7 +171,7 @@ readBytes a h = L.hGetContents h >>= a
- read by the Reader action. Note: For public-key encryption,
- recipients MUST be included in 'params' (for instance using
- 'getGpgEncParams'). -}
encrypt :: [CommandParam] -> Cipher -> Feeder -> Reader a -> IO a
encrypt :: (MonadIO m, MonadMask m) => [CommandParam] -> Cipher -> Feeder -> Reader m a -> m a
encrypt params cipher = case cipher of
Cipher{} -> Gpg.feedRead (params ++ Gpg.stdEncryptionParams True) $
cipherPassphrase cipher
@ -174,7 +180,7 @@ encrypt params cipher = case cipher of
{- Runs a Feeder action, that generates content that is decrypted with the
- Cipher (or using a private key if the Cipher is empty), and read by the
- Reader action. -}
decrypt :: Cipher -> Feeder -> Reader a -> IO a
decrypt :: (MonadIO m, MonadMask m) => Cipher -> Feeder -> Reader m a -> m a
decrypt cipher = case cipher of
Cipher{} -> Gpg.feedRead [Param "--decrypt"] $ cipherPassphrase cipher
MacOnlyCipher{} -> Gpg.pipeLazy [Param "--decrypt"]

View file

@ -421,6 +421,7 @@ keyPaths key = map (keyPath key) annexHashes
- which do not allow using a directory "XX" when "xx" already exists.
- To support that, most repositories use the lower case hash for new data. -}
type Hasher = Key -> FilePath
annexHashes :: [Hasher]
annexHashes = [hashDirLower, hashDirMixed]
@ -428,12 +429,12 @@ hashDirMixed :: Hasher
hashDirMixed k = addTrailingPathSeparator $ take 2 dir </> drop 2 dir
where
dir = take 4 $ display_32bits_as_dir =<< [a,b,c,d]
ABCD (a,b,c,d) = md5 $ md5FilePath $ key2file k
ABCD (a,b,c,d) = md5 $ md5FilePath $ key2file $ nonChunkKey k
hashDirLower :: Hasher
hashDirLower k = addTrailingPathSeparator $ take 3 dir </> drop 3 dir
where
dir = take 6 $ md5s $ md5FilePath $ key2file k
dir = take 6 $ md5s $ md5FilePath $ key2file $ nonChunkKey k
{- modified version of display_32bits_as_hex from Data.Hash.MD5
- Copyright (C) 2001 Ian Lynagh

View file

@ -15,7 +15,14 @@
- Licensed under the GNU GPL version 3 or higher.
-}
module Logs.Chunk where
module Logs.Chunk (
ChunkMethod(..),
ChunkSize,
ChunkCount,
chunksStored,
chunksRemoved,
getCurrentChunks,
) where
import Common.Annex
import Logs
@ -26,19 +33,19 @@ import Logs.Chunk.Pure
import qualified Data.Map as M
import Data.Time.Clock.POSIX
chunksStored :: UUID -> Key -> ChunkSize -> ChunkCount -> Annex ()
chunksStored u k chunksize chunkcount = do
chunksStored :: UUID -> Key -> ChunkMethod -> ChunkCount -> Annex ()
chunksStored u k chunkmethod chunkcount = do
ts <- liftIO getPOSIXTime
Annex.Branch.change (chunkLogFile k) $
showLog . changeMapLog ts (u, chunksize) chunkcount . parseLog
showLog . changeMapLog ts (u, chunkmethod) chunkcount . parseLog
chunksRemoved :: UUID -> Key -> ChunkSize -> Annex ()
chunksRemoved u k chunksize = chunksStored u k chunksize 0
chunksRemoved :: UUID -> Key -> ChunkMethod -> Annex ()
chunksRemoved u k chunkmethod = chunksStored u k chunkmethod 0
getCurrentChunks :: UUID -> Key -> Annex [(ChunkSize, ChunkCount)]
getCurrentChunks :: UUID -> Key -> Annex [(ChunkMethod, ChunkCount)]
getCurrentChunks u k = select . parseLog <$> Annex.Branch.get (chunkLogFile k)
where
select = filter (\(_sz, ct) -> ct > 0)
. map (\((_ku, sz), l) -> (sz, value l))
select = filter (\(_m, ct) -> ct > 0)
. map (\((_ku, m), l) -> (m, value l))
. M.toList
. M.filterWithKey (\(ku, _sz) _ -> ku == u)
. M.filterWithKey (\(ku, _m) _ -> ku == u)

View file

@ -6,7 +6,8 @@
-}
module Logs.Chunk.Pure
( ChunkSize
( ChunkMethod(..)
, ChunkSize
, ChunkCount
, ChunkLog
, parseLog
@ -17,24 +18,37 @@ import Common.Annex
import Logs.MapLog
import Data.Int
-- Currently chunks are all fixed size, but other chunking methods
-- may be added.
data ChunkMethod = FixedSizeChunks ChunkSize | UnknownChunks String
deriving (Ord, Eq, Show)
type ChunkSize = Int64
-- 0 when chunks are no longer present
type ChunkCount = Integer
type ChunkLog = MapLog (UUID, ChunkSize) ChunkCount
type ChunkLog = MapLog (UUID, ChunkMethod) ChunkCount
parseChunkMethod :: String -> ChunkMethod
parseChunkMethod s = maybe (UnknownChunks s) FixedSizeChunks (readish s)
showChunkMethod :: ChunkMethod -> String
showChunkMethod (FixedSizeChunks sz) = show sz
showChunkMethod (UnknownChunks s) = s
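
The parse is deliberately forgiving: anything that does not read as a size round-trips through UnknownChunks unchanged, so a chunking method logged by a future git-annex will not crash an older one. A standalone sketch (readMaybe standing in for git-annex's readish):

import Text.Read (readMaybe)
import Data.Int (Int64)

data ChunkMethod = FixedSizeChunks Int64 | UnknownChunks String
    deriving (Ord, Eq, Show)

parseChunkMethod :: String -> ChunkMethod
parseChunkMethod s = maybe (UnknownChunks s) FixedSizeChunks (readMaybe s)

main :: IO ()
main = mapM_ (print . parseChunkMethod) ["1048576", "some-future-method"]
-- FixedSizeChunks 1048576
-- UnknownChunks "some-future-method"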
parseLog :: String -> ChunkLog
parseLog = parseMapLog fieldparser valueparser
where
fieldparser s =
let (u,sz) = separate (== sep) s
in (,) <$> pure (toUUID u) <*> readish sz
let (u,m) = separate (== sep) s
in Just (toUUID u, parseChunkMethod m)
valueparser = readish
showLog :: ChunkLog -> String
showLog = showMapLog fieldshower valueshower
where
fieldshower (u, sz) = fromUUID u ++ sep : show sz
fieldshower (u, m) = fromUUID u ++ sep : showChunkMethod m
valueshower = show
sep :: Char

View file

@ -1,16 +1,16 @@
{- A "remote" that is just a filesystem directory.
-
- Copyright 2011-2012 Joey Hess <joey@kitenet.net>
- Copyright 2011-2014 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE Rank2Types #-}
module Remote.Directory (remote) where
import qualified Data.ByteString.Lazy as L
import qualified Data.ByteString as S
import qualified Data.Map as M
import Common.Annex
@ -21,10 +21,8 @@ import Config.Cost
import Config
import Utility.FileMode
import Remote.Helper.Special
import Remote.Helper.Encryptable
import Remote.Helper.Chunked
import qualified Remote.Helper.Chunked.Legacy as Legacy
import Crypto
import Remote.Helper.ChunkedEncryptable
import qualified Remote.Directory.LegacyChunked as Legacy
import Annex.Content
import Annex.UUID
import Utility.Metered
@ -41,15 +39,15 @@ gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remot
gen r u c gc = do
cst <- remoteCost gc cheapRemoteCost
let chunkconfig = chunkConfig c
return $ Just $ encryptableRemote c
(storeEncrypted dir (getGpgEncParams (c,gc)) chunkconfig)
(retrieveEncrypted dir chunkconfig)
return $ Just $ chunkedEncryptableRemote c
(prepareStore dir chunkconfig)
(retrieve dir chunkconfig)
Remote {
uuid = u,
cost = cst,
name = Git.repoDescribe r,
storeKey = store dir chunkconfig,
retrieveKeyFile = retrieve dir chunkconfig,
storeKey = storeKeyDummy,
retrieveKeyFile = retreiveKeyFileDummy,
retrieveKeyFileCheap = retrieveCheap dir chunkconfig,
removeKey = remove dir,
hasKey = checkPresent dir chunkconfig,
@ -84,125 +82,49 @@ directorySetup mu _ c = do
gitConfigSpecialRemote u c' "directory" absdir
return (M.delete "directory" c', u)
{- Locations to try to access a given Key in the Directory.
- We try more than since we used to write to different hash directories. -}
{- Locations to try to access a given Key in the directory.
- We try more than one since we used to write to different hash
- directories. -}
locations :: FilePath -> Key -> [FilePath]
locations d k = map (d </>) (keyPaths k)
{- Returns the location of a Key in the directory. If the key is
- present, returns the location that is actually used, otherwise
- returns the first, default location. -}
getLocation :: FilePath -> Key -> IO FilePath
getLocation d k = do
let locs = locations d k
fromMaybe (Prelude.head locs) <$> firstM doesFileExist locs
{- Directory where the file(s) for a key are stored. -}
storeDir :: FilePath -> Key -> FilePath
storeDir d k = addTrailingPathSeparator $ d </> hashDirLower k </> keyFile k
{- Where we store temporary data for a key as it's being uploaded. -}
{- Where we store temporary data for a key, in the directory, as it's being
- written. -}
tmpDir :: FilePath -> Key -> FilePath
tmpDir d k = addTrailingPathSeparator $ d </> "tmp" </> keyFile k
withCheckedFiles :: (FilePath -> IO Bool) -> ChunkConfig -> FilePath -> Key -> ([FilePath] -> IO Bool) -> IO Bool
withCheckedFiles _ _ [] _ _ = return False
withCheckedFiles check (LegacyChunks _) d k a = go $ locations d k
where
go [] = return False
go (f:fs) = do
let chunkcount = f ++ Legacy.chunkCount
ifM (check chunkcount)
( do
chunks <- Legacy.listChunks f <$> readFile chunkcount
ifM (allM check chunks)
( a chunks , return False )
, do
chunks <- Legacy.probeChunks f check
if null chunks
then go fs
else a chunks
)
withCheckedFiles check _ d k a = go $ locations d k
where
go [] = return False
go (f:fs) = ifM (check f) ( a [f] , go fs )
{- Check if there is enough free disk space in the remote's directory to
- store the key. Note that the unencrypted key size is checked. -}
prepareStore :: FilePath -> ChunkConfig -> Preparer Storer
prepareStore d chunkconfig = checkPrepare
(\k -> checkDiskSpace (Just d) k 0)
(byteStorer $ store d chunkconfig)
withStoredFiles :: ChunkConfig -> FilePath -> Key -> ([FilePath] -> IO Bool) -> IO Bool
withStoredFiles = withCheckedFiles doesFileExist
store :: FilePath -> ChunkConfig -> Key -> AssociatedFile -> MeterUpdate -> Annex Bool
store d chunkconfig k _f p = sendAnnex k (void $ remove d k) $ \src ->
metered (Just p) k $ \meterupdate ->
storeHelper d chunkconfig k k $ \dests ->
case chunkconfig of
LegacyChunks chunksize ->
storeLegacyChunked meterupdate chunksize dests
=<< L.readFile src
_ -> do
let dest = Prelude.head dests
meteredWriteFile meterupdate dest
=<< L.readFile src
return [dest]
storeEncrypted :: FilePath -> [CommandParam] -> ChunkConfig -> (Cipher, Key) -> Key -> MeterUpdate -> Annex Bool
storeEncrypted d gpgOpts chunkconfig (cipher, enck) k p = sendAnnex k (void $ remove d enck) $ \src ->
metered (Just p) k $ \meterupdate ->
storeHelper d chunkconfig enck k $ \dests ->
encrypt gpgOpts cipher (feedFile src) $ readBytes $ \b ->
case chunkconfig of
LegacyChunks chunksize ->
storeLegacyChunked meterupdate chunksize dests b
_ -> do
let dest = Prelude.head dests
meteredWriteFile meterupdate dest b
return [dest]
{- Splits a ByteString into chunks and writes to dests, obeying configured
- chunk size (not to be confused with the L.ByteString chunk size).
- Note: Must always write at least one file, even for empty ByteString. -}
storeLegacyChunked :: MeterUpdate -> ChunkSize -> [FilePath] -> L.ByteString -> IO [FilePath]
storeLegacyChunked _ _ [] _ = error "bad storeLegacyChunked call"
storeLegacyChunked meterupdate chunksize alldests@(firstdest:_) b
| L.null b = do
-- must always write at least one file, even for empty
L.writeFile firstdest b
return [firstdest]
| otherwise = storeLegacyChunked' meterupdate chunksize alldests (L.toChunks b) []
storeLegacyChunked' :: MeterUpdate -> ChunkSize -> [FilePath] -> [S.ByteString] -> [FilePath] -> IO [FilePath]
storeLegacyChunked' _ _ [] _ _ = error "ran out of dests"
storeLegacyChunked' _ _ _ [] c = return $ reverse c
storeLegacyChunked' meterupdate chunksize (d:dests) bs c = do
bs' <- withFile d WriteMode $
feed zeroBytesProcessed chunksize bs
storeLegacyChunked' meterupdate chunksize dests bs' (d:c)
where
feed _ _ [] _ = return []
feed bytes sz (l:ls) h = do
let len = S.length l
let s = fromIntegral len
if s <= sz || sz == chunksize
then do
S.hPut h l
let bytes' = addBytesProcessed bytes len
meterupdate bytes'
feed bytes' (sz - s) ls h
else return (l:ls)
storeHelper :: FilePath -> ChunkConfig -> Key -> Key -> ([FilePath] -> IO [FilePath]) -> Annex Bool
storeHelper d chunkconfig key origkey storer = check <&&> liftIO go
where
tmpdir = tmpDir d key
destdir = storeDir d key
{- An encrypted key does not have a known size,
- so check that the size of the original key is available as free
- space. -}
check = do
liftIO $ createDirectoryIfMissing True tmpdir
checkDiskSpace (Just tmpdir) origkey 0
go = case chunkconfig of
NoChunks -> flip catchNonAsync (\e -> print e >> return False) $ do
let tmpf = tmpdir </> keyFile key
void $ storer [tmpf]
store :: FilePath -> ChunkConfig -> Key -> L.ByteString -> MeterUpdate -> Annex Bool
store d chunkconfig k b p = liftIO $ do
void $ tryIO $ createDirectoryIfMissing True tmpdir
case chunkconfig of
LegacyChunks chunksize -> Legacy.store chunksize finalizer k b p tmpdir destdir
_ -> do
let tmpf = tmpdir </> keyFile k
meteredWriteFile p tmpf b
finalizer tmpdir destdir
return True
UnpaddedChunks _ -> error "TODO: storeHelper with UnpaddedChunks"
LegacyChunks _ -> Legacy.storeChunks key tmpdir destdir storer recorder finalizer
where
tmpdir = tmpDir d k
destdir = storeDir d k
finalizer tmp dest = do
void $ tryIO $ allowWrite dest -- may already exist
void $ tryIO $ removeDirectoryRecursive dest -- or not exist
@ -212,38 +134,21 @@ storeHelper d chunkconfig key origkey storer = check <&&> liftIO go
void $ tryIO $ do
mapM_ preventWrite =<< dirContents dest
preventWrite dest
recorder f s = do
void $ tryIO $ allowWrite f
writeFile f s
void $ tryIO $ preventWrite f
retrieve :: FilePath -> ChunkConfig -> Key -> AssociatedFile -> FilePath -> MeterUpdate -> Annex Bool
retrieve d chunkconfig k _ f p = metered (Just p) k $ \meterupdate ->
liftIO $ withStoredFiles chunkconfig d k $ \files ->
catchBoolIO $ do
meteredWriteFileChunks meterupdate f files L.readFile
return True
retrieveEncrypted :: FilePath -> ChunkConfig -> (Cipher, Key) -> Key -> FilePath -> MeterUpdate -> Annex Bool
retrieveEncrypted d chunkconfig (cipher, enck) k f p = metered (Just p) k $ \meterupdate ->
liftIO $ withStoredFiles chunkconfig d enck $ \files ->
catchBoolIO $ do
decrypt cipher (feeder files) $
readBytes $ meteredWriteFile meterupdate f
return True
where
feeder files h = forM_ files $ L.hPut h <=< L.readFile
retrieve :: FilePath -> ChunkConfig -> Preparer Retriever
retrieve d (LegacyChunks _) = Legacy.retrieve locations d
retrieve d _ = simplyPrepare $ byteRetriever $ \k ->
liftIO $ L.readFile =<< getLocation d k
retrieveCheap :: FilePath -> ChunkConfig -> Key -> FilePath -> Annex Bool
-- no cheap retrieval for chunks
-- no cheap retrieval possible for chunks
retrieveCheap _ (UnpaddedChunks _) _ _ = return False
retrieveCheap _ (LegacyChunks _) _ _ = return False
#ifndef mingw32_HOST_OS
retrieveCheap d ck k f = liftIO $ withStoredFiles ck d k go
where
go [file] = catchBoolIO $ createSymbolicLink file f >> return True
go _files = return False
retrieveCheap d NoChunks k f = liftIO $ catchBoolIO $ do
file <- getLocation d k
createSymbolicLink file f
return True
#else
retrieveCheap _ _ _ _ = return False
#endif
@ -256,12 +161,25 @@ remove d k = liftIO $ do
- before it can delete them. -}
void $ tryIO $ mapM_ allowWrite =<< dirContents dir
#endif
catchBoolIO $ do
ok <- catchBoolIO $ do
removeDirectoryRecursive dir
return True
{- Removing the subdirectory will fail if it doesn't exist.
- But, we want to succeed in that case, as long as the directory
- remote's top-level directory does exist. -}
if ok
then return ok
else doesDirectoryExist d <&&> (not <$> doesDirectoryExist dir)
where
dir = storeDir d k
checkPresent :: FilePath -> ChunkConfig -> Key -> Annex (Either String Bool)
checkPresent d chunkconfig k = liftIO $ catchMsgIO $ withStoredFiles chunkconfig d k $
const $ return True -- withStoredFiles checked that it exists
checkPresent d (LegacyChunks _) k = Legacy.checkPresent d locations k
checkPresent d _ k = liftIO $ do
v <- catchMsgIO $ anyM doesFileExist (locations d k)
case v of
Right False -> ifM (doesDirectoryExist d)
( return v
, return $ Left $ "directory " ++ d ++ " is not accessible"
)
_ -> return v

View file

@ -0,0 +1,112 @@
{- Legacy chunksize support for directory special remote.
-
- Can be removed eventually.
-
- Copyright 2011-2012 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
{-# LANGUAGE Rank2Types #-}
module Remote.Directory.LegacyChunked where
import qualified Data.ByteString.Lazy as L
import qualified Data.ByteString as S
import Common.Annex
import Utility.FileMode
import Remote.Helper.ChunkedEncryptable
import qualified Remote.Helper.Chunked.Legacy as Legacy
import Annex.Perms
import Utility.Metered
withCheckedFiles :: (FilePath -> IO Bool) -> FilePath -> (FilePath -> Key -> [FilePath]) -> Key -> ([FilePath] -> IO Bool) -> IO Bool
withCheckedFiles _ [] _locations _ _ = return False
withCheckedFiles check d locations k a = go $ locations d k
where
go [] = return False
go (f:fs) = do
let chunkcount = f ++ Legacy.chunkCount
ifM (check chunkcount)
( do
chunks <- Legacy.listChunks f <$> readFile chunkcount
ifM (allM check chunks)
( a chunks , return False )
, do
chunks <- Legacy.probeChunks f check
if null chunks
then go fs
else a chunks
)
withStoredFiles :: FilePath -> (FilePath -> Key -> [FilePath]) -> Key -> ([FilePath] -> IO Bool) -> IO Bool
withStoredFiles = withCheckedFiles doesFileExist
{- Splits a ByteString into chunks and writes to dests, obeying configured
- chunk size (not to be confused with the L.ByteString chunk size). -}
storeLegacyChunked :: MeterUpdate -> ChunkSize -> [FilePath] -> L.ByteString -> IO [FilePath]
storeLegacyChunked _ _ [] _ = error "bad storeLegacyChunked call"
storeLegacyChunked meterupdate chunksize alldests@(firstdest:_) b
| L.null b = do
-- always write at least one file, even for empty
L.writeFile firstdest b
return [firstdest]
| otherwise = storeLegacyChunked' meterupdate chunksize alldests (L.toChunks b) []
storeLegacyChunked' :: MeterUpdate -> ChunkSize -> [FilePath] -> [S.ByteString] -> [FilePath] -> IO [FilePath]
storeLegacyChunked' _ _ [] _ _ = error "ran out of dests"
storeLegacyChunked' _ _ _ [] c = return $ reverse c
storeLegacyChunked' meterupdate chunksize (d:dests) bs c = do
bs' <- withFile d WriteMode $
feed zeroBytesProcessed chunksize bs
storeLegacyChunked' meterupdate chunksize dests bs' (d:c)
where
feed _ _ [] _ = return []
feed bytes sz (l:ls) h = do
let len = S.length l
let s = fromIntegral len
if s <= sz || sz == chunksize
then do
S.hPut h l
let bytes' = addBytesProcessed bytes len
meterupdate bytes'
feed bytes' (sz - s) ls h
else return (l:ls)
storeHelper :: (FilePath -> FilePath -> IO ()) -> Key -> ([FilePath] -> IO [FilePath]) -> FilePath -> FilePath -> IO Bool
storeHelper finalizer key storer tmpdir destdir = do
void $ liftIO $ tryIO $ createDirectoryIfMissing True tmpdir
Legacy.storeChunks key tmpdir destdir storer recorder finalizer
where
recorder f s = do
void $ tryIO $ allowWrite f
writeFile f s
void $ tryIO $ preventWrite f
store :: ChunkSize -> (FilePath -> FilePath -> IO ()) -> Key -> L.ByteString -> MeterUpdate -> FilePath -> FilePath -> IO Bool
store chunksize finalizer k b p = storeHelper finalizer k $ \dests ->
storeLegacyChunked p chunksize dests b
{- Need to get a single ByteString containing every chunk.
- Done very inefficiently, by writing to a temp file.
- :/ This is legacy code..
-}
retrieve :: (FilePath -> Key -> [FilePath]) -> FilePath -> Preparer Retriever
retrieve locations d basek a = do
showLongNote "This remote uses the deprecated chunksize setting. So this will be quite slow."
tmpdir <- fromRepo $ gitAnnexTmpMiscDir
createAnnexDirectory tmpdir
let tmp = tmpdir </> keyFile basek ++ ".directorylegacy.tmp"
a $ Just $ byteRetriever $ \k -> liftIO $ do
void $ withStoredFiles d locations k $ \fs -> do
forM_ fs $
S.appendFile tmp <=< S.readFile
return True
b <- L.readFile tmp
nukeFile tmp
return b
checkPresent :: FilePath -> (FilePath -> Key -> [FilePath]) -> Key -> Annex (Either String Bool)
checkPresent d locations k = liftIO $ catchMsgIO $
withStoredFiles d locations k $
-- withStoredFiles checked that it exists
const $ return True

View file

@ -15,14 +15,12 @@ import Types.CleanupActions
import qualified Git
import Config
import Remote.Helper.Special
import Remote.Helper.Encryptable
import Crypto
import Remote.Helper.ChunkedEncryptable
import Utility.Metered
import Logs.Transfer
import Logs.PreferredContent.Raw
import Logs.RemoteState
import Config.Cost
import Annex.Content
import Annex.UUID
import Annex.Exception
import Creds
@ -30,7 +28,6 @@ import Creds
import Control.Concurrent.STM
import System.Log.Logger (debugM)
import qualified Data.Map as M
import qualified Data.ByteString.Lazy as L
remote :: RemoteType
remote = RemoteType {
@ -46,15 +43,15 @@ gen r u c gc = do
Annex.addCleanup (RemoteCleanup u) $ stopExternal external
cst <- getCost external r gc
avail <- getAvailability external r gc
return $ Just $ encryptableRemote c
(storeEncrypted external $ getGpgEncParams (c,gc))
(retrieveEncrypted external)
return $ Just $ chunkedEncryptableRemote c
(simplyPrepare $ store external)
(simplyPrepare $ retrieve external)
Remote {
uuid = u,
cost = cst,
name = Git.repoDescribe r,
storeKey = store external,
retrieveKeyFile = retrieve external,
storeKey = storeKeyDummy,
retrieveKeyFile = retreiveKeyFileDummy,
retrieveKeyFileCheap = \_ _ -> return False,
removeKey = remove external,
hasKey = checkPresent external,
@ -90,25 +87,8 @@ externalSetup mu _ c = do
gitConfigSpecialRemote u c'' "externaltype" externaltype
return (c'', u)
store :: External -> Key -> AssociatedFile -> MeterUpdate -> Annex Bool
store external k _f p = sendAnnex k rollback $ \f ->
metered (Just p) k $
storeHelper external k f
where
rollback = void $ remove external k
storeEncrypted :: External -> [CommandParam] -> (Cipher, Key) -> Key -> MeterUpdate -> Annex Bool
storeEncrypted external gpgOpts (cipher, enck) k p = withTmp enck $ \tmp ->
sendAnnex k rollback $ \src -> do
metered (Just p) k $ \meterupdate -> do
liftIO $ encrypt gpgOpts cipher (feedFile src) $
readBytes $ L.writeFile tmp
storeHelper external enck tmp meterupdate
where
rollback = void $ remove external enck
storeHelper :: External -> Key -> FilePath -> MeterUpdate -> Annex Bool
storeHelper external k f p = safely $
store :: External -> Storer
store external = fileStorer $ \k f p ->
handleRequest external (TRANSFER Upload k f) (Just p) $ \resp ->
case resp of
TRANSFER_SUCCESS Upload k' | k == k' ->
@ -119,31 +99,15 @@ storeHelper external k f p = safely $
return False
_ -> Nothing
retrieve :: External -> Key -> AssociatedFile -> FilePath -> MeterUpdate -> Annex Bool
retrieve external k _f d p = metered (Just p) k $
retrieveHelper external k d
retrieveEncrypted :: External -> (Cipher, Key) -> Key -> FilePath -> MeterUpdate -> Annex Bool
retrieveEncrypted external (cipher, enck) k f p = withTmp enck $ \tmp ->
metered (Just p) k $ \meterupdate ->
ifM (retrieveHelper external enck tmp meterupdate)
( liftIO $ catchBoolIO $ do
decrypt cipher (feedFile tmp) $
readBytes $ L.writeFile f
return True
, return False
)
retrieveHelper :: External -> Key -> FilePath -> MeterUpdate -> Annex Bool
retrieveHelper external k d p = safely $
retrieve :: External -> Retriever
retrieve external = fileRetriever $ \d k p ->
handleRequest external (TRANSFER Download k d) (Just p) $ \resp ->
case resp of
TRANSFER_SUCCESS Download k'
| k == k' -> Just $ return True
| k == k' -> Just $ return ()
TRANSFER_FAILURE Download k' errmsg
| k == k' -> Just $ do
warning errmsg
return False
error errmsg
_ -> Nothing
remove :: External -> Key -> Annex Bool

View file

@ -282,7 +282,8 @@ jobList r keys = go =<< glacierEnv (config r) (uuid r)
then return nada
else do
enckeys <- forM keys $ \k ->
maybe k snd <$> cipherKey (config r) k
maybe k (\(_, enck) -> enck k)
<$> cipherKey (config r)
let keymap = M.fromList $ zip enckeys keys
let convert = mapMaybe (`M.lookup` keymap)
return (convert succeeded, convert failed)

View file

@ -1,22 +1,30 @@
{- git-annex chunked remotes
-
- Copyright 2012-2014 Joey Hess <joey@kitenet.net>
- Copyright 2014 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Remote.Helper.Chunked
( ChunkSize
, ChunkConfig(..)
, chunkConfig
, meteredWriteFileChunks
) where
module Remote.Helper.Chunked (
ChunkSize,
ChunkConfig(..),
chunkConfig,
storeChunks,
removeChunks,
retrieveChunks,
hasKeyChunks,
) where
import Common.Annex
import Utility.DataUnits
import Types.StoreRetrieve
import Types.Remote
import Logs.Chunk.Pure (ChunkSize)
import Types.Key
import Logs.Chunk
import Utility.Metered
import Crypto (EncKey)
import Backend (isStableKey)
import Annex.Exception
import qualified Data.ByteString.Lazy as L
import qualified Data.Map as M
@ -25,23 +33,366 @@ data ChunkConfig
= NoChunks
| UnpaddedChunks ChunkSize
| LegacyChunks ChunkSize
deriving (Show)
noChunks :: ChunkConfig -> Bool
noChunks NoChunks = True
noChunks _ = False
chunkConfig :: RemoteConfig -> ChunkConfig
chunkConfig m =
case M.lookup "chunksize" m of
Nothing -> case M.lookup "chunk" m of
Nothing -> NoChunks
Just v -> UnpaddedChunks $ readsz v "chunk"
Just v -> LegacyChunks $ readsz v "chunksize"
Just v -> readsz UnpaddedChunks v "chunk"
Just v -> readsz LegacyChunks v "chunksize"
where
readsz v f = case readSize dataUnits v of
Just size | size > 0 -> fromInteger size
_ -> error ("bad " ++ f)
readsz c v f = case readSize dataUnits v of
Just size
| size == 0 -> NoChunks
| size > 0 -> c (fromInteger size)
_ -> error $ "bad configuration " ++ f ++ "=" ++ v
{- Writes a series of chunks to a file. The feeder is called to get
- each chunk. -}
meteredWriteFileChunks :: MeterUpdate -> FilePath -> [v] -> (v -> IO L.ByteString) -> IO ()
meteredWriteFileChunks meterupdate dest chunks feeder =
withBinaryFile dest WriteMode $ \h ->
forM_ chunks $
meteredWrite meterupdate h <=< feeder
-- An infinite stream of chunk keys, starting from chunk 1.
newtype ChunkKeyStream = ChunkKeyStream [Key]
chunkKeyStream :: Key -> ChunkSize -> ChunkKeyStream
chunkKeyStream basek chunksize = ChunkKeyStream $ map mk [1..]
where
mk chunknum = sizedk { keyChunkNum = Just chunknum }
sizedk = basek { keyChunkSize = Just (toInteger chunksize) }
nextChunkKeyStream :: ChunkKeyStream -> (Key, ChunkKeyStream)
nextChunkKeyStream (ChunkKeyStream (k:l)) = (k, ChunkKeyStream l)
nextChunkKeyStream (ChunkKeyStream []) = undefined -- stream is infinite!
takeChunkKeyStream :: ChunkCount -> ChunkKeyStream -> [Key]
takeChunkKeyStream n (ChunkKeyStream l) = genericTake n l
-- Number of chunks already consumed from the stream.
numChunks :: ChunkKeyStream -> Integer
numChunks = pred . fromJust . keyChunkNum . fst . nextChunkKeyStream
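
Laziness makes the unbounded key sequence cheap: only the chunk keys actually demanded are ever constructed. A sketch with keys shrunk to strings (the real code sets the key's keyChunkSize and keyChunkNum fields; the "-S"/"-C" spelling below just imitates how key2file serializes them):

newtype ChunkKeyStream = ChunkKeyStream [String]

chunkKeyStream :: String -> Integer -> ChunkKeyStream
chunkKeyStream basek chunksize = ChunkKeyStream
    [ basek ++ "-S" ++ show chunksize ++ "-C" ++ show n
    | n <- [1 :: Integer ..] ]

nextChunkKeyStream :: ChunkKeyStream -> (String, ChunkKeyStream)
nextChunkKeyStream (ChunkKeyStream (k : l)) = (k, ChunkKeyStream l)
nextChunkKeyStream (ChunkKeyStream []) = undefined -- stream is infinite

main :: IO ()
main = do
    let (k1, rest) = nextChunkKeyStream (chunkKeyStream "SHA256-xyz" 1048576)
    putStrLn k1 -- SHA256-xyz-S1048576-C1
    putStrLn (fst (nextChunkKeyStream rest)) -- SHA256-xyz-S1048576-C2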
{- Splits up the key's content into chunks, passing each chunk to
- the storer action, along with a corresponding chunk key and a
- progress meter update callback.
-
- To support resuming, the checker is used to find the first missing
- chunk key. Storing starts from that chunk.
-
- This buffers each chunk in memory, so can use a lot of memory
- with a large ChunkSize.
- More efficient versions of this could be written that rely
- on L.toChunks to split the lazy bytestring into chunks (typically
- smaller than the ChunkSize), and eg, write those chunks to a Handle.
- But this is the best that can be done with the storer interface that
- writes a whole L.ByteString at a time.
-}
storeChunks
:: UUID
-> ChunkConfig
-> Key
-> FilePath
-> MeterUpdate
-> (Key -> ContentSource -> MeterUpdate -> Annex Bool)
-> (Key -> Annex (Either String Bool))
-> Annex Bool
storeChunks u chunkconfig k f p storer checker =
case chunkconfig of
(UnpaddedChunks chunksize) | isStableKey k ->
bracketIO open close (go chunksize)
_ -> showprogress $ storer k (FileContent f)
where
showprogress = metered (Just p) k
open = tryIO $ openBinaryFile f ReadMode
close (Right h) = hClose h
close (Left _) = noop
go _ (Left e) = do
warning (show e)
return False
go chunksize (Right h) = showprogress $ \meterupdate -> do
let chunkkeys = chunkKeyStream k chunksize
(chunkkeys', startpos) <- seekResume h chunkkeys checker
b <- liftIO $ L.hGetContents h
gochunks meterupdate startpos chunksize b chunkkeys'
gochunks :: MeterUpdate -> BytesProcessed -> ChunkSize -> L.ByteString -> ChunkKeyStream -> Annex Bool
gochunks meterupdate startpos chunksize = loop startpos . splitchunk
where
splitchunk = L.splitAt chunksize
loop bytesprocessed (chunk, bs) chunkkeys
| L.null chunk && numchunks > 0 = do
-- Once all chunks are successfully
-- stored, update the chunk log.
chunksStored u k (FixedSizeChunks chunksize) numchunks
return True
| otherwise = do
liftIO $ meterupdate' zeroBytesProcessed
let (chunkkey, chunkkeys') = nextChunkKeyStream chunkkeys
ifM (storer chunkkey (ByteContent chunk) meterupdate')
( do
let bytesprocessed' = addBytesProcessed bytesprocessed (L.length chunk)
loop bytesprocessed' (splitchunk bs) chunkkeys'
, return False
)
where
numchunks = numChunks chunkkeys
{- The MeterUpdate that is passed to the action
- storing a chunk is offset, so that it reflects
- the total bytes that have already been stored
- in previous chunks. -}
meterupdate' = offsetMeterUpdate meterupdate bytesprocessed
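
The driving loop leans on L.splitAt, which is lazy in the remainder, so only the current chunk needs to be buffered (up to ChunkSize, per the caveat in the comment above). The loop shape in isolation, as a standalone sketch:

import qualified Data.ByteString.Lazy as L

-- Split off one chunk, recurse on the rest, stop at the empty remainder.
chunksOf :: Int -> L.ByteString -> [L.ByteString]
chunksOf n b
    | L.null b = []
    | otherwise =
        let (chunk, rest) = L.splitAt (fromIntegral n) b
        in chunk : chunksOf n rest

main :: IO ()
main = print (map L.length (chunksOf 4 (L.pack [1 .. 10]))) -- [4,4,2]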
{- Check if any of the chunk keys are present. If found, seek forward
- in the Handle, so it will be read starting at the first missing chunk.
- Returns the ChunkKeyStream truncated to start at the first missing
- chunk, and the number of bytes skipped due to resuming.
-
- As an optimisation, if the file fits into a single chunk, there's no need
- to check if that chunk is present -- we know it's not, because otherwise
- the whole file would be present and there would be no reason to try to
- store it.
-}
seekResume
:: Handle
-> ChunkKeyStream
-> (Key -> Annex (Either String Bool))
-> Annex (ChunkKeyStream, BytesProcessed)
seekResume h chunkkeys checker = do
sz <- liftIO (hFileSize h)
if sz <= fromMaybe 0 (keyChunkSize $ fst $ nextChunkKeyStream chunkkeys)
then return (chunkkeys, zeroBytesProcessed)
else check 0 chunkkeys sz
where
check pos cks sz
| pos >= sz = do
-- All chunks are already stored!
liftIO $ hSeek h AbsoluteSeek sz
return (cks, toBytesProcessed sz)
| otherwise = do
v <- checker k
case v of
Right True ->
check pos' cks' sz
_ -> do
when (pos > 0) $
liftIO $ hSeek h AbsoluteSeek pos
return (cks, toBytesProcessed pos)
where
(k, cks') = nextChunkKeyStream cks
pos' = pos + fromMaybe 0 (keyChunkSize k)
{- Removes all chunks of a key from a remote, by calling a remover
- action on each.
-
- The remover action should succeed even if asked to
- remove a key that is not present on the remote.
-
- This action may be called on a chunked key. It will simply remove it.
-}
removeChunks :: (Key -> Annex Bool) -> UUID -> ChunkConfig -> EncKey -> Key -> Annex Bool
removeChunks remover u chunkconfig encryptor k = do
ls <- chunkKeys u chunkconfig k
ok <- allM (remover . encryptor) (concat ls)
when ok $ do
let chunksizes = catMaybes $ map (keyChunkSize <=< headMaybe) ls
forM_ chunksizes $ chunksRemoved u k . FixedSizeChunks . fromIntegral
return ok
{- Retrieves a key from a remote, using a retriever action.
-
- When the remote is chunked, tries each of the options returned by
- chunkKeys until it finds one where the retriever successfully
- gets the first chunked key. The content of that key, and any
- other chunks in the list is fed to the sink.
-
- If retrieval of one of the subsequent chunks throws an exception,
- gives up and returns False. Note that partial data may have been
- written to the sink in this case.
-
- Resuming is supported when using chunks. When the destination file
- already exists, it skips to the next chunked key that would be needed
- to resume.
-}
retrieveChunks
:: Retriever
-> UUID
-> ChunkConfig
-> EncKey
-> Key
-> FilePath
-> MeterUpdate
-> (Maybe Handle -> Maybe MeterUpdate -> ContentSource -> Annex Bool)
-> Annex Bool
retrieveChunks retriever u chunkconfig encryptor basek dest basep sink
| noChunks chunkconfig =
-- Optimisation: Try the unchunked key first, to avoid
-- looking in the git-annex branch for chunk counts
-- that are likely not there.
getunchunked `catchNonAsyncAnnex`
const (go =<< chunkKeysOnly u basek)
| otherwise = go =<< chunkKeys u chunkconfig basek
where
go ls = do
currsize <- liftIO $ catchMaybeIO $
toInteger . fileSize <$> getFileStatus dest
let ls' = maybe ls (setupResume ls) currsize
if any null ls'
then return True -- dest is already complete
else firstavail currsize ls' `catchNonAsyncAnnex` giveup
giveup e = do
warning (show e)
return False
firstavail _ [] = return False
firstavail currsize ([]:ls) = firstavail currsize ls
firstavail currsize ((k:ks):ls)
| k == basek = getunchunked
`catchNonAsyncAnnex` (const $ firstavail currsize ls)
| otherwise = do
let offset = resumeOffset currsize k
let p = maybe basep
(offsetMeterUpdate basep . toBytesProcessed)
offset
v <- tryNonAsyncAnnex $
retriever (encryptor k) p $ \content ->
bracketIO (maybe opennew openresume offset) hClose $ \h -> do
void $ tosink (Just h) p content
let sz = toBytesProcessed $
fromMaybe 0 $ keyChunkSize k
getrest p h sz sz ks
`catchNonAsyncAnnex` giveup
case v of
Left e
| null ls -> giveup e
| otherwise -> firstavail currsize ls
Right r -> return r
getrest _ _ _ _ [] = return True
getrest p h sz bytesprocessed (k:ks) = do
let p' = offsetMeterUpdate p bytesprocessed
liftIO $ p' zeroBytesProcessed
ifM (retriever (encryptor k) p' $ tosink (Just h) p')
( getrest p h sz (addBytesProcessed bytesprocessed sz) ks
, giveup "chunk retrieval failed"
)
getunchunked = retriever (encryptor basek) basep $ tosink Nothing basep
opennew = openBinaryFile dest WriteMode
-- Open the file and seek to the start point in order to resume.
openresume startpoint = do
-- ReadWriteMode allows seeking; AppendMode does not.
h <- openBinaryFile dest ReadWriteMode
hSeek h AbsoluteSeek startpoint
return h
{- Progress meter updating is a bit tricky: If the Retriever
- populates a file, it is responsible for updating progress
- as the file is being retrieved.
-
- However, if the Retriever generates a lazy ByteString,
- it is not responsible for updating progress (often it cannot).
- Instead, the sink is passed a meter to update as it consumes
- the ByteString.
-}
tosink h p content = sink h p' content
where
p'
| isByteContent content = Just p
| otherwise = Nothing
{- Can resume when the chunk's offset is at or before the end of
- the dest file. -}
resumeOffset :: Maybe Integer -> Key -> Maybe Integer
resumeOffset Nothing _ = Nothing
resumeOffset currsize k
| offset <= currsize = offset
| otherwise = Nothing
where
offset = chunkKeyOffset k
{- Drops chunks that are already present in a file, based on its size.
- Keeps any non-chunk keys.
-}
setupResume :: [[Key]] -> Integer -> [[Key]]
setupResume ls currsize = map dropunneeded ls
where
dropunneeded [] = []
dropunneeded l@(k:_) = case keyChunkSize k of
Just chunksize | chunksize > 0 ->
genericDrop (currsize `div` chunksize) l
_ -> l
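
A worked example of the resume arithmetic (assuming, as in chunkKeyStream, that chunk n starts at byte (n - 1) * chunksize): with 1MiB chunks and a 2.5MiB partial download, the first two chunks are already complete, so retrieval restarts at chunk 3, seeking to 2MiB.

main :: IO ()
main = do
    let chunksize = 1048576 :: Integer -- 1MiB chunks
        currsize = 2621440 :: Integer -- 2.5MiB already in dest
        chunknums = [1 .. 5 :: Integer] -- standing in for chunk keys
    -- setupResume: drop the chunks the file already covers.
    print (drop (fromInteger (currsize `div` chunksize)) chunknums) -- [3,4,5]
    -- resumeOffset: seek position for the first needed chunk.
    print ((3 - 1) * chunksize) -- 2097152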
{- Checks if a key is present in a remote. This requires any one
- of the lists of options returned by chunkKeys to all check out
- as being present using the checker action.
-}
hasKeyChunks
:: (Key -> Annex (Either String Bool))
-> UUID
-> ChunkConfig
-> EncKey
-> Key
-> Annex (Either String Bool)
hasKeyChunks checker u chunkconfig encryptor basek
| noChunks chunkconfig =
-- Optimisation: Try the unchunked key first, to avoid
-- looking in the git-annex branch for chunk counts
-- that are likely not there.
ifM ((Right True ==) <$> checker (encryptor basek))
( return (Right True)
, checklists Nothing =<< chunkKeysOnly u basek
)
| otherwise = checklists Nothing =<< chunkKeys u chunkconfig basek
where
checklists Nothing [] = return (Right False)
checklists (Just deferrederror) [] = return (Left deferrederror)
checklists d (l:ls)
| not (null l) = do
v <- checkchunks l
case v of
Left e -> checklists (Just e) ls
Right True -> return (Right True)
Right False -> checklists Nothing ls
| otherwise = checklists d ls
checkchunks :: [Key] -> Annex (Either String Bool)
checkchunks [] = return (Right True)
checkchunks (k:ks) = do
v <- checker (encryptor k)
if v == Right True
then checkchunks ks
else return v
{- A key can be stored in a remote unchunked, or as a list of chunked keys.
- This can be the case whether or not the remote is currently configured
- to use chunking.
-
- It's even possible for a remote to have the same key stored multiple
- times with different chunk sizes!
-
- This finds all possible lists of keys that might be on the remote that
- can be combined to get back the requested key, in order from most to
- least likely to exist.
-}
chunkKeys :: UUID -> ChunkConfig -> Key -> Annex [[Key]]
chunkKeys u chunkconfig k = do
l <- chunkKeysOnly u k
return $ if noChunks chunkconfig
then [k] : l
else l ++ [[k]]
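
A mirror of just the ordering logic, with keys as strings (not from the commit):

chunkKeys' :: Bool -> [[String]] -> String -> [[String]]
chunkKeys' nochunks logged k
    | nochunks = [k] : logged -- unchunked key is most likely
    | otherwise = logged ++ [[k]] -- logged chunk lists first, unchunked last

main :: IO ()
main = do
    let logged = [["k-C1", "k-C2"]] -- from getCurrentChunks
    print (chunkKeys' True logged "k") -- [["k"],["k-C1","k-C2"]]
    print (chunkKeys' False logged "k") -- [["k-C1","k-C2"],["k"]]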
chunkKeysOnly :: UUID -> Key -> Annex [[Key]]
chunkKeysOnly u k = map (toChunkList k) <$> getCurrentChunks u k
toChunkList :: Key -> (ChunkMethod, ChunkCount) -> [Key]
toChunkList k (FixedSizeChunks chunksize, chunkcount) =
takeChunkKeyStream chunkcount $ chunkKeyStream k chunksize
toChunkList _ (UnknownChunks _, _) = []

View file

@ -9,6 +9,7 @@ module Remote.Helper.Chunked.Legacy where
import Common.Annex
import Remote.Helper.Chunked
import Utility.Metered
import qualified Data.ByteString.Lazy as L
import qualified Control.Exception as E
@ -73,7 +74,7 @@ storeChunks key tmp dest storer recorder finalizer = either onerr return
finalizer tmp dest
return (not $ null stored)
onerr e = do
print e
warningIO (show e)
return False
basef = tmp ++ keyFile key
@ -104,7 +105,7 @@ storeChunked chunksize dests storer content = either onerr return
| otherwise = storechunks sz [] dests content
onerr e = do
print e
warningIO (show e)
return []
storechunks _ _ [] _ = return [] -- ran out of dests
@ -114,3 +115,12 @@ storeChunked chunksize dests storer content = either onerr return
let (chunk, b') = L.splitAt sz b
storer d chunk
storechunks sz (d:useddests) ds b'
{- Writes a series of chunks to a file. The feeder is called to get
- each chunk.
-}
meteredWriteFileChunks :: MeterUpdate -> FilePath -> [v] -> (v -> IO L.ByteString) -> IO ()
meteredWriteFileChunks meterupdate dest chunks feeder =
withBinaryFile dest WriteMode $ \h ->
forM_ chunks $
meteredWrite meterupdate h <=< feeder

View file

@ -0,0 +1,200 @@
{- Remotes that support both chunking and encryption.
-
- Copyright 2014 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
{-# LANGUAGE RankNTypes #-}
module Remote.Helper.ChunkedEncryptable (
Preparer,
Storer,
Retriever,
simplyPrepare,
ContentSource,
checkPrepare,
fileStorer,
byteStorer,
fileRetriever,
byteRetriever,
storeKeyDummy,
retreiveKeyFileDummy,
chunkedEncryptableRemote,
module X
) where
import Common.Annex
import Types.StoreRetrieve
import Types.Remote
import Crypto
import Config.Cost
import Utility.Metered
import Remote.Helper.Chunked as X
import Remote.Helper.Encryptable as X
import Annex.Content
import Annex.Exception
import qualified Data.ByteString.Lazy as L
import Control.Exception (bracket)
-- Use when nothing needs to be done to prepare a helper.
simplyPrepare :: helper -> Preparer helper
simplyPrepare helper _ a = a $ Just helper
-- Use to run a check when preparing a helper.
checkPrepare :: (Key -> Annex Bool) -> helper -> Preparer helper
checkPrepare checker helper k a = ifM (checker k)
( a (Just helper)
, a Nothing
)
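
A Preparer is continuation-passing: it either hands the continuation a helper to run with, or Nothing to signal that preparation failed. The shape in isolation, specialized from Annex to IO and Key to String (a sketch, not from the commit):

type Preparer helper = String -> (Maybe helper -> IO Bool) -> IO Bool

simplyPrepare' :: helper -> Preparer helper
simplyPrepare' helper _ a = a (Just helper)

checkPrepare' :: (String -> IO Bool) -> helper -> Preparer helper
checkPrepare' checker helper k a = do
    ok <- checker k
    a (if ok then Just helper else Nothing)

main :: IO ()
main = do
    let storer key = putStrLn ("storing " ++ key) >> return True
    ok <- checkPrepare' (\_ -> return True) storer "somekey" $ \mstorer ->
        case mstorer of
            Just s -> s "somekey"
            Nothing -> return False -- e.g. the disk-space check failed
    print ok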
-- A Storer that expects to be provided with a file containing
-- the content of the key to store.
fileStorer :: (Key -> FilePath -> MeterUpdate -> Annex Bool) -> Storer
fileStorer a k (FileContent f) m = a k f m
fileStorer a k (ByteContent b) m = withTmp k $ \f -> do
liftIO $ L.writeFile f b
a k f m
-- A Storer that expects to be provided with a L.ByteString of
-- the content to store.
byteStorer :: (Key -> L.ByteString -> MeterUpdate -> Annex Bool) -> Storer
byteStorer a k c m = withBytes c $ \b -> a k b m
-- A Retriever that writes the content of a Key to a provided file.
-- It is responsible for updating the progress meter as it retrieves data.
fileRetriever :: (FilePath -> Key -> MeterUpdate -> Annex ()) -> Retriever
fileRetriever a k m callback = do
f <- prepTmp k
a f k m
callback (FileContent f)
-- A Retriever that generates a L.ByteString containing the Key's content.
byteRetriever :: (Key -> Annex L.ByteString) -> Retriever
byteRetriever a k _m callback = callback =<< (ByteContent <$> a k)
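
Storers and Retrievers each come in file and byte flavors because ContentSource (defined in Types.StoreRetrieve, which is not shown in this diff) has two cases, and the adapters above convert between them. A standalone sketch of that duality, mirroring this module's withBytes:

import qualified Data.ByteString.Lazy as L

data ContentSource = FileContent FilePath | ByteContent L.ByteString

-- byteStorer-style adapter: always hand the action a ByteString.
withBytes' :: ContentSource -> (L.ByteString -> IO a) -> IO a
withBytes' (ByteContent b) a = a b
withBytes' (FileContent f) a = a =<< L.readFile f

main :: IO ()
main = do
    withBytes' (ByteContent (L.pack [104, 105])) L.putStr -- prints "hi"
    putStrLn ""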
{- The base Remote that is provided to chunkedEncryptableRemote
- needs to have storeKey and retreiveKeyFile methods, but they are
- never actually used (since chunkedEncryptableRemote replaces
- them). Here are some dummy ones.
-}
storeKeyDummy :: Key -> AssociatedFile -> MeterUpdate -> Annex Bool
storeKeyDummy _ _ _ = return False
retreiveKeyFileDummy :: Key -> AssociatedFile -> FilePath -> MeterUpdate -> Annex Bool
retreiveKeyFileDummy _ _ _ _ = return False
-- Modifies a base Remote to support both chunking and encryption.
chunkedEncryptableRemote
:: RemoteConfig
-> Preparer Storer
-> Preparer Retriever
-> Remote
-> Remote
chunkedEncryptableRemote c preparestorer prepareretriever baser = encr
where
encr = baser
{ storeKey = \k _f p -> cip >>= storeKeyGen k p
, retrieveKeyFile = \k _f d p -> cip >>= retrieveKeyFileGen k d p
, retrieveKeyFileCheap = \k d -> cip >>= maybe
(retrieveKeyFileCheap baser k d)
(\_ -> return False)
, removeKey = \k -> cip >>= removeKeyGen k
, hasKey = \k -> cip >>= hasKeyGen k
, cost = maybe
(cost baser)
(const $ cost baser + encryptedRemoteCostAdj)
(extractCipher c)
}
cip = cipherKey c
chunkconfig = chunkConfig c
gpgopts = getGpgEncParams encr
safely a = catchNonAsyncAnnex a (\e -> warning (show e) >> return False)
-- chunk, then encrypt, then feed to the storer
storeKeyGen k p enc =
safely $ preparestorer k $ safely . go
where
go (Just storer) = sendAnnex k rollback $ \src ->
metered (Just p) k $ \p' ->
storeChunks (uuid baser) chunkconfig k src p'
(storechunk enc storer)
(hasKey baser)
go Nothing = return False
rollback = void $ removeKey encr k
storechunk Nothing storer k content p = storer k content p
storechunk (Just (cipher, enck)) storer k content p =
withBytes content $ \b ->
encrypt gpgopts cipher (feedBytes b) $
readBytes $ \encb ->
storer (enck k) (ByteContent encb) p
-- call retriever to get chunks; decrypt them; stream to dest file
retrieveKeyFileGen k dest p enc =
safely $ prepareretriever k $ safely . go
where
go (Just retriever) = metered (Just p) k $ \p' ->
retrieveChunks retriever (uuid baser) chunkconfig
enck k dest p' (sink dest enc)
go Nothing = return False
enck = maybe id snd enc
removeKeyGen k enc = removeChunks remover (uuid baser) chunkconfig enck k
where
enck = maybe id snd enc
remover = removeKey baser
hasKeyGen k enc = hasKeyChunks checker (uuid baser) chunkconfig enck k
where
enck = maybe id snd enc
checker = hasKey baser
{- Sink callback for retrieveChunks. Stores the file content into the
- provided Handle, decrypting it first if necessary.
-
- If the remote did not store the content using chunks, no Handle
- will be provided, and it's up to us to open the destination file.
-
- Note that when neither chunking nor encryption is used, and the remote
- provides FileContent, that file only needs to be renamed
- into place. (And it may even already be in the right place..)
-}
sink
:: FilePath
-> Maybe (Cipher, EncKey)
-> Maybe Handle
-> Maybe MeterUpdate
-> ContentSource
-> Annex Bool
sink dest enc mh mp content = do
case (enc, mh, content) of
(Nothing, Nothing, FileContent f)
| f == dest -> noop
| otherwise -> liftIO $ moveFile f dest
(Just (cipher, _), _, ByteContent b) ->
decrypt cipher (feedBytes b) $
readBytes write
(Just (cipher, _), _, FileContent f) -> do
withBytes content $ \b ->
decrypt cipher (feedBytes b) $
readBytes write
liftIO $ nukeFile f
(Nothing, _, FileContent f) -> do
withBytes content write
liftIO $ nukeFile f
(Nothing, _, ByteContent b) -> write b
return True
where
write b = case mh of
Just h -> liftIO $ b `streamto` h
Nothing -> liftIO $ bracket opendest hClose (b `streamto`)
streamto b h = case mp of
Just p -> meteredWrite p h b
Nothing -> L.hPut h b
opendest = openBinaryFile dest WriteMode
withBytes :: ContentSource -> (L.ByteString -> Annex a) -> Annex a
withBytes (ByteContent b) a = a b
withBytes (FileContent f) a = a =<< liftIO (L.readFile f)

View file

@ -66,44 +66,45 @@ encryptionSetup c = maybe genCipher updateCipher $ extractCipher c
c' = foldr M.delete c
-- git-annex used to remove 'encryption' as well, since
-- it was redundant; we now need to keep it for
-- public-key incryption, hence we leave it on newer
-- public-key encryption, hence we leave it on newer
-- remotes (while being backward-compatible).
[ "keyid", "keyid+", "keyid-", "highRandomQuality" ]
{- Modifies a Remote to support encryption.
-
- Two additional functions must be provided by the remote,
- to support storing and retrieving encrypted content. -}
{- Modifies a Remote to support encryption. -}
-- TODO: deprecated
encryptableRemote
:: RemoteConfig
-> ((Cipher, Key) -> Key -> MeterUpdate -> Annex Bool)
-> ((Cipher, Key) -> Key -> FilePath -> MeterUpdate -> Annex Bool)
-> Remote
-> Remote
encryptableRemote c storeKeyEncrypted retrieveKeyFileEncrypted r =
r {
storeKey = store,
retrieveKeyFile = retrieve,
retrieveKeyFileCheap = retrieveCheap,
removeKey = withkey $ removeKey r,
hasKey = withkey $ hasKey r,
cost = maybe
(cost r)
(const $ cost r + encryptedRemoteCostAdj)
(extractCipher c)
}
where
store k f p = cip k >>= maybe
encryptableRemote c storeKeyEncrypted retrieveKeyFileEncrypted r = r
{ storeKey = \k f p -> cip k >>= maybe
(storeKey r k f p)
(\enck -> storeKeyEncrypted enck k p)
retrieve k f d p = cip k >>= maybe
(\v -> storeKeyEncrypted v k p)
, retrieveKeyFile = \k f d p -> cip k >>= maybe
(retrieveKeyFile r k f d p)
(\enck -> retrieveKeyFileEncrypted enck k d p)
retrieveCheap k d = cip k >>= maybe
(\v -> retrieveKeyFileEncrypted v k d p)
, retrieveKeyFileCheap = \k d -> cip k >>= maybe
(retrieveKeyFileCheap r k d)
(\_ -> return False)
withkey a k = cip k >>= maybe (a k) (a . snd)
cip = cipherKey c
, removeKey = \k -> cip k >>= maybe
(removeKey r k)
(\(_, enckey) -> removeKey r enckey)
, hasKey = \k -> cip k >>= maybe
(hasKey r k)
(\(_, enckey) -> hasKey r enckey)
, cost = maybe
(cost r)
(const $ cost r + encryptedRemoteCostAdj)
(extractCipher c)
}
where
cip k = do
v <- cipherKey c
return $ case v of
Nothing -> Nothing
Just (cipher, enck) -> Just (cipher, enck k)
{- Gets encryption Cipher. The decrypted Ciphers are cached in the Annex
- state. -}
@ -136,11 +137,11 @@ embedCreds c
| isJust (M.lookup "cipherkeys" c) && isJust (M.lookup "cipher" c) = True
| otherwise = False
{- Gets encryption Cipher, and encrypted version of Key. -}
cipherKey :: RemoteConfig -> Key -> Annex (Maybe (Cipher, Key))
cipherKey c k = fmap make <$> remoteCipher c
{- Gets encryption Cipher, and key encryptor. -}
cipherKey :: RemoteConfig -> Annex (Maybe (Cipher, EncKey))
cipherKey c = fmap make <$> remoteCipher c
where
make ciphertext = (ciphertext, encryptKey mac ciphertext k)
make ciphertext = (ciphertext, encryptKey mac ciphertext)
mac = fromMaybe defaultMac $ M.lookup "mac" c >>= readMac
{- Stores a StorableCipher in a remote's configuration. -}

View file

@ -5,7 +5,7 @@
- Licensed under the GNU GPL version 3 or higher.
-}
{-# LANGUAGE ScopedTypeVariables, CPP #-}
{-# LANGUAGE ScopedTypeVariables #-}
module Remote.WebDAV (remote, davCreds, configUrl) where
@ -16,11 +16,7 @@ import qualified Data.ByteString.Lazy.UTF8 as L8
import qualified Data.ByteString.Lazy as L
import qualified Control.Exception as E
import qualified Control.Exception.Lifted as EL
#if MIN_VERSION_DAV(0,6,0)
import Network.HTTP.Client (HttpException(..))
#else
import Network.HTTP.Conduit (HttpException(..))
#endif
import Network.HTTP.Types
import System.Log.Logger (debugM)
import System.IO.Error
@ -113,7 +109,7 @@ storeHelper :: Remote -> Key -> DavUrl -> DavUser -> DavPass -> L.ByteString ->
storeHelper r k baseurl user pass b = catchBoolIO $ do
mkdirRecursiveDAV tmpurl user pass
case chunkconfig of
NoChunks -> flip catchNonAsync (\e -> print e >> return False) $ do
NoChunks -> flip catchNonAsync (\e -> warningIO (show e) >> return False) $ do
storehttp tmpurl b
finalizer tmpurl keyurl
return True
@ -140,7 +136,7 @@ retrieve :: Remote -> Key -> AssociatedFile -> FilePath -> MeterUpdate -> Annex
retrieve r k _f d p = metered (Just p) k $ \meterupdate ->
davAction r False $ \(baseurl, user, pass) -> liftIO $ catchBoolIO $
withStoredFiles r k baseurl user pass onerr $ \urls -> do
meteredWriteFileChunks meterupdate d urls $ \url -> do
Legacy.meteredWriteFileChunks meterupdate d urls $ \url -> do
mb <- getDAV url user pass
case mb of
Nothing -> throwIO "download failed"
@ -308,57 +304,37 @@ debugDAV :: DavUrl -> String -> IO ()
debugDAV msg url = debugM "DAV" $ msg ++ " " ++ url
{---------------------------------------------------------------------
- Low-level DAV operations, using the new DAV monad when available.
- Low-level DAV operations.
---------------------------------------------------------------------}
putDAV :: DavUrl -> DavUser -> DavPass -> L.ByteString -> IO ()
putDAV url user pass b = do
debugDAV "PUT" url
#if MIN_VERSION_DAV(0,6,0)
goDAV url user pass $ putContentM (contentType, b)
#else
putContent url user pass (contentType, b)
#endif
getDAV :: DavUrl -> DavUser -> DavPass -> IO (Maybe L.ByteString)
getDAV url user pass = do
debugDAV "GET" url
eitherToMaybe <$> tryNonAsync go
where
#if MIN_VERSION_DAV(0,6,0)
go = goDAV url user pass $ snd <$> getContentM
#else
go = snd . snd <$> getPropsAndContent url user pass
#endif
deleteDAV :: DavUrl -> DavUser -> DavPass -> IO ()
deleteDAV url user pass = do
debugDAV "DELETE" url
#if MIN_VERSION_DAV(0,6,0)
goDAV url user pass delContentM
#else
deleteContent url user pass
#endif
moveDAV :: DavUrl -> DavUrl -> DavUser -> DavPass -> IO ()
moveDAV url newurl user pass = do
debugDAV ("MOVE to " ++ newurl ++ " from ") url
#if MIN_VERSION_DAV(0,6,0)
goDAV url user pass $ moveContentM newurl'
#else
moveContent url newurl' user pass
#endif
where
newurl' = B8.fromString newurl
mkdirDAV :: DavUrl -> DavUser -> DavPass -> IO Bool
mkdirDAV url user pass = do
debugDAV "MKDIR" url
#if MIN_VERSION_DAV(0,6,0)
goDAV url user pass mkCol
#else
makeCollection url user pass
#endif
existsDAV :: DavUrl -> DavUser -> DavPass -> IO (Either String Bool)
existsDAV url user pass = do
@ -366,35 +342,19 @@ existsDAV url user pass = do
either (Left . show) id <$> tryNonAsync check
where
ispresent = return . Right
#if MIN_VERSION_DAV(0,6,0)
check = goDAV url user pass $ do
setDepth Nothing
EL.catchJust
(matchStatusCodeException notFound404)
(getPropsM >> ispresent True)
(const $ ispresent False)
#else
check = E.catchJust
(matchStatusCodeException notFound404)
#if ! MIN_VERSION_DAV(0,4,0)
(getProps url user pass >> ispresent True)
#else
(getProps url user pass Nothing >> ispresent True)
#endif
(const $ ispresent False)
#endif
matchStatusCodeException :: Status -> HttpException -> Maybe ()
#if MIN_VERSION_DAV(0,6,0)
matchStatusCodeException want (StatusCodeException s _ _)
#else
matchStatusCodeException want (StatusCodeException s _)
#endif
| s == want = Just ()
| otherwise = Nothing
matchStatusCodeException _ _ = Nothing
#if MIN_VERSION_DAV(0,6,0)
goDAV :: DavUrl -> DavUser -> DavPass -> DAVT IO a -> IO a
goDAV url user pass a = choke $ evalDAVT url $ do
setResponseTimeout Nothing -- disable default (5 second!) timeout
@ -407,4 +367,3 @@ goDAV url user pass a = choke $ evalDAVT url $ do
case x of
Left e -> error e
Right r -> return r
#endif

View file

@ -15,9 +15,16 @@ import Types.KeySource
data BackendA a = Backend
{ name :: String
, getKey :: KeySource -> a (Maybe Key)
-- Checks the content of a key.
, fsckKey :: Maybe (Key -> FilePath -> a Bool)
-- Checks if a key can be upgraded to a better form.
, canUpgradeKey :: Maybe (Key -> Bool)
-- Checks if there is a fast way to migrate a key to a different
-- backend (ie, without re-hashing).
, fastMigrate :: Maybe (Key -> BackendA a -> Maybe Key)
-- Checks if a key is known (or assumed) to always refer to the
-- same data.
, isStableKey :: Key -> Bool
}
instance Show (BackendA a) where

View file

@ -69,6 +69,7 @@ data CommandSection
| SectionMetaData
| SectionUtility
| SectionPlumbing
| SectionTesting
deriving (Eq, Ord, Enum, Bounded)
descSection :: CommandSection -> String
@ -79,3 +80,4 @@ descSection SectionQuery = "Query commands"
descSection SectionMetaData = "Metadata commands"
descSection SectionUtility = "Utility commands"
descSection SectionPlumbing = "Plumbing commands"
descSection SectionTesting = "Testing commands"

View file

@ -13,6 +13,8 @@ module Types.Key (
stubKey,
key2file,
file2key,
nonChunkKey,
chunkKeyOffset,
prop_idempotent_key_encode,
prop_idempotent_key_decode
@ -47,6 +49,19 @@ stubKey = Key
, keyChunkNum = Nothing
}
-- Gets the parent of a chunk key.
nonChunkKey :: Key -> Key
nonChunkKey k = k
{ keyChunkSize = Nothing
, keyChunkNum = Nothing
}
-- Where a chunk key is offset within its parent.
chunkKeyOffset :: Key -> Maybe Integer
chunkKeyOffset k = (*)
<$> keyChunkSize k
<*> (pred <$> keyChunkNum k)
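-- A worked example, not part of this commit: the third 1 MiB chunk
-- starts 2 MiB into the parent key's content.
exampleChunkOffset :: Maybe Integer
exampleChunkOffset = chunkKeyOffset $ stubKey
	{ keyChunkSize = Just (1024 * 1024)
	, keyChunkNum = Just 3
	}
-- exampleChunkOffset == Just 2097152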
fieldSep :: Char
fieldSep = '-'

View file

@ -56,7 +56,9 @@ data RemoteA a = Remote {
name :: RemoteName,
-- Remotes have a use cost; higher is more expensive
cost :: Cost,
-- Transfers a key to the remote.
-- Transfers a key's contents from disk to the remote.
-- The key should not appear to be present on the remote until
-- all of its contents have been transferred.
storeKey :: Key -> AssociatedFile -> MeterUpdate -> a Bool,
-- Retrieves a key's contents to a file.
-- (The MeterUpdate does not need to be used if it retrieves
@ -64,7 +66,7 @@ data RemoteA a = Remote {
retrieveKeyFile :: Key -> AssociatedFile -> FilePath -> MeterUpdate -> a Bool,
-- retrieves a key's contents to a tmp file, if it can be done cheaply
retrieveKeyFileCheap :: Key -> FilePath -> a Bool,
-- removes a key's contents
-- removes a key's contents (succeeds if the contents are not present)
removeKey :: Key -> a Bool,
-- Checks if a key is present in the remote; if the remote
-- cannot be accessed returns a Left error message.

Types/StoreRetrieve.hs Normal file
View file

@ -0,0 +1,37 @@
{- Types for Storer and Retriever actions for remotes.
-
- Copyright 2014 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
{-# LANGUAGE Rank2Types #-}
module Types.StoreRetrieve where
import Common.Annex
import Utility.Metered
import qualified Data.ByteString.Lazy as L
-- Prepares for and then runs an action that will act on a Key's
-- content, passing it a helper when the preparation is successful.
type Preparer helper = forall a. Key -> (Maybe helper -> Annex a) -> Annex a
-- A source of a Key's content.
data ContentSource
= FileContent FilePath
| ByteContent L.ByteString
isByteContent :: ContentSource -> Bool
isByteContent (ByteContent _) = True
isByteContent (FileContent _) = False
-- Action that stores a Key's content on a remote.
-- Can throw exceptions.
type Storer = Key -> ContentSource -> MeterUpdate -> Annex Bool
-- Action that retrieves a Key's content from a remote, passing it to a
-- callback.
-- Throws exception if key is not present, or remote is not accessible.
type Retriever = Key -> MeterUpdate -> (ContentSource -> Annex Bool) -> Annex Bool
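-- A sketch, not part of this commit: a Storer for a remote backed by
-- a local directory. Progress reporting is omitted, and copyFile,
-- (</>), liftIO, and key2file are assumed to be in scope via
-- Common.Annex and Types.Key. A real Storer should write to a temp
-- file and rename into place, so the key does not appear present
-- until fully stored.
directoryStorer :: FilePath -> Storer
directoryStorer dir k (FileContent f) _p = do
	liftIO $ copyFile f (dir </> key2file k)
	return True
directoryStorer dir k (ByteContent b) _p = do
	liftIO $ L.writeFile (dir </> key2file k) b
	return True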

View file

@ -11,14 +11,15 @@ module Utility.Gpg where
import Control.Applicative
import Control.Concurrent
import Control.Monad.IO.Class
import qualified Data.Map as M
import Control.Monad.Catch (bracket, MonadMask)
import Common
import qualified Build.SysConfig as SysConfig
#ifndef mingw32_HOST_OS
import System.Posix.Types
import Control.Exception (bracket)
import System.Path
import Utility.Env
#else
@ -104,18 +105,18 @@ pipeStrict params input = do
-
- Note that to avoid deadlock with the cleanup stage,
- the reader must fully consume gpg's output before returning. -}
feedRead :: [CommandParam] -> String -> (Handle -> IO ()) -> (Handle -> IO a) -> IO a
feedRead :: (MonadIO m, MonadMask m) => [CommandParam] -> String -> (Handle -> IO ()) -> (Handle -> m a) -> m a
feedRead params passphrase feeder reader = do
#ifndef mingw32_HOST_OS
-- pipe the passphrase into gpg on a fd
(frompipe, topipe) <- createPipe
void $ forkIO $ do
(frompipe, topipe) <- liftIO createPipe
liftIO $ void $ forkIO $ do
toh <- fdToHandle topipe
hPutStrLn toh passphrase
hClose toh
let Fd pfd = frompipe
let passphrasefd = [Param "--passphrase-fd", Param $ show pfd]
closeFd frompipe `after` go (passphrasefd ++ params)
liftIO (closeFd frompipe) `after` go (passphrasefd ++ params)
#else
-- store the passphrase in a temp file for gpg
withTmpFile "gpg" $ \tmpfile h -> do
@ -128,15 +129,24 @@ feedRead params passphrase feeder reader = do
go params' = pipeLazy params' feeder reader
{- Like feedRead, but without passphrase. -}
pipeLazy :: [CommandParam] -> (Handle -> IO ()) -> (Handle -> IO a) -> IO a
pipeLazy :: (MonadIO m, MonadMask m) => [CommandParam] -> (Handle -> IO ()) -> (Handle -> m a) -> m a
pipeLazy params feeder reader = do
params' <- stdParams $ Param "--batch" : params
withBothHandles createProcessSuccess (proc gpgcmd params')
$ \(to, from) -> do
void $ forkIO $ do
feeder to
hClose to
reader from
params' <- liftIO $ stdParams $ Param "--batch" : params
let p = (proc gpgcmd params')
{ std_in = CreatePipe
, std_out = CreatePipe
, std_err = Inherit
}
bracket (setup p) (cleanup p) go
where
setup = liftIO . createProcess
cleanup p (_, _, _, pid) = liftIO $ forceSuccessProcess p pid
go p = do
let (to, from) = bothHandles p
liftIO $ void $ forkIO $ do
feeder to
hClose to
reader from
{- Finds gpg public keys matching some string. (Could be an email address,
- a key id, or a name; See the section 'HOW TO SPECIFY A USER ID' of

View file

@ -16,6 +16,7 @@ import qualified Data.ByteString as S
import System.IO.Unsafe
import Foreign.Storable (Storable(sizeOf))
import System.Posix.Types
import Data.Int
{- An action that can be run repeatedly, updating it on the bytes processed.
-
@ -23,6 +24,9 @@ import System.Posix.Types
- far, *not* an incremental amount since the last call. -}
type MeterUpdate = (BytesProcessed -> IO ())
nullMeterUpdate :: MeterUpdate
nullMeterUpdate _ = return ()
{- Total number of bytes processed so far. -}
newtype BytesProcessed = BytesProcessed Integer
deriving (Eq, Ord, Show)
@ -31,6 +35,10 @@ class AsBytesProcessed a where
toBytesProcessed :: a -> BytesProcessed
fromBytesProcessed :: BytesProcessed -> a
instance AsBytesProcessed BytesProcessed where
toBytesProcessed = id
fromBytesProcessed = id
instance AsBytesProcessed Integer where
toBytesProcessed i = BytesProcessed i
fromBytesProcessed (BytesProcessed i) = i
@ -39,6 +47,10 @@ instance AsBytesProcessed Int where
toBytesProcessed i = BytesProcessed $ toInteger i
fromBytesProcessed (BytesProcessed i) = fromInteger i
instance AsBytesProcessed Int64 where
toBytesProcessed i = BytesProcessed $ toInteger i
fromBytesProcessed (BytesProcessed i) = fromInteger i
instance AsBytesProcessed FileOffset where
toBytesProcessed sz = BytesProcessed $ toInteger sz
fromBytesProcessed (BytesProcessed sz) = fromInteger sz
@ -77,6 +89,13 @@ meteredWriteFile :: MeterUpdate -> FilePath -> L.ByteString -> IO ()
meteredWriteFile meterupdate f b = withBinaryFile f WriteMode $ \h ->
meteredWrite meterupdate h b
{- Applies an offset to a MeterUpdate. This can be useful when
- performing a sequence of actions, such as multiple meteredWriteFiles,
- that all update a common meter progressively. Or when resuming.
-}
offsetMeterUpdate :: MeterUpdate -> BytesProcessed -> MeterUpdate
offsetMeterUpdate base offset = \n -> base (offset `addBytesProcessed` n)
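-- For example, a sketch of resuming: when some bytes were already
-- stored by an earlier interrupted transfer, offset the meter so the
-- reported total stays accurate.
resumedMeter :: MeterUpdate -> Integer -> MeterUpdate
resumedMeter meterupdate alreadystored =
	offsetMeterUpdate meterupdate (toBytesProcessed alreadystored)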
{- This is like L.hGetContents, but after each chunk is read, a meter
- is updated based on the size of the chunk.
-

View file

@ -31,6 +31,7 @@ module Utility.Process (
stdinHandle,
stdoutHandle,
stderrHandle,
bothHandles,
processHandle,
devNull,
) where

View file

@ -9,11 +9,12 @@
module Utility.Tmp where
import Control.Exception (bracket)
import System.IO
import System.Directory
import Control.Monad.IfElse
import System.FilePath
import Control.Monad.IO.Class
import Control.Monad.Catch (bracket, MonadMask)
import Utility.Exception
import Utility.FileSystemEncoding
@ -42,18 +43,18 @@ viaTmp a file content = bracket setup cleanup use
{- Runs an action with a tmp file located in the system's tmp directory
- (or in "." if there is none) then removes the file. -}
withTmpFile :: Template -> (FilePath -> Handle -> IO a) -> IO a
withTmpFile :: (MonadIO m, MonadMask m) => Template -> (FilePath -> Handle -> m a) -> m a
withTmpFile template a = do
tmpdir <- catchDefaultIO "." getTemporaryDirectory
tmpdir <- liftIO $ catchDefaultIO "." getTemporaryDirectory
withTmpFileIn tmpdir template a
{- Runs an action with a tmp file located in the specified directory,
- then removes the file. -}
withTmpFileIn :: FilePath -> Template -> (FilePath -> Handle -> IO a) -> IO a
withTmpFileIn :: (MonadIO m, MonadMask m) => FilePath -> Template -> (FilePath -> Handle -> m a) -> m a
withTmpFileIn tmpdir template a = bracket create remove use
where
create = openTempFile tmpdir template
remove (name, handle) = do
create = liftIO $ openTempFile tmpdir template
remove (name, handle) = liftIO $ do
hClose handle
catchBoolIO (removeFile name >> return True)
use (name, handle) = a name handle
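-- A sketch of the generalized interface in use: withTmpFile can now
-- run directly in any MonadIO + MonadMask monad (such as Annex),
-- not just IO.
demoTmp :: (MonadIO m, MonadMask m) => m ()
demoTmp = withTmpFile "demo" $ \_name h ->
	liftIO $ hPutStrLn h "scratch data"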

debian/changelog vendored
View file

@ -1,5 +1,12 @@
git-annex (5.20140718) UNRELEASED; urgency=medium
* New chunk= option to chunk files stored in special remotes.
Currently supported by: directory, and all external special remotes.
* Partially transferred files are automatically resumed when using
chunked remotes!
* The old chunksize= option is deprecated. Do not use for new remotes.
* Legacy code for directory remotes using the old chunksize= option
will keep them working, but more slowly than before.
* webapp: Automatically install Konqueror integration scripts
to get and drop files.
* repair: Removing bad objects could leave fsck finding no more
@ -8,6 +15,8 @@ git-annex (5.20140718) UNRELEASED; urgency=medium
were incompletely repaired before.
* Fix cost calculation for non-encrypted remotes.
* Display exception message when a transfer fails due to an exception.
* WebDAV: Dropped support for DAV before 0.6.1.
* testremote: New command to test uploads/downloads to a remote.
-- Joey Hess <joeyh@debian.org> Mon, 21 Jul 2014 14:41:26 -0400

doc/chunking.mdwn Normal file
View file

@ -0,0 +1,31 @@
Some [[special_remotes]] have support for breaking large files up into
chunks that are stored on the remote.
This can be useful to work around limitations on the size of files
on the remote.
Chunking also allows for resuming interrupted downloads and uploads.
Note that git-annex has to buffer chunks in memory before they are sent to
a remote. So, using a large chunk size will make it use more memory.
To enable chunking, pass a `chunk=nnMiB` parameter to `git annex
initremote`, specifying the chunk size.
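For example, to create a directory special remote that stores files in
1 MiB chunks (the remote name and directory here are illustrative):

	git annex initremote mydrive type=directory directory=/mnt/mydrive chunk=1MiB encryption=none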
Good chunk sizes will depend on the remote, but a good starting place
is probably `1MiB`. Very large chunks are problematic, both because
git-annex needs to buffer one chunk in memory when uploading, and because
a larger chunk will make resuming interrupted transfers less efficient.
On the other hand, when a file is split into a great many chunks,
there can be increased overhead of making many requests to the remote.
To disable chunking of a remote that was using chunking,
pass `chunk=0` to `git annex enableremote`. Any content already stored on
the remote using chunks will continue to be accessed via chunks; this
just prevents using chunks when storing new content.
To change the chunk size, pass a `chunk=nnMiB` parameter to
`git annex enableremote`. This only affects the chunk size used when
storing new content.
See also: [[design document|design/assistant/chunks]]

View file

@ -231,6 +231,15 @@ cannot check exact file sizes.
If padding is enabled, gpg compression should be disabled, to not leak
clues about how well the files compress and so what kind of file it is.
## chunk key hashing
A chunk key should hash into the same directory structure as its parent
key. This will avoid lots of extra hash directories when using chunking
with non-encrypted keys.
This won't happen when the key is encrypted, but that is good; hashing to
the same bucket would then allow statistical correlation.
## resuming interrupted transfers

Both interrupted downloads and interrupted uploads can be resumed.

View file

@ -101,12 +101,14 @@ The following requests *must* all be supported by the special remote.
Tells the special remote it's time to prepare itself to be used.
Only INITREMOTE can come before this.
* `TRANSFER STORE|RETRIEVE Key File`
Requests the transfer of a key. For Send, the File is the file to upload;
for Receive the File is where to store the download.
Requests the transfer of a key. For STORE, the File is the file to upload;
for RETRIEVE the File is where to store the download.
Note that the File should not influence the filename used on the remote.
The filename will not contain any whitespace.
Note that it's important that, while a Key is being stored, CHECKPRESENT
not indicate it's present until all the data has been transferred.
Multiple transfers might be requested by git-annex, but it's fine for the
program to serialize them and only do one at a time.
program to serialize them and only do one at a time.
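For example, a successful store might look like this on the wire;
git-annex sends the TRANSFER line, and the remote replies with
PROGRESS updates and finally TRANSFER-SUCCESS or TRANSFER-FAILURE
(the key and file shown are illustrative):

	TRANSFER STORE SHA256E-s1048576--1db0 /tmp/transfer42
	PROGRESS 524288
	PROGRESS 1048576
	TRANSFER-SUCCESS STORE SHA256E-s1048576--1db0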
* `CHECKPRESENT Key`
Requests the remote check if a key is present in it.
* `REMOVE Key`
@ -286,7 +288,6 @@ start a new process the next time it needs to use a remote.
the remote. However, \n and probably \0 need to be escaped somehow in the
file data, which adds complication.
* uuid discovery during INITREMOTE.
* Support for splitting files into chunks.
* Support for getting and setting the list of urls that can be associated
with a key.
* Hook into webapp. Needs a way to provide some kind of prompt to the user

View file

@ -949,12 +949,6 @@ subdirectories).
Merge conflicts between two files that are not annexed will not be
automatically resolved.
* `test`
This runs git-annex's built-in test suite.
There are several parameters, provided by Haskell's tasty test framework.
* `remotedaemon`
Detects when network remotes have received git pushes and fetches from them.
@ -963,6 +957,35 @@ subdirectories).
This command is used internally to perform git pulls over XMPP.
# TESTING COMMANDS
* `test`
This runs git-annex's built-in test suite.
There are several parameters, provided by Haskell's tasty test framework.
Pass --help for details.
* `testremote remote`
This tests a remote by generating some random objects and sending them to
the remote, then redownloading them, removing them from the remote, etc.
It's safe to run in an existing repository (the repository contents are
not altered), although it may perform expensive data transfers.
The --size option can be used to tune the size of the generated objects.
Testing a single remote will use the remote's configuration,
automatically varying the chunk sizes, and with simple shared encryption
enabled and disabled.
* `fuzztest`
Generates random changes to files in the current repository,
for use in testing the assistant. This is dangerous, so it will not
do anything unless --forced.
# OPTIONS
* `--force`

View file

@ -36,3 +36,8 @@ string, but where that would normally encode the bits using the 16 characters
0-9a-f, this instead uses the 32 characters "0123456789zqjxkmvwgpfZQJXKMVWGPF".
The first 2 letters of the resulting string are the first directory, and the
second 2 are the second directory.
## chunk keys
The same hash directory is used for a chunk key as would be used for the
key that it's a chunk of.

View file

@ -25,13 +25,11 @@ remote:
* `keyid` - Specifies the gpg key to use for [[encryption]].
* `chunksize` - Avoid storing files larger than the specified size in the
directory. For use on directories on mount points that have file size
limitations. The default is to never chunk files.
The value can use specified using any commonly used units.
Example: `chunksize=100 megabytes`
Note that enabling chunking on an existing remote with non-chunked
files is not recommended; nor is changing the chunksize.
* `chunk` - Enables [[chunking]] when storing large files.
* `chunksize` - Deprecated version of chunk parameter above.
Do not use for new remotes. It is not safe to change the chunksize
setting of an existing remote.
Setup example:

View file

@ -128,14 +128,25 @@ while read line; do
STORE)
# Store the file to a location
# based on the key.
# XXX when possible, send PROGRESS
# XXX when at all possible, send PROGRESS
calclocation "$key"
mkdir -p "$(dirname "$LOC")"
if runcmd cp "$file" "$LOC"; then
# Store in temp file first, so that
# CHECKPRESENT does not see it
# until it is all stored.
mkdir -p "$mydirectory/tmp"
tmp="$mydirectory/tmp/$key"
if runcmd cp "$file" "$tmp" \
&& runcmd mv -f "$tmp" "$LOC"; then
echo TRANSFER-SUCCESS STORE "$key"
else
echo TRANSFER-FAILURE STORE "$key"
fi
mkdir -p "$(dirname "$LOC")"
# The file may already exist, so
# make sure we can overwrite it.
chmod 644 "$LOC" 2>/dev/null || true
;;
RETRIEVE)
# Retrieve from a location based on

View file

@ -29,13 +29,11 @@ the webdav remote.
be created as needed. Use of a https URL is strongly
encouraged, since HTTP basic authentication is used.
* `chunksize` - Avoid storing files larger than the specified size in
WebDAV. For use when the WebDAV server has file size
limitations. The default is to never chunk files.
The value can use specified using any commonly used units.
Example: `chunksize=75 megabytes`
Note that enabling chunking on an existing remote with non-chunked
files is not recommended, nor is changing the chunksize.
* `chunk` - Enables [[chunking]] when storing large files.
* `chunksize` - Deprecated version of chunk parameter above.
Do not use for new remotes. It is not safe to change the chunksize
setting of an existing remote.
Setup example:

View file

@ -5,9 +5,9 @@ for providing 50 gb of free storage if you sign up with its Android client.
git-annex can use Box as a [[special remote|special_remotes]].
Recent versions of git-annex make this very easy to set up:
WEBDAV_USERNAME=you@example.com WEBDAV_PASSWORD=xxxxxxx git annex initremote box.com type=webdav url=https://dav.box.com/dav/git-annex chunksize=75mb encryption=shared
WEBDAV_USERNAME=you@example.com WEBDAV_PASSWORD=xxxxxxx git annex initremote box.com type=webdav url=https://dav.box.com/dav/git-annex chunk=50mb encryption=shared
Note the use of chunksize; Box has a 100 mb maximum file size, and this
Note the use of [[chunking]]; Box has a 100 mb maximum file size, and this
breaks up large files into chunks before that limit is reached.
# old davfs2 method
@ -58,7 +58,7 @@ Create the special remote, in your git-annex repository.
** This example is non-encrypted; fill in your gpg key ID for a securely
encrypted special remote! **
git annex initremote box.com type=directory directory=/media/box.com chunksize=2mb encryption=none
git annex initremote box.com type=directory directory=/media/box.com chunk=2mb encryption=none
Now git-annex can copy files to box.com, get files from it, etc, just like
with any other special remote.

View file

@ -124,7 +124,7 @@ Executable git-annex
if flag(TestSuite)
Build-Depends: tasty (>= 0.7), tasty-hunit, tasty-quickcheck, tasty-rerun,
optparse-applicative
optparse-applicative, crypto-api
CPP-Options: -DWITH_TESTSUITE
if flag(TDFA)
@ -142,7 +142,7 @@ Executable git-annex
CPP-Options: -DWITH_S3
if flag(WebDAV)
Build-Depends: DAV ((>= 0.3 && < 0.6) || > 0.6),
Build-Depends: DAV (> 0.6),
http-client, http-conduit, http-types, lifted-base
CPP-Options: -DWITH_WEBDAV