Merge branch 'newchunks'

I am happy enough with this to make it live!
Joey Hess 2014-08-01 18:00:47 -04:00
commit 5aa2286e7b
44 changed files with 1357 additions and 389 deletions

Annex/Content.hs

@@ -16,6 +16,7 @@ module Annex.Content (
 	getViaTmpChecked,
 	getViaTmpUnchecked,
 	prepGetViaTmpChecked,
+	prepTmp,
 	withTmp,
 	checkDiskSpace,
 	moveAnnex,
@@ -264,7 +265,10 @@ prepTmp key = do
 	createAnnexDirectory (parentDir tmp)
 	return tmp
 
-{- Creates a temp file, runs an action on it, and cleans up the temp file. -}
+{- Creates a temp file for a key, runs an action on it, and cleans up
+ - the temp file. If the action throws an exception, the temp file is
+ - left behind, which allows for resuming.
+ -}
 withTmp :: Key -> (FilePath -> Annex a) -> Annex a
 withTmp key action = do
 	tmp <- prepTmp key
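
The new comment describes resume-friendly cleanup: the temp file is only removed on the success path. A minimal standalone sketch of that shape in plain IO (withTmpSketch is a hypothetical name, not code from this commit):

import System.Directory (removeFile)

-- The temp file is removed only when the action returns normally;
-- an exception leaves it behind, so a later run can resume from it.
withTmpSketch :: FilePath -> (FilePath -> IO a) -> IO a
withTmpSketch tmp action = do
	res <- action tmp
	removeFile tmp -- skipped when action throws
	return res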

Annex/Exception.hs

@@ -5,12 +5,13 @@
  - AnnexState are retained. This works because the Annex monad
  - internally stores the AnnexState in a MVar.
  -
- - Copyright 2011-2013 Joey Hess <joey@kitenet.net>
+ - Copyright 2011-2014 Joey Hess <joey@kitenet.net>
  -
  - Licensed under the GNU GPL version 3 or higher.
  -}
 
 {-# LANGUAGE PackageImports #-}
+{-# LANGUAGE ScopedTypeVariables #-}
 
 module Annex.Exception (
 	bracketIO,
@@ -19,6 +20,8 @@ module Annex.Exception (
 	tryAnnexIO,
 	throwAnnex,
 	catchAnnex,
+	catchNonAsyncAnnex,
+	tryNonAsyncAnnex,
 ) where
 
 import qualified Control.Monad.Catch as M
@@ -48,3 +51,13 @@ throwAnnex = M.throwM
 {- catch in the Annex monad -}
 catchAnnex :: Exception e => Annex a -> (e -> Annex a) -> Annex a
 catchAnnex = M.catch
+
+{- catches all exceptions except for async exceptions -}
+catchNonAsyncAnnex :: Annex a -> (SomeException -> Annex a) -> Annex a
+catchNonAsyncAnnex a onerr = a `M.catches`
+	[ M.Handler (\ (e :: AsyncException) -> throwAnnex e)
+	, M.Handler (\ (e :: SomeException) -> onerr e)
+	]
+
+tryNonAsyncAnnex :: Annex a -> Annex (Either SomeException a)
+tryNonAsyncAnnex a = (Right <$> a) `catchNonAsyncAnnex` (return . Left)
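
For readers unfamiliar with the pattern: async exceptions must propagate or ctrl-c and thread kills stop working. A self-contained sketch of the same idea in plain IO, using the exceptions package that Annex.Exception wraps (toy code, not part of this commit):

{-# LANGUAGE ScopedTypeVariables #-}
import Control.Exception (AsyncException, SomeException, throwIO)
import qualified Control.Monad.Catch as M

-- Async exceptions are rethrown so they still interrupt;
-- everything else goes to the handler.
catchNonAsync :: IO a -> (SomeException -> IO a) -> IO a
catchNonAsync a onerr = a `M.catches`
	[ M.Handler (\ (e :: AsyncException) -> throwIO e)
	, M.Handler (\ (e :: SomeException) -> onerr e)
	]

main :: IO ()
main = catchNonAsync (throwIO (userError "boom"))
	(\e -> putStrLn ("caught: " ++ show e))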

Assistant/Threads/Watcher.hs

@@ -184,7 +184,7 @@ runHandler :: Handler -> FilePath -> Maybe FileStatus -> Assistant ()
 runHandler handler file filestatus = void $ do
 	r <- tryIO <~> handler (normalize file) filestatus
 	case r of
-		Left e -> liftIO $ print e
+		Left e -> liftIO $ warningIO $ show e
 		Right Nothing -> noop
 		Right (Just change) -> do
 			-- Just in case the commit thread is not

Backend.hs

@@ -14,7 +14,8 @@ module Backend (
 	isAnnexLink,
 	chooseBackend,
 	lookupBackendName,
-	maybeLookupBackendName
+	maybeLookupBackendName,
+	isStableKey,
 ) where
 
 import Common.Annex
@@ -32,6 +33,8 @@ import qualified Backend.Hash
 import qualified Backend.WORM
 import qualified Backend.URL
 
+import qualified Data.Map as M
+
 list :: [Backend]
 list = Backend.Hash.backends ++ Backend.WORM.backends ++ Backend.URL.backends
@@ -116,7 +119,13 @@ lookupBackendName :: String -> Backend
 lookupBackendName s = fromMaybe unknown $ maybeLookupBackendName s
   where
 	unknown = error $ "unknown backend " ++ s
 
 maybeLookupBackendName :: String -> Maybe Backend
-maybeLookupBackendName s = headMaybe matches
-  where
-	matches = filter (\b -> s == B.name b) list
+maybeLookupBackendName s = M.lookup s nameMap
+
+nameMap :: M.Map String Backend
+nameMap = M.fromList $ zip (map B.name list) list
+
+isStableKey :: Key -> Bool
+isStableKey k = maybe False (`B.isStableKey` k)
+	(maybeLookupBackendName (keyBackendName k))
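
The linear search through the backend list is replaced by a Map built once from backend names. A toy version of the same construction (illustrative types, not the real Backend record):

import qualified Data.Map as M

data Backend = Backend { name :: String, stable :: Bool }

backends :: [Backend]
backends = [Backend "SHA256E" True, Backend "WORM" True, Backend "URL" False]

-- Built once and shared by all lookups, instead of filtering the
-- whole list on every lookupBackendName call.
nameMap :: M.Map String Backend
nameMap = M.fromList $ zip (map name backends) backends

maybeLookup :: String -> Maybe Backend
maybeLookup s = M.lookup s nameMap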

Backend/Hash.hs

@@ -7,7 +7,10 @@
 
 {-# LANGUAGE CPP #-}
 
-module Backend.Hash (backends) where
+module Backend.Hash (
+	backends,
+	testKeyBackend,
+) where
 
 import Common.Annex
 import qualified Annex
@@ -36,24 +39,23 @@ hashes = concat
 
 {- The SHA256E backend is the default, so genBackendE comes first. -}
 backends :: [Backend]
-backends = catMaybes $ map genBackendE hashes ++ map genBackend hashes
+backends = map genBackendE hashes ++ map genBackend hashes
 
-genBackend :: Hash -> Maybe Backend
-genBackend hash = Just Backend
+genBackend :: Hash -> Backend
+genBackend hash = Backend
 	{ name = hashName hash
 	, getKey = keyValue hash
 	, fsckKey = Just $ checkKeyChecksum hash
 	, canUpgradeKey = Just needsUpgrade
 	, fastMigrate = Just trivialMigrate
+	, isStableKey = const True
 	}
 
-genBackendE :: Hash -> Maybe Backend
-genBackendE hash = do
-	b <- genBackend hash
-	return $ b
-		{ name = hashNameE hash
-		, getKey = keyValueE hash
-		}
+genBackendE :: Hash -> Backend
+genBackendE hash = (genBackend hash)
+	{ name = hashNameE hash
+	, getKey = keyValueE hash
+	}
 
 hashName :: Hash -> String
 hashName (SHAHash size) = "SHA" ++ show size
@@ -175,3 +177,18 @@ skeinHasher hashsize
 	| hashsize == 512 = show . skein512
 #endif
 	| otherwise = error $ "unsupported skein size " ++ show hashsize
+
+{- A variant of the SHA256E backend, for testing that needs special keys
+ - that cannot collide with legitimate keys in the repository.
+ -
+ - This is accomplished by appending a special extension to the key,
+ - that is not one that selectExtension would select (due to being too
+ - long).
+ -}
+testKeyBackend :: Backend
+testKeyBackend =
+	let b = genBackendE (SHAHash 256)
+	in b { getKey = (fmap addE) <$$> getKey b }
+  where
+	addE k = k { keyName = keyName k ++ longext }
+	longext = ".this-is-a-test-key"
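The `<$$>` used above comes from git-annex's Utility.Applicative; as a point of reference, it behaves like fmap composed with fmap, lifting addE through both the Annex action and the Maybe it returns. A small sketch assuming that semantics, with a list lookup standing in for getKey:

-- Equivalent definition, assuming the Utility.Applicative semantics.
(<$$>) :: Functor f => (a -> b) -> (c -> f a) -> c -> f b
(<$$>) = fmap . fmap

main :: IO ()
main = print $ (show <$$> lookup (2 :: Int)) [(1, 'a'), (2, 'b')]
-- prints: Just "'b'"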

Backend/URL.hs

@@ -25,6 +25,9 @@ backend = Backend
 	, fsckKey = Nothing
 	, canUpgradeKey = Nothing
 	, fastMigrate = Nothing
+	-- The content of an url can change at any time, so URL keys are
+	-- not stable.
+	, isStableKey = const False
 	}
 
 {- Every unique url has a corresponding key. -}
{- Every unique url has a corresponding key. -} {- Every unique url has a corresponding key. -}

Backend/WORM.hs

@@ -23,6 +23,7 @@ backend = Backend
 	, fsckKey = Nothing
 	, canUpgradeKey = Nothing
 	, fastMigrate = Nothing
+	, isStableKey = const True
 	}
 
 {- The key includes the file size, modification time, and the

CmdLine/GitAnnex.hs

@@ -96,9 +96,10 @@ import qualified Command.XMPPGit
 #endif
 import qualified Command.RemoteDaemon
 #endif
-import qualified Command.Test
 #ifdef WITH_TESTSUITE
+import qualified Command.Test
 import qualified Command.FuzzTest
+import qualified Command.TestRemote
 #endif
 #ifdef WITH_EKG
 import System.Remote.Monitoring
@@ -187,9 +188,10 @@ cmds = concat
 #endif
 	, Command.RemoteDaemon.def
 #endif
-	, Command.Test.def
 #ifdef WITH_TESTSUITE
+	, Command.Test.def
 	, Command.FuzzTest.def
+	, Command.TestRemote.def
 #endif
 	]

Command/FuzzTest.hs

@@ -22,7 +22,7 @@ import Test.QuickCheck
 import Control.Concurrent
 
 def :: [Command]
-def = [ notBareRepo $ command "fuzztest" paramNothing seek SectionPlumbing
+def = [ notBareRepo $ command "fuzztest" paramNothing seek SectionTesting
 	"generates fuzz test files"]
 
 seek :: CommandSeek

Command/Test.hs

@@ -13,7 +13,7 @@ import Messages
 
 def :: [Command]
 def = [ noRepo startIO $ dontCheck repoExists $
-	command "test" paramNothing seek SectionPlumbing
+	command "test" paramNothing seek SectionTesting
 	"run built-in test suite"]
 
 seek :: CommandSeek

Command/TestRemote.hs (new file, 196 lines)

@@ -0,0 +1,196 @@
{- git-annex command
 -
 - Copyright 2014 Joey Hess <joey@kitenet.net>
 -
 - Licensed under the GNU GPL version 3 or higher.
 -}

module Command.TestRemote where

import Common
import Command
import qualified Annex
import qualified Remote
import qualified Types.Remote as Remote
import Types
import Types.Key (key2file, keyBackendName, keySize)
import Types.Backend (getKey, fsckKey)
import Types.KeySource
import Annex.Content
import Backend
import qualified Backend.Hash
import Utility.Tmp
import Utility.Metered
import Utility.DataUnits
import Utility.CopyFile
import Messages
import Types.Messages
import Remote.Helper.Chunked
import Locations

import Test.Tasty
import Test.Tasty.Runners
import Test.Tasty.HUnit
import Control.Exception
import "crypto-api" Crypto.Random
import qualified Data.ByteString as B
import qualified Data.ByteString.Lazy as L
import qualified Data.Map as M

def :: [Command]
def = [ withOptions [sizeOption] $
	command "testremote" paramRemote seek SectionTesting
		"test transfers to/from a remote"]

sizeOption :: Option
sizeOption = fieldOption [] "size" paramSize "base key size (default 1MiB)"

seek :: CommandSeek
seek ps = do
	basesz <- fromInteger . fromMaybe (1024 * 1024)
		<$> getOptionField sizeOption (pure . getsize)
	withWords (start basesz) ps
  where
	getsize v = v >>= readSize dataUnits

start :: Int -> [String] -> CommandStart
start basesz ws = do
	let name = unwords ws
	showStart "testremote" name
	r <- either error id <$> Remote.byName' name
	showSideAction "generating test keys"
	ks <- mapM randKey (keySizes basesz)
	rs <- catMaybes <$> mapM (adjustChunkSize r) (chunkSizes basesz)
	rs' <- concat <$> mapM encryptionVariants rs
	next $ perform rs' ks

perform :: [Remote] -> [Key] -> CommandPerform
perform rs ks = do
	st <- Annex.getState id
	let tests = testGroup "Remote Tests" $
		[ testGroup (desc r k) (test st r k) | k <- ks, r <- rs ]
	ok <- case tryIngredients [consoleTestReporter] mempty tests of
		Nothing -> error "No tests found!?"
		Just act -> liftIO act
	next $ cleanup rs ks ok
  where
	desc r' k = intercalate "; " $ map unwords
		[ [ "key size", show (keySize k) ]
		, [ show (chunkConfig (Remote.config r')) ]
		, [ "encryption", fromMaybe "none" (M.lookup "encryption" (Remote.config r')) ]
		]

adjustChunkSize :: Remote -> Int -> Annex (Maybe Remote)
adjustChunkSize r chunksize = adjustRemoteConfig r
	(M.insert "chunk" (show chunksize))

-- Variants of a remote with no encryption, and with simple shared
-- encryption. Gpg key based encryption is not tested.
encryptionVariants :: Remote -> Annex [Remote]
encryptionVariants r = do
	noenc <- adjustRemoteConfig r (M.insert "encryption" "none")
	sharedenc <- adjustRemoteConfig r $
		M.insert "encryption" "shared" .
		M.insert "highRandomQuality" "false"
	return $ catMaybes [noenc, sharedenc]

-- Regenerate a remote with a modified config.
adjustRemoteConfig :: Remote -> (Remote.RemoteConfig -> Remote.RemoteConfig) -> Annex (Maybe Remote)
adjustRemoteConfig r adjustconfig = Remote.generate (Remote.remotetype r)
	(Remote.repo r)
	(Remote.uuid r)
	(adjustconfig (Remote.config r))
	(Remote.gitconfig r)

test :: Annex.AnnexState -> Remote -> Key -> [TestTree]
test st r k =
	[ check "removeKey when not present" remove
	, present False
	, check "storeKey" store
	, present True
	, check "storeKey when already present" store
	, present True
	, check "retrieveKeyFile" $ do
		removeAnnex k
		get
	, check "fsck downloaded object" fsck
	, check "retrieveKeyFile resume from 33%" $ do
		loc <- Annex.calcRepo (gitAnnexLocation k)
		tmp <- prepTmp k
		partial <- liftIO $ bracket (openBinaryFile loc ReadMode) hClose $ \h -> do
			sz <- hFileSize h
			L.hGet h $ fromInteger $ sz `div` 3
		liftIO $ L.writeFile tmp partial
		removeAnnex k
		get
	, check "fsck downloaded object" fsck
	, check "retrieveKeyFile resume from 0" $ do
		tmp <- prepTmp k
		liftIO $ writeFile tmp ""
		removeAnnex k
		get
	, check "fsck downloaded object" fsck
	, check "retrieveKeyFile resume from end" $ do
		loc <- Annex.calcRepo (gitAnnexLocation k)
		tmp <- prepTmp k
		void $ liftIO $ copyFileExternal loc tmp
		removeAnnex k
		get
	, check "fsck downloaded object" fsck
	, check "removeKey when present" remove
	, present False
	]
  where
	check desc a = testCase desc $
		Annex.eval st (Annex.setOutput QuietOutput >> a) @? "failed"
	present b = check ("present " ++ show b) $
		(== Right b) <$> Remote.hasKey r k
	fsck = case maybeLookupBackendName (keyBackendName k) of
		Nothing -> return True
		Just b -> case fsckKey b of
			Nothing -> return True
			Just fscker -> fscker k (key2file k)
	get = getViaTmp k $ \dest ->
		Remote.retrieveKeyFile r k Nothing dest nullMeterUpdate
	store = Remote.storeKey r k Nothing nullMeterUpdate
	remove = Remote.removeKey r k

cleanup :: [Remote] -> [Key] -> Bool -> CommandCleanup
cleanup rs ks ok = do
	forM_ rs $ \r -> forM_ ks (Remote.removeKey r)
	forM_ ks removeAnnex
	return ok

chunkSizes :: Int -> [Int]
chunkSizes base =
	[ 0 -- no chunking
	, base `div` 100
	, base `div` 1000
	, base
	]

keySizes :: Int -> [Int]
keySizes base = filter (>= 0)
	[ 0 -- empty key is a special case when chunking
	, base
	, base + 1
	, base - 1
	, base * 2
	]

randKey :: Int -> Annex Key
randKey sz = withTmpFile "randkey" $ \f h -> do
	gen <- liftIO (newGenIO :: IO SystemRandom)
	case genBytes sz gen of
		Left e -> error $ "failed to generate random key: " ++ show e
		Right (rand, _) -> liftIO $ B.hPut h rand
	liftIO $ hClose h
	let ks = KeySource
		{ keyFilename = f
		, contentLocation = f
		, inodeCache = Nothing
		}
	k <- fromMaybe (error "failed to generate random key")
		<$> getKey Backend.Hash.testKeyBackend ks
	moveAnnex k f
	return k
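
For a sense of the matrix this builds: every remote variant (chunk configuration crossed with encryption setting) is run against every generated key size. A quick arithmetic sketch, assuming the default 1 MiB base size and that all variants generate successfully:

-- 4 chunk configs x 2 encryption variants = 8 remotes, times
-- 5 key sizes = 40 test groups, each running the checks in `test`.
testGroups :: Int
testGroups = length variants * length sizes
  where
	base = 1024 * 1024
	variants = [ (c, e)
		| c <- [0, base `div` 100, base `div` 1000, base]
		, e <- ["none", "shared"] ]
	sizes = filter (>= 0) [0, base, base + 1, base - 1, base * 2]

main :: IO ()
main = print testGroups -- 40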

Crypto.hs

@@ -3,16 +3,18 @@
  - Currently using gpg; could later be modified to support different
  - crypto backends if necessary.
  -
- - Copyright 2011-2012 Joey Hess <joey@kitenet.net>
+ - Copyright 2011-2014 Joey Hess <joey@kitenet.net>
  -
  - Licensed under the GNU GPL version 3 or higher.
  -}
 
 {-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE Rank2Types #-}
 
 module Crypto (
 	Cipher,
 	KeyIds(..),
+	EncKey,
 	StorableCipher(..),
 	genEncryptedCipher,
 	genSharedCipher,
@@ -34,6 +36,8 @@ import qualified Data.ByteString.Lazy as L
 import Data.ByteString.Lazy.UTF8 (fromString)
 import Control.Applicative
 import qualified Data.Map as M
+import Control.Monad.IO.Class
+import Control.Monad.Catch (MonadMask)
 
 import Common.Annex
 import qualified Utility.Gpg as Gpg
@@ -138,17 +142,19 @@ decryptCipher (EncryptedCipher t variant _) =
 		Hybrid -> Cipher
 		PubKey -> MacOnlyCipher
 
+type EncKey = Key -> Key
+
 {- Generates an encrypted form of a Key. The encryption does not need to be
  - reversible, nor does it need to be the same type of encryption used
  - on content. It does need to be repeatable. -}
-encryptKey :: Mac -> Cipher -> Key -> Key
+encryptKey :: Mac -> Cipher -> EncKey
 encryptKey mac c k = stubKey
 	{ keyName = macWithCipher mac c (key2file k)
 	, keyBackendName = "GPG" ++ showMac mac
 	}
 
 type Feeder = Handle -> IO ()
-type Reader a = Handle -> IO a
+type Reader m a = Handle -> m a
 
 feedFile :: FilePath -> Feeder
 feedFile f h = L.hPut h =<< L.readFile f
@@ -156,8 +162,8 @@ feedFile f h = L.hPut h =<< L.readFile f
 feedBytes :: L.ByteString -> Feeder
 feedBytes = flip L.hPut
 
-readBytes :: (L.ByteString -> IO a) -> Reader a
-readBytes a h = L.hGetContents h >>= a
+readBytes :: (MonadIO m) => (L.ByteString -> m a) -> Reader m a
+readBytes a h = liftIO (L.hGetContents h) >>= a
 
 {- Runs a Feeder action, that generates content that is symmetrically
  - encrypted with the Cipher (unless it is empty, in which case
@@ -165,7 +171,7 @@ readBytes a h = L.hGetContents h >>= a
  - read by the Reader action. Note: For public-key encryption,
  - recipients MUST be included in 'params' (for instance using
  - 'getGpgEncParams'). -}
-encrypt :: [CommandParam] -> Cipher -> Feeder -> Reader a -> IO a
+encrypt :: (MonadIO m, MonadMask m) => [CommandParam] -> Cipher -> Feeder -> Reader m a -> m a
 encrypt params cipher = case cipher of
 	Cipher{} -> Gpg.feedRead (params ++ Gpg.stdEncryptionParams True) $
 		cipherPassphrase cipher
@@ -174,7 +180,7 @@ encrypt params cipher = case cipher of
 {- Runs a Feeder action, that generates content that is decrypted with the
  - Cipher (or using a private key if the Cipher is empty), and read by the
  - Reader action. -}
-decrypt :: Cipher -> Feeder -> Reader a -> IO a
+decrypt :: (MonadIO m, MonadMask m) => Cipher -> Feeder -> Reader m a -> m a
 decrypt cipher = case cipher of
 	Cipher{} -> Gpg.feedRead [Param "--decrypt"] $ cipherPassphrase cipher
 	MacOnlyCipher{} -> Gpg.pipeLazy [Param "--decrypt"]
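
The upshot of the Reader m generalization: encrypt and decrypt can now be called from any monad with MonadIO and MonadMask instances (such as Annex), so the reader callback can interleave monadic actions while consuming the stream. A self-contained sketch of just that type change (toy code, not the gpg-backed implementation):

import Control.Monad.IO.Class (MonadIO, liftIO)
import qualified Data.ByteString.Lazy as L
import System.IO (Handle, stdin)

type Reader m a = Handle -> m a

-- As in the commit: hGetContents runs in IO, but the callback may
-- run in any MonadIO monad (IO, Annex, ...).
readBytes :: MonadIO m => (L.ByteString -> m a) -> Reader m a
readBytes a h = liftIO (L.hGetContents h) >>= a

main :: IO ()
main = readBytes (print . L.length) stdin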

Locations.hs

@@ -421,6 +421,7 @@ keyPaths key = map (keyPath key) annexHashes
  - which do not allow using a directory "XX" when "xx" already exists.
  - To support that, most repositories use the lower case hash for new data. -}
 type Hasher = Key -> FilePath
+
 annexHashes :: [Hasher]
 annexHashes = [hashDirLower, hashDirMixed]
@@ -428,12 +429,12 @@ hashDirMixed :: Hasher
 hashDirMixed k = addTrailingPathSeparator $ take 2 dir </> drop 2 dir
   where
 	dir = take 4 $ display_32bits_as_dir =<< [a,b,c,d]
-	ABCD (a,b,c,d) = md5 $ md5FilePath $ key2file k
+	ABCD (a,b,c,d) = md5 $ md5FilePath $ key2file $ nonChunkKey k
 
 hashDirLower :: Hasher
 hashDirLower k = addTrailingPathSeparator $ take 3 dir </> drop 3 dir
   where
-	dir = take 6 $ md5s $ md5FilePath $ key2file k
+	dir = take 6 $ md5s $ md5FilePath $ key2file $ nonChunkKey k
 
 {- modified version of display_32bits_as_hex from Data.Hash.MD5
  - Copyright (C) 2001 Ian Lynagh
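
The new nonChunkKey calls strip the chunk fields before hashing, so every chunk of a key is filed in the same hash directory as the key itself. A toy illustration (the real Key and nonChunkKey live in Types.Key; this sketch assumes their shape):

data Key = Key
	{ keyName :: String
	, keyChunkSize :: Maybe Integer
	, keyChunkNum :: Maybe Integer
	} deriving (Eq, Show)

-- Chunk 7 of a key hashes to the same directory as the whole key.
nonChunkKey :: Key -> Key
nonChunkKey k = k { keyChunkSize = Nothing, keyChunkNum = Nothing }

main :: IO ()
main = print $ nonChunkKey (Key "K" (Just 1048576) (Just 7))
	== nonChunkKey (Key "K" Nothing Nothing) -- True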

Logs/Chunk.hs

@@ -15,7 +15,14 @@
  - Licensed under the GNU GPL version 3 or higher.
  -}
 
-module Logs.Chunk where
+module Logs.Chunk (
+	ChunkMethod(..),
+	ChunkSize,
+	ChunkCount,
+	chunksStored,
+	chunksRemoved,
+	getCurrentChunks,
+) where
 
 import Common.Annex
 import Logs
@@ -26,19 +33,19 @@ import Logs.Chunk.Pure
 import qualified Data.Map as M
 import Data.Time.Clock.POSIX
 
-chunksStored :: UUID -> Key -> ChunkSize -> ChunkCount -> Annex ()
-chunksStored u k chunksize chunkcount = do
+chunksStored :: UUID -> Key -> ChunkMethod -> ChunkCount -> Annex ()
+chunksStored u k chunkmethod chunkcount = do
 	ts <- liftIO getPOSIXTime
 	Annex.Branch.change (chunkLogFile k) $
-		showLog . changeMapLog ts (u, chunksize) chunkcount . parseLog
+		showLog . changeMapLog ts (u, chunkmethod) chunkcount . parseLog
 
-chunksRemoved :: UUID -> Key -> ChunkSize -> Annex ()
-chunksRemoved u k chunksize = chunksStored u k chunksize 0
+chunksRemoved :: UUID -> Key -> ChunkMethod -> Annex ()
+chunksRemoved u k chunkmethod = chunksStored u k chunkmethod 0
 
-getCurrentChunks :: UUID -> Key -> Annex [(ChunkSize, ChunkCount)]
+getCurrentChunks :: UUID -> Key -> Annex [(ChunkMethod, ChunkCount)]
 getCurrentChunks u k = select . parseLog <$> Annex.Branch.get (chunkLogFile k)
   where
-	select = filter (\(_sz, ct) -> ct > 0)
-		. map (\((_ku, sz), l) -> (sz, value l))
+	select = filter (\(_m, ct) -> ct > 0)
+		. map (\((_ku, m), l) -> (m, value l))
 		. M.toList
-		. M.filterWithKey (\(ku, _sz) _ -> ku == u)
+		. M.filterWithKey (\(ku, _m) _ -> ku == u)

Logs/Chunk/Pure.hs

@@ -6,7 +6,8 @@
  -}
 
 module Logs.Chunk.Pure
-	( ChunkSize
+	( ChunkMethod(..)
+	, ChunkSize
 	, ChunkCount
 	, ChunkLog
 	, parseLog
@@ -17,24 +18,37 @@ import Common.Annex
 import Logs.MapLog
 import Data.Int
 
+-- Currently chunks are all fixed size, but other chunking methods
+-- may be added.
+data ChunkMethod = FixedSizeChunks ChunkSize | UnknownChunks String
+	deriving (Ord, Eq, Show)
+
 type ChunkSize = Int64
 
+-- 0 when chunks are no longer present
 type ChunkCount = Integer
 
-type ChunkLog = MapLog (UUID, ChunkSize) ChunkCount
+type ChunkLog = MapLog (UUID, ChunkMethod) ChunkCount
+
+parseChunkMethod :: String -> ChunkMethod
+parseChunkMethod s = maybe (UnknownChunks s) FixedSizeChunks (readish s)
+
+showChunkMethod :: ChunkMethod -> String
+showChunkMethod (FixedSizeChunks sz) = show sz
+showChunkMethod (UnknownChunks s) = s
 
 parseLog :: String -> ChunkLog
 parseLog = parseMapLog fieldparser valueparser
   where
 	fieldparser s =
-		let (u,sz) = separate (== sep) s
-		in (,) <$> pure (toUUID u) <*> readish sz
+		let (u,m) = separate (== sep) s
+		in Just (toUUID u, parseChunkMethod m)
 	valueparser = readish
 
 showLog :: ChunkLog -> String
 showLog = showMapLog fieldshower valueshower
   where
-	fieldshower (u, sz) = fromUUID u ++ sep : show sz
+	fieldshower (u, m) = fromUUID u ++ sep : showChunkMethod m
 	valueshower = show
 
 sep :: Char
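
A round trip through the serialization above, made self-contained (readish in git-annex behaves like readMaybe for this purpose, and UnknownChunks keeps unrecognized future methods parseable):

import Text.Read (readMaybe)
import Data.Int (Int64)

data ChunkMethod = FixedSizeChunks Int64 | UnknownChunks String
	deriving (Ord, Eq, Show)

parseChunkMethod :: String -> ChunkMethod
parseChunkMethod s = maybe (UnknownChunks s) FixedSizeChunks (readMaybe s)

showChunkMethod :: ChunkMethod -> String
showChunkMethod (FixedSizeChunks sz) = show sz
showChunkMethod (UnknownChunks s) = s

main :: IO ()
main = do
	print $ parseChunkMethod "10485760" -- FixedSizeChunks 10485760
	print $ parseChunkMethod "some-future-method" -- UnknownChunks, round trips unchanged
	putStrLn $ showChunkMethod (parseChunkMethod "10485760") -- "10485760"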

Remote/Directory.hs

@@ -1,16 +1,16 @@
 {- A "remote" that is just a filesystem directory.
  -
- - Copyright 2011-2012 Joey Hess <joey@kitenet.net>
+ - Copyright 2011-2014 Joey Hess <joey@kitenet.net>
  -
  - Licensed under the GNU GPL version 3 or higher.
  -}
 
 {-# LANGUAGE CPP #-}
+{-# LANGUAGE Rank2Types #-}
 
 module Remote.Directory (remote) where
 
 import qualified Data.ByteString.Lazy as L
-import qualified Data.ByteString as S
 import qualified Data.Map as M
@@ -21,10 +21,8 @@ import Config.Cost
 import Config
 import Utility.FileMode
 import Remote.Helper.Special
-import Remote.Helper.Encryptable
-import Remote.Helper.Chunked
-import qualified Remote.Helper.Chunked.Legacy as Legacy
-import Crypto
+import Remote.Helper.ChunkedEncryptable
+import qualified Remote.Directory.LegacyChunked as Legacy
 import Annex.Content
 import Annex.UUID
 import Utility.Metered
@@ -41,15 +39,15 @@ gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remot
 gen r u c gc = do
 	cst <- remoteCost gc cheapRemoteCost
 	let chunkconfig = chunkConfig c
-	return $ Just $ encryptableRemote c
-		(storeEncrypted dir (getGpgEncParams (c,gc)) chunkconfig)
-		(retrieveEncrypted dir chunkconfig)
+	return $ Just $ chunkedEncryptableRemote c
+		(prepareStore dir chunkconfig)
+		(retrieve dir chunkconfig)
 		Remote {
 			uuid = u,
 			cost = cst,
 			name = Git.repoDescribe r,
-			storeKey = store dir chunkconfig,
-			retrieveKeyFile = retrieve dir chunkconfig,
+			storeKey = storeKeyDummy,
+			retrieveKeyFile = retreiveKeyFileDummy,
 			retrieveKeyFileCheap = retrieveCheap dir chunkconfig,
 			removeKey = remove dir,
 			hasKey = checkPresent dir chunkconfig,
@@ -84,125 +82,49 @@ directorySetup mu _ c = do
 	gitConfigSpecialRemote u c' "directory" absdir
 	return (M.delete "directory" c', u)
 
-{- Locations to try to access a given Key in the Directory.
- - We try more than since we used to write to different hash directories. -}
+{- Locations to try to access a given Key in the directory.
+ - We try more than one since we used to write to different hash
+ - directories. -}
 locations :: FilePath -> Key -> [FilePath]
 locations d k = map (d </>) (keyPaths k)
 
+{- Returns the location of a Key in the directory. If the key is
+ - present, returns the location that is actually used, otherwise
+ - returns the first, default location. -}
+getLocation :: FilePath -> Key -> IO FilePath
+getLocation d k = do
+	let locs = locations d k
+	fromMaybe (Prelude.head locs) <$> firstM doesFileExist locs
+
 {- Directory where the file(s) for a key are stored. -}
 storeDir :: FilePath -> Key -> FilePath
 storeDir d k = addTrailingPathSeparator $ d </> hashDirLower k </> keyFile k
 
-{- Where we store temporary data for a key as it's being uploaded. -}
+{- Where we store temporary data for a key, in the directory, as it's being
+ - written. -}
 tmpDir :: FilePath -> Key -> FilePath
 tmpDir d k = addTrailingPathSeparator $ d </> "tmp" </> keyFile k
 
-withCheckedFiles :: (FilePath -> IO Bool) -> ChunkConfig -> FilePath -> Key -> ([FilePath] -> IO Bool) -> IO Bool
-withCheckedFiles _ _ [] _ _ = return False
-withCheckedFiles check (LegacyChunks _) d k a = go $ locations d k
-  where
-	go [] = return False
-	go (f:fs) = do
-		let chunkcount = f ++ Legacy.chunkCount
-		ifM (check chunkcount)
-			( do
-				chunks <- Legacy.listChunks f <$> readFile chunkcount
-				ifM (allM check chunks)
-					( a chunks , return False )
-			, do
-				chunks <- Legacy.probeChunks f check
-				if null chunks
-					then go fs
-					else a chunks
-			)
-withCheckedFiles check _ d k a = go $ locations d k
-  where
-	go [] = return False
-	go (f:fs) = ifM (check f) ( a [f] , go fs )
-
-withStoredFiles :: ChunkConfig -> FilePath -> Key -> ([FilePath] -> IO Bool) -> IO Bool
-withStoredFiles = withCheckedFiles doesFileExist
-
-store :: FilePath -> ChunkConfig -> Key -> AssociatedFile -> MeterUpdate -> Annex Bool
-store d chunkconfig k _f p = sendAnnex k (void $ remove d k) $ \src ->
-	metered (Just p) k $ \meterupdate ->
-		storeHelper d chunkconfig k k $ \dests ->
-			case chunkconfig of
-				LegacyChunks chunksize ->
-					storeLegacyChunked meterupdate chunksize dests
-						=<< L.readFile src
-				_ -> do
-					let dest = Prelude.head dests
-					meteredWriteFile meterupdate dest
-						=<< L.readFile src
-					return [dest]
-
-storeEncrypted :: FilePath -> [CommandParam] -> ChunkConfig -> (Cipher, Key) -> Key -> MeterUpdate -> Annex Bool
-storeEncrypted d gpgOpts chunkconfig (cipher, enck) k p = sendAnnex k (void $ remove d enck) $ \src ->
-	metered (Just p) k $ \meterupdate ->
-		storeHelper d chunkconfig enck k $ \dests ->
-			encrypt gpgOpts cipher (feedFile src) $ readBytes $ \b ->
-				case chunkconfig of
-					LegacyChunks chunksize ->
-						storeLegacyChunked meterupdate chunksize dests b
-					_ -> do
-						let dest = Prelude.head dests
-						meteredWriteFile meterupdate dest b
-						return [dest]
-
-{- Splits a ByteString into chunks and writes to dests, obeying configured
- - chunk size (not to be confused with the L.ByteString chunk size).
- - Note: Must always write at least one file, even for empty ByteString. -}
-storeLegacyChunked :: MeterUpdate -> ChunkSize -> [FilePath] -> L.ByteString -> IO [FilePath]
-storeLegacyChunked _ _ [] _ = error "bad storeLegacyChunked call"
-storeLegacyChunked meterupdate chunksize alldests@(firstdest:_) b
-	| L.null b = do
-		-- must always write at least one file, even for empty
-		L.writeFile firstdest b
-		return [firstdest]
-	| otherwise = storeLegacyChunked' meterupdate chunksize alldests (L.toChunks b) []
-
-storeLegacyChunked' :: MeterUpdate -> ChunkSize -> [FilePath] -> [S.ByteString] -> [FilePath] -> IO [FilePath]
-storeLegacyChunked' _ _ [] _ _ = error "ran out of dests"
-storeLegacyChunked' _ _ _ [] c = return $ reverse c
-storeLegacyChunked' meterupdate chunksize (d:dests) bs c = do
-	bs' <- withFile d WriteMode $
-		feed zeroBytesProcessed chunksize bs
-	storeLegacyChunked' meterupdate chunksize dests bs' (d:c)
-  where
-	feed _ _ [] _ = return []
-	feed bytes sz (l:ls) h = do
-		let len = S.length l
-		let s = fromIntegral len
-		if s <= sz || sz == chunksize
-			then do
-				S.hPut h l
-				let bytes' = addBytesProcessed bytes len
-				meterupdate bytes'
-				feed bytes' (sz - s) ls h
-			else return (l:ls)
-
-storeHelper :: FilePath -> ChunkConfig -> Key -> Key -> ([FilePath] -> IO [FilePath]) -> Annex Bool
-storeHelper d chunkconfig key origkey storer = check <&&> liftIO go
-  where
-	tmpdir = tmpDir d key
-	destdir = storeDir d key
-	{- An encrypted key does not have a known size,
-	 - so check that the size of the original key is available as free
-	 - space. -}
-	check = do
-		liftIO $ createDirectoryIfMissing True tmpdir
-		checkDiskSpace (Just tmpdir) origkey 0
-	go = case chunkconfig of
-		NoChunks -> flip catchNonAsync (\e -> print e >> return False) $ do
-			let tmpf = tmpdir </> keyFile key
-			void $ storer [tmpf]
-			finalizer tmpdir destdir
-			return True
-		UnpaddedChunks _ -> error "TODO: storeHelper with UnpaddedChunks"
-		LegacyChunks _ -> Legacy.storeChunks key tmpdir destdir storer recorder finalizer
+{- Check if there is enough free disk space in the remote's directory to
+ - store the key. Note that the unencrypted key size is checked. -}
+prepareStore :: FilePath -> ChunkConfig -> Preparer Storer
+prepareStore d chunkconfig = checkPrepare
+	(\k -> checkDiskSpace (Just d) k 0)
+	(byteStorer $ store d chunkconfig)
+
+store :: FilePath -> ChunkConfig -> Key -> L.ByteString -> MeterUpdate -> Annex Bool
+store d chunkconfig k b p = liftIO $ do
+	void $ tryIO $ createDirectoryIfMissing True tmpdir
+	case chunkconfig of
+		LegacyChunks chunksize -> Legacy.store chunksize finalizer k b p tmpdir destdir
+		_ -> do
+			let tmpf = tmpdir </> keyFile k
+			meteredWriteFile p tmpf b
+			finalizer tmpdir destdir
+			return True
+  where
+	tmpdir = tmpDir d k
+	destdir = storeDir d k
 	finalizer tmp dest = do
 		void $ tryIO $ allowWrite dest -- may already exist
 		void $ tryIO $ removeDirectoryRecursive dest -- or not exist
@@ -212,38 +134,21 @@ storeHelper d chunkconfig key origkey storer = check <&&> liftIO go
 		void $ tryIO $ do
 			mapM_ preventWrite =<< dirContents dest
 			preventWrite dest
-	recorder f s = do
-		void $ tryIO $ allowWrite f
-		writeFile f s
-		void $ tryIO $ preventWrite f
 
-retrieve :: FilePath -> ChunkConfig -> Key -> AssociatedFile -> FilePath -> MeterUpdate -> Annex Bool
-retrieve d chunkconfig k _ f p = metered (Just p) k $ \meterupdate ->
-	liftIO $ withStoredFiles chunkconfig d k $ \files ->
-		catchBoolIO $ do
-			meteredWriteFileChunks meterupdate f files L.readFile
-			return True
-
-retrieveEncrypted :: FilePath -> ChunkConfig -> (Cipher, Key) -> Key -> FilePath -> MeterUpdate -> Annex Bool
-retrieveEncrypted d chunkconfig (cipher, enck) k f p = metered (Just p) k $ \meterupdate ->
-	liftIO $ withStoredFiles chunkconfig d enck $ \files ->
-		catchBoolIO $ do
-			decrypt cipher (feeder files) $
-				readBytes $ meteredWriteFile meterupdate f
-			return True
-  where
-	feeder files h = forM_ files $ L.hPut h <=< L.readFile
+retrieve :: FilePath -> ChunkConfig -> Preparer Retriever
+retrieve d (LegacyChunks _) = Legacy.retrieve locations d
+retrieve d _ = simplyPrepare $ byteRetriever $ \k ->
+	liftIO $ L.readFile =<< getLocation d k
 
 retrieveCheap :: FilePath -> ChunkConfig -> Key -> FilePath -> Annex Bool
--- no cheap retrieval for chunks
+-- no cheap retrieval possible for chunks
 retrieveCheap _ (UnpaddedChunks _) _ _ = return False
 retrieveCheap _ (LegacyChunks _) _ _ = return False
 #ifndef mingw32_HOST_OS
-retrieveCheap d ck k f = liftIO $ withStoredFiles ck d k go
-  where
-	go [file] = catchBoolIO $ createSymbolicLink file f >> return True
-	go _files = return False
+retrieveCheap d NoChunks k f = liftIO $ catchBoolIO $ do
+	file <- getLocation d k
+	createSymbolicLink file f
+	return True
 #else
 retrieveCheap _ _ _ _ = return False
 #endif
@@ -256,12 +161,25 @@ remove d k = liftIO $ do
 	 - before it can delete them. -}
 	void $ tryIO $ mapM_ allowWrite =<< dirContents dir
 #endif
-	catchBoolIO $ do
+	ok <- catchBoolIO $ do
 		removeDirectoryRecursive dir
 		return True
+	{- Removing the subdirectory will fail if it doesn't exist.
+	 - But, we want to succeed in that case, as long as the directory
+	 - remote's top-level directory does exist. -}
+	if ok
+		then return ok
+		else doesDirectoryExist d <&&> (not <$> doesDirectoryExist dir)
   where
 	dir = storeDir d k
 
 checkPresent :: FilePath -> ChunkConfig -> Key -> Annex (Either String Bool)
-checkPresent d chunkconfig k = liftIO $ catchMsgIO $ withStoredFiles chunkconfig d k $
-	const $ return True -- withStoredFiles checked that it exists
+checkPresent d (LegacyChunks _) k = Legacy.checkPresent d locations k
+checkPresent d _ k = liftIO $ do
+	v <- catchMsgIO $ anyM doesFileExist (locations d k)
+	case v of
+		Right False -> ifM (doesDirectoryExist d)
+			( return v
+			, return $ Left $ "directory " ++ d ++ " is not accessible"
+			)
+		_ -> return v
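
The directory remote is now written against the ChunkedEncryptable callback types rather than doing its own chunking and encryption. A reconstructed sketch of those types, inferred from how they are used in this commit (the real definitions live in Types.StoreRetrieve; the toy stand-ins make the sketch self-contained):

import qualified Data.ByteString.Lazy as L

type Annex = IO -- stand-in
newtype Key = Key String -- stand-in
type MeterUpdate = Integer -> IO () -- stand-in

-- A remote is handed content either as a file on disk or as a lazy
-- ByteString (one chunk's worth).
data ContentSource
	= FileContent FilePath
	| ByteContent L.ByteString

-- Store one (possibly chunk) key from the given content.
type Storer = Key -> ContentSource -> MeterUpdate -> Annex Bool

-- Produce the content of a key and feed it to a sink callback.
type Retriever = Key -> MeterUpdate -> (ContentSource -> Annex Bool) -> Annex Bool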

Remote/Directory/LegacyChunked.hs (new file, 112 lines)

@@ -0,0 +1,112 @@
{- Legacy chunksize support for directory special remote.
 -
 - Can be removed eventually.
 -
 - Copyright 2011-2012 Joey Hess <joey@kitenet.net>
 -
 - Licensed under the GNU GPL version 3 or higher.
 -}

{-# LANGUAGE Rank2Types #-}

module Remote.Directory.LegacyChunked where

import qualified Data.ByteString.Lazy as L
import qualified Data.ByteString as S

import Common.Annex
import Utility.FileMode
import Remote.Helper.ChunkedEncryptable
import qualified Remote.Helper.Chunked.Legacy as Legacy
import Annex.Perms
import Utility.Metered

withCheckedFiles :: (FilePath -> IO Bool) -> FilePath -> (FilePath -> Key -> [FilePath]) -> Key -> ([FilePath] -> IO Bool) -> IO Bool
withCheckedFiles _ [] _locations _ _ = return False
withCheckedFiles check d locations k a = go $ locations d k
  where
	go [] = return False
	go (f:fs) = do
		let chunkcount = f ++ Legacy.chunkCount
		ifM (check chunkcount)
			( do
				chunks <- Legacy.listChunks f <$> readFile chunkcount
				ifM (allM check chunks)
					( a chunks , return False )
			, do
				chunks <- Legacy.probeChunks f check
				if null chunks
					then go fs
					else a chunks
			)

withStoredFiles :: FilePath -> (FilePath -> Key -> [FilePath]) -> Key -> ([FilePath] -> IO Bool) -> IO Bool
withStoredFiles = withCheckedFiles doesFileExist

{- Splits a ByteString into chunks and writes to dests, obeying configured
 - chunk size (not to be confused with the L.ByteString chunk size). -}
storeLegacyChunked :: MeterUpdate -> ChunkSize -> [FilePath] -> L.ByteString -> IO [FilePath]
storeLegacyChunked _ _ [] _ = error "bad storeLegacyChunked call"
storeLegacyChunked meterupdate chunksize alldests@(firstdest:_) b
	| L.null b = do
		-- always write at least one file, even for empty
		L.writeFile firstdest b
		return [firstdest]
	| otherwise = storeLegacyChunked' meterupdate chunksize alldests (L.toChunks b) []

storeLegacyChunked' :: MeterUpdate -> ChunkSize -> [FilePath] -> [S.ByteString] -> [FilePath] -> IO [FilePath]
storeLegacyChunked' _ _ [] _ _ = error "ran out of dests"
storeLegacyChunked' _ _ _ [] c = return $ reverse c
storeLegacyChunked' meterupdate chunksize (d:dests) bs c = do
	bs' <- withFile d WriteMode $
		feed zeroBytesProcessed chunksize bs
	storeLegacyChunked' meterupdate chunksize dests bs' (d:c)
  where
	feed _ _ [] _ = return []
	feed bytes sz (l:ls) h = do
		let len = S.length l
		let s = fromIntegral len
		if s <= sz || sz == chunksize
			then do
				S.hPut h l
				let bytes' = addBytesProcessed bytes len
				meterupdate bytes'
				feed bytes' (sz - s) ls h
			else return (l:ls)

storeHelper :: (FilePath -> FilePath -> IO ()) -> Key -> ([FilePath] -> IO [FilePath]) -> FilePath -> FilePath -> IO Bool
storeHelper finalizer key storer tmpdir destdir = do
	void $ liftIO $ tryIO $ createDirectoryIfMissing True tmpdir
	Legacy.storeChunks key tmpdir destdir storer recorder finalizer
  where
	recorder f s = do
		void $ tryIO $ allowWrite f
		writeFile f s
		void $ tryIO $ preventWrite f

store :: ChunkSize -> (FilePath -> FilePath -> IO ()) -> Key -> L.ByteString -> MeterUpdate -> FilePath -> FilePath -> IO Bool
store chunksize finalizer k b p = storeHelper finalizer k $ \dests ->
	storeLegacyChunked p chunksize dests b

{- Need to get a single ByteString containing every chunk.
 - Done very inefficiently, by writing to a temp file.
 - :/ This is legacy code..
 -}
retrieve :: (FilePath -> Key -> [FilePath]) -> FilePath -> Preparer Retriever
retrieve locations d basek a = do
	showLongNote "This remote uses the deprecated chunksize setting. So this will be quite slow."
	tmpdir <- fromRepo $ gitAnnexTmpMiscDir
	createAnnexDirectory tmpdir
	let tmp = tmpdir </> keyFile basek ++ ".directorylegacy.tmp"
	a $ Just $ byteRetriever $ \k -> liftIO $ do
		void $ withStoredFiles d locations k $ \fs -> do
			forM_ fs $
				S.appendFile tmp <=< S.readFile
			return True
		b <- L.readFile tmp
		nukeFile tmp
		return b

checkPresent :: FilePath -> (FilePath -> Key -> [FilePath]) -> Key -> Annex (Either String Bool)
checkPresent d locations k = liftIO $ catchMsgIO $
	withStoredFiles d locations k $
		-- withStoredFiles checked that it exists
		const $ return True

Remote/External.hs

@@ -15,14 +15,12 @@ import Types.CleanupActions
 import qualified Git
 import Config
 import Remote.Helper.Special
-import Remote.Helper.Encryptable
-import Crypto
+import Remote.Helper.ChunkedEncryptable
 import Utility.Metered
 import Logs.Transfer
 import Logs.PreferredContent.Raw
 import Logs.RemoteState
 import Config.Cost
-import Annex.Content
 import Annex.UUID
 import Annex.Exception
 import Creds
@@ -30,7 +28,6 @@ import Creds
 import Control.Concurrent.STM
 import System.Log.Logger (debugM)
 import qualified Data.Map as M
-import qualified Data.ByteString.Lazy as L
 
 remote :: RemoteType
 remote = RemoteType {
@@ -46,15 +43,15 @@ gen r u c gc = do
 	Annex.addCleanup (RemoteCleanup u) $ stopExternal external
 	cst <- getCost external r gc
 	avail <- getAvailability external r gc
-	return $ Just $ encryptableRemote c
-		(storeEncrypted external $ getGpgEncParams (c,gc))
-		(retrieveEncrypted external)
+	return $ Just $ chunkedEncryptableRemote c
+		(simplyPrepare $ store external)
+		(simplyPrepare $ retrieve external)
 		Remote {
 			uuid = u,
 			cost = cst,
 			name = Git.repoDescribe r,
-			storeKey = store external,
-			retrieveKeyFile = retrieve external,
+			storeKey = storeKeyDummy,
+			retrieveKeyFile = retreiveKeyFileDummy,
 			retrieveKeyFileCheap = \_ _ -> return False,
 			removeKey = remove external,
 			hasKey = checkPresent external,
@@ -90,25 +87,8 @@ externalSetup mu _ c = do
 	gitConfigSpecialRemote u c'' "externaltype" externaltype
 	return (c'', u)
 
-store :: External -> Key -> AssociatedFile -> MeterUpdate -> Annex Bool
-store external k _f p = sendAnnex k rollback $ \f ->
-	metered (Just p) k $
-		storeHelper external k f
-  where
-	rollback = void $ remove external k
-
-storeEncrypted :: External -> [CommandParam] -> (Cipher, Key) -> Key -> MeterUpdate -> Annex Bool
-storeEncrypted external gpgOpts (cipher, enck) k p = withTmp enck $ \tmp ->
-	sendAnnex k rollback $ \src -> do
-		metered (Just p) k $ \meterupdate -> do
-			liftIO $ encrypt gpgOpts cipher (feedFile src) $
-				readBytes $ L.writeFile tmp
-			storeHelper external enck tmp meterupdate
-  where
-	rollback = void $ remove external enck
-
-storeHelper :: External -> Key -> FilePath -> MeterUpdate -> Annex Bool
-storeHelper external k f p = safely $
+store :: External -> Storer
+store external = fileStorer $ \k f p ->
 	handleRequest external (TRANSFER Upload k f) (Just p) $ \resp ->
 		case resp of
 			TRANSFER_SUCCESS Upload k' | k == k' ->
@@ -119,31 +99,15 @@ storeHelper external k f p = safely $
 				return False
 			_ -> Nothing
 
-retrieve :: External -> Key -> AssociatedFile -> FilePath -> MeterUpdate -> Annex Bool
-retrieve external k _f d p = metered (Just p) k $
-	retrieveHelper external k d
-
-retrieveEncrypted :: External -> (Cipher, Key) -> Key -> FilePath -> MeterUpdate -> Annex Bool
-retrieveEncrypted external (cipher, enck) k f p = withTmp enck $ \tmp ->
-	metered (Just p) k $ \meterupdate ->
-		ifM (retrieveHelper external enck tmp meterupdate)
-			( liftIO $ catchBoolIO $ do
-				decrypt cipher (feedFile tmp) $
-					readBytes $ L.writeFile f
-				return True
-			, return False
-			)
-
-retrieveHelper :: External -> Key -> FilePath -> MeterUpdate -> Annex Bool
-retrieveHelper external k d p = safely $
+retrieve :: External -> Retriever
+retrieve external = fileRetriever $ \d k p ->
 	handleRequest external (TRANSFER Download k d) (Just p) $ \resp ->
 		case resp of
 			TRANSFER_SUCCESS Download k'
-				| k == k' -> Just $ return True
+				| k == k' -> Just $ return ()
 			TRANSFER_FAILURE Download k' errmsg
 				| k == k' -> Just $ do
-					warning errmsg
-					return False
+					error errmsg
 			_ -> Nothing
 
 remove :: External -> Key -> Annex Bool

Remote/Glacier.hs

@@ -282,7 +282,8 @@ jobList r keys = go =<< glacierEnv (config r) (uuid r)
 		then return nada
 		else do
 			enckeys <- forM keys $ \k ->
-				maybe k snd <$> cipherKey (config r) k
+				maybe k (\(_, enck) -> enck k)
+					<$> cipherKey (config r)
 			let keymap = M.fromList $ zip enckeys keys
 			let convert = mapMaybe (`M.lookup` keymap)
 			return (convert succeeded, convert failed)

@ -1,22 +1,30 @@
{- git-annex chunked remotes {- git-annex chunked remotes
- -
- Copyright 2012-2014 Joey Hess <joey@kitenet.net> - Copyright 2014 Joey Hess <joey@kitenet.net>
- -
- Licensed under the GNU GPL version 3 or higher. - Licensed under the GNU GPL version 3 or higher.
-} -}
module Remote.Helper.Chunked module Remote.Helper.Chunked (
( ChunkSize ChunkSize,
, ChunkConfig(..) ChunkConfig(..),
, chunkConfig chunkConfig,
, meteredWriteFileChunks storeChunks,
) where removeChunks,
retrieveChunks,
hasKeyChunks,
) where
import Common.Annex import Common.Annex
import Utility.DataUnits import Utility.DataUnits
import Types.StoreRetrieve
import Types.Remote import Types.Remote
import Logs.Chunk.Pure (ChunkSize) import Types.Key
import Logs.Chunk
import Utility.Metered import Utility.Metered
import Crypto (EncKey)
import Backend (isStableKey)
import Annex.Exception
import qualified Data.ByteString.Lazy as L import qualified Data.ByteString.Lazy as L
import qualified Data.Map as M import qualified Data.Map as M
@ -25,23 +33,366 @@ data ChunkConfig
= NoChunks = NoChunks
| UnpaddedChunks ChunkSize | UnpaddedChunks ChunkSize
| LegacyChunks ChunkSize | LegacyChunks ChunkSize
deriving (Show)
noChunks :: ChunkConfig -> Bool
noChunks NoChunks = True
noChunks _ = False
chunkConfig :: RemoteConfig -> ChunkConfig chunkConfig :: RemoteConfig -> ChunkConfig
chunkConfig m = chunkConfig m =
case M.lookup "chunksize" m of case M.lookup "chunksize" m of
Nothing -> case M.lookup "chunk" m of Nothing -> case M.lookup "chunk" m of
Nothing -> NoChunks Nothing -> NoChunks
Just v -> UnpaddedChunks $ readsz v "chunk" Just v -> readsz UnpaddedChunks v "chunk"
Just v -> LegacyChunks $ readsz v "chunksize" Just v -> readsz LegacyChunks v "chunksize"
where where
readsz v f = case readSize dataUnits v of readsz c v f = case readSize dataUnits v of
Just size | size > 0 -> fromInteger size Just size
_ -> error ("bad " ++ f) | size == 0 -> NoChunks
| size > 0 -> c (fromInteger size)
_ -> error $ "bad configuration " ++ f ++ "=" ++ v
{- Writes a series of chunks to a file. The feeder is called to get -- An infinite stream of chunk keys, starting from chunk 1.
- each chunk. -} newtype ChunkKeyStream = ChunkKeyStream [Key]
meteredWriteFileChunks :: MeterUpdate -> FilePath -> [v] -> (v -> IO L.ByteString) -> IO ()
meteredWriteFileChunks meterupdate dest chunks feeder = chunkKeyStream :: Key -> ChunkSize -> ChunkKeyStream
withBinaryFile dest WriteMode $ \h -> chunkKeyStream basek chunksize = ChunkKeyStream $ map mk [1..]
forM_ chunks $ where
meteredWrite meterupdate h <=< feeder mk chunknum = sizedk { keyChunkNum = Just chunknum }
sizedk = basek { keyChunkSize = Just (toInteger chunksize) }
nextChunkKeyStream :: ChunkKeyStream -> (Key, ChunkKeyStream)
nextChunkKeyStream (ChunkKeyStream (k:l)) = (k, ChunkKeyStream l)
nextChunkKeyStream (ChunkKeyStream []) = undefined -- stream is infinite!
takeChunkKeyStream :: ChunkCount -> ChunkKeyStream -> [Key]
takeChunkKeyStream n (ChunkKeyStream l) = genericTake n l
-- Number of chunks already consumed from the stream.
numChunks :: ChunkKeyStream -> Integer
numChunks = pred . fromJust . keyChunkNum . fst . nextChunkKeyStream
{- Splits up the key's content into chunks, passing each chunk to
- the storer action, along with a corresponding chunk key and a
- progress meter update callback.
-
- To support resuming, the checker is used to find the first missing
- chunk key. Storing starts from that chunk.
-
- This buffers each chunk in memory, so can use a lot of memory
- with a large ChunkSize.
- More optimal versions of this can be written, that rely
- on L.toChunks to split the lazy bytestring into chunks (typically
- smaller than the ChunkSize), and eg, write those chunks to a Handle.
- But this is the best that can be done with the storer interface that
- writes a whole L.ByteString at a time.
-}
storeChunks
:: UUID
-> ChunkConfig
-> Key
-> FilePath
-> MeterUpdate
-> (Key -> ContentSource -> MeterUpdate -> Annex Bool)
-> (Key -> Annex (Either String Bool))
-> Annex Bool
storeChunks u chunkconfig k f p storer checker =
case chunkconfig of
(UnpaddedChunks chunksize) | isStableKey k ->
bracketIO open close (go chunksize)
_ -> showprogress $ storer k (FileContent f)
where
showprogress = metered (Just p) k
open = tryIO $ openBinaryFile f ReadMode
close (Right h) = hClose h
close (Left _) = noop
go _ (Left e) = do
warning (show e)
return False
go chunksize (Right h) = showprogress $ \meterupdate -> do
let chunkkeys = chunkKeyStream k chunksize
(chunkkeys', startpos) <- seekResume h chunkkeys checker
b <- liftIO $ L.hGetContents h
gochunks meterupdate startpos chunksize b chunkkeys'
gochunks :: MeterUpdate -> BytesProcessed -> ChunkSize -> L.ByteString -> ChunkKeyStream -> Annex Bool
gochunks meterupdate startpos chunksize = loop startpos . splitchunk
where
splitchunk = L.splitAt chunksize
loop bytesprocessed (chunk, bs) chunkkeys
| L.null chunk && numchunks > 0 = do
-- Once all chunks are successfully
-- stored, update the chunk log.
chunksStored u k (FixedSizeChunks chunksize) numchunks
return True
| otherwise = do
liftIO $ meterupdate' zeroBytesProcessed
let (chunkkey, chunkkeys') = nextChunkKeyStream chunkkeys
ifM (storer chunkkey (ByteContent chunk) meterupdate')
( do
let bytesprocessed' = addBytesProcessed bytesprocessed (L.length chunk)
loop bytesprocessed' (splitchunk bs) chunkkeys'
, return False
)
where
numchunks = numChunks chunkkeys
{- The MeterUpdate that is passed to the action
- storing a chunk is offset, so that it reflects
- the total bytes that have already been stored
- in previous chunks. -}
meterupdate' = offsetMeterUpdate meterupdate bytesprocessed
{- Check if any of the chunk keys are present. If found, seek forward
- in the Handle, so it will be read starting at the first missing chunk.
- Returns the ChunkKeyStream truncated to start at the first missing
- chunk, and the number of bytes skipped due to resuming.
-
- As an optimisation, if the file fits into a single chunk, there's no need
- to check if that chunk is present -- we know it's not, because otherwise
- the whole file would be present and there would be no reason to try to
- store it.
-}
seekResume
:: Handle
-> ChunkKeyStream
-> (Key -> Annex (Either String Bool))
-> Annex (ChunkKeyStream, BytesProcessed)
seekResume h chunkkeys checker = do
sz <- liftIO (hFileSize h)
if sz <= fromMaybe 0 (keyChunkSize $ fst $ nextChunkKeyStream chunkkeys)
then return (chunkkeys, zeroBytesProcessed)
else check 0 chunkkeys sz
where
check pos cks sz
| pos >= sz = do
-- All chunks are already stored!
liftIO $ hSeek h AbsoluteSeek sz
return (cks, toBytesProcessed sz)
| otherwise = do
v <- checker k
case v of
Right True ->
check pos' cks' sz
_ -> do
when (pos > 0) $
liftIO $ hSeek h AbsoluteSeek pos
return (cks, toBytesProcessed pos)
where
(k, cks') = nextChunkKeyStream cks
pos' = pos + fromMaybe 0 (keyChunkSize k)
{- Removes all chunks of a key from a remote, by calling a remover
- action on each.
-
- The remover action should succeed even if asked to
- remove a key that is not present on the remote.
-
- This action may be called on a chunked key. It will simply remove it.
-}
removeChunks :: (Key -> Annex Bool) -> UUID -> ChunkConfig -> EncKey -> Key -> Annex Bool
removeChunks remover u chunkconfig encryptor k = do
ls <- chunkKeys u chunkconfig k
ok <- allM (remover . encryptor) (concat ls)
when ok $ do
let chunksizes = catMaybes $ map (keyChunkSize <=< headMaybe) ls
forM_ chunksizes $ chunksRemoved u k . FixedSizeChunks . fromIntegral
return ok
{- Retrieves a key from a remote, using a retriever action.
-
- When the remote is chunked, tries each of the options returned by
- chunkKeys until it finds one where the retriever successfully
- gets the first chunked key. The content of that key, and any
- other chunks in the list is fed to the sink.
-
- If retrival of one of the subsequent chunks throws an exception,
- gives up and returns False. Note that partial data may have been
- written to the sink in this case.
-
- Resuming is supported when using chunks. When the destination file
- already exists, it skips to the next chunked key that would be needed
- to resume.
-}
retrieveChunks
:: Retriever
-> UUID
-> ChunkConfig
-> EncKey
-> Key
-> FilePath
-> MeterUpdate
-> (Maybe Handle -> Maybe MeterUpdate -> ContentSource -> Annex Bool)
-> Annex Bool
retrieveChunks retriever u chunkconfig encryptor basek dest basep sink
| noChunks chunkconfig =
-- Optimisation: Try the unchunked key first, to avoid
-- looking in the git-annex branch for chunk counts
-- that are likely not there.
getunchunked `catchNonAsyncAnnex`
const (go =<< chunkKeysOnly u basek)
| otherwise = go =<< chunkKeys u chunkconfig basek
where
go ls = do
currsize <- liftIO $ catchMaybeIO $
toInteger . fileSize <$> getFileStatus dest
let ls' = maybe ls (setupResume ls) currsize
if any null ls'
then return True -- dest is already complete
else firstavail currsize ls' `catchNonAsyncAnnex` giveup
giveup e = do
warning (show e)
return False
firstavail _ [] = return False
firstavail currsize ([]:ls) = firstavail currsize ls
firstavail currsize ((k:ks):ls)
| k == basek = getunchunked
`catchNonAsyncAnnex` (const $ firstavail currsize ls)
| otherwise = do
let offset = resumeOffset currsize k
let p = maybe basep
(offsetMeterUpdate basep . toBytesProcessed)
offset
v <- tryNonAsyncAnnex $
retriever (encryptor k) p $ \content ->
bracketIO (maybe opennew openresume offset) hClose $ \h -> do
void $ tosink (Just h) p content
let sz = toBytesProcessed $
fromMaybe 0 $ keyChunkSize k
getrest p h sz sz ks
`catchNonAsyncAnnex` giveup
case v of
Left e
| null ls -> giveup e
| otherwise -> firstavail currsize ls
Right r -> return r
getrest _ _ _ _ [] = return True
getrest p h sz bytesprocessed (k:ks) = do
let p' = offsetMeterUpdate p bytesprocessed
liftIO $ p' zeroBytesProcessed
ifM (retriever (encryptor k) p' $ tosink (Just h) p')
( getrest p h sz (addBytesProcessed bytesprocessed sz) ks
, giveup "chunk retrieval failed"
)
getunchunked = retriever (encryptor basek) basep $ tosink Nothing basep
opennew = openBinaryFile dest WriteMode
-- Open the file and seek to the start point in order to resume.
openresume startpoint = do
-- ReadWriteMode allows seeking; AppendMode does not.
h <- openBinaryFile dest ReadWriteMode
hSeek h AbsoluteSeek startpoint
return h
{- Progress meter updating is a bit tricky: If the Retriever
- populates a file, it is responsible for updating progress
- as the file is being retrieved.
-
- However, if the Retriever generates a lazy ByteString,
- it is not responsible for updating progress (often it cannot).
- Instead, the sink is passed a meter to update as it consumes
- the ByteString.
-}
tosink h p content = sink h p' content
where
p'
| isByteContent content = Just p
| otherwise = Nothing
{- Can resume when the chunk's offset is at or before the end of
- the dest file. -}
resumeOffset :: Maybe Integer -> Key -> Maybe Integer
resumeOffset Nothing _ = Nothing
resumeOffset currsize k
| offset <= currsize = offset
| otherwise = Nothing
where
offset = chunkKeyOffset k
{- Drops chunks that are already present in a file, based on its size.
- Keeps any non-chunk keys.
-}
setupResume :: [[Key]] -> Integer -> [[Key]]
setupResume ls currsize = map dropunneeded ls
where
dropunneeded [] = []
dropunneeded l@(k:_) = case keyChunkSize k of
Just chunksize | chunksize > 0 ->
genericDrop (currsize `div` chunksize) l
_ -> l
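{- A worked example (illustrative numbers): with 1 MiB (1048576 byte)
 - chunks and a dest file that already contains 2500000 bytes,
 - dropunneeded discards 2500000 `div` 1048576 = 2 keys from each
 - chunk list, and resumeOffset for the third chunk key
 - (keyChunkNum = Just 3) is Just 2097152, so retrieval seeks there
 - and rewrites the partially transferred third chunk.
 -}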
{- Checks if a key is present in a remote. This requires all keys in
 - any one of the lists of chunk keys returned by chunkKeys to check
 - out as being present using the checker action.
 -}
hasKeyChunks
:: (Key -> Annex (Either String Bool))
-> UUID
-> ChunkConfig
-> EncKey
-> Key
-> Annex (Either String Bool)
hasKeyChunks checker u chunkconfig encryptor basek
| noChunks chunkconfig =
-- Optimisation: Try the unchunked key first, to avoid
-- looking in the git-annex branch for chunk counts
-- that are likely not there.
ifM ((Right True ==) <$> checker (encryptor basek))
( return (Right True)
, checklists Nothing =<< chunkKeysOnly u basek
)
| otherwise = checklists Nothing =<< chunkKeys u chunkconfig basek
where
checklists Nothing [] = return (Right False)
checklists (Just deferrederror) [] = return (Left deferrederror)
checklists d (l:ls)
| not (null l) = do
v <- checkchunks l
case v of
Left e -> checklists (Just e) ls
Right True -> return (Right True)
Right False -> checklists Nothing ls
| otherwise = checklists d ls
checkchunks :: [Key] -> Annex (Either String Bool)
checkchunks [] = return (Right True)
checkchunks (k:ks) = do
v <- checker (encryptor k)
if v == Right True
then checkchunks ks
else return v
{- A key can be stored in a remote unchunked, or as a list of chunked keys.
- This can be the case whether or not the remote is currently configured
- to use chunking.
-
- It's even possible for a remote to have the same key stored multiple
- times with different chunk sizes!
-
- This finds all possible lists of keys that might be on the remote that
- can be combined to get back the requested key, in order from most to
- least likely to exist.
-}
chunkKeys :: UUID -> ChunkConfig -> Key -> Annex [[Key]]
chunkKeys u chunkconfig k = do
l <- chunkKeysOnly u k
return $ if noChunks chunkconfig
then [k] : l
else l ++ [[k]]
chunkKeysOnly :: UUID -> Key -> Annex [[Key]]
chunkKeysOnly u k = map (toChunkList k) <$> getCurrentChunks u k
toChunkList :: Key -> (ChunkMethod, ChunkCount) -> [Key]
toChunkList k (FixedSizeChunks chunksize, chunkcount) =
takeChunkKeyStream chunkcount $ chunkKeyStream k chunksize
toChunkList _ (UnknownChunks _, _) = []
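{- For example (hypothetical state): if getCurrentChunks reports
 - [(FixedSizeChunks 1048576, 3)], chunkKeysOnly yields one list of
 - three chunk keys. chunkKeys then returns [[k], [c1,c2,c3]] when
 - chunking is currently disabled (unchunked key tried first), and
 - [[c1,c2,c3], [k]] when it is enabled.
 -}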


@ -9,6 +9,7 @@ module Remote.Helper.Chunked.Legacy where
import Common.Annex import Common.Annex
import Remote.Helper.Chunked import Remote.Helper.Chunked
import Utility.Metered
import qualified Data.ByteString.Lazy as L import qualified Data.ByteString.Lazy as L
import qualified Control.Exception as E import qualified Control.Exception as E
@ -73,7 +74,7 @@ storeChunks key tmp dest storer recorder finalizer = either onerr return
finalizer tmp dest finalizer tmp dest
return (not $ null stored) return (not $ null stored)
onerr e = do onerr e = do
print e warningIO (show e)
return False return False
basef = tmp ++ keyFile key basef = tmp ++ keyFile key
@ -104,7 +105,7 @@ storeChunked chunksize dests storer content = either onerr return
| otherwise = storechunks sz [] dests content | otherwise = storechunks sz [] dests content
onerr e = do onerr e = do
print e warningIO (show e)
return [] return []
storechunks _ _ [] _ = return [] -- ran out of dests storechunks _ _ [] _ = return [] -- ran out of dests
@ -114,3 +115,12 @@ storeChunked chunksize dests storer content = either onerr return
let (chunk, b') = L.splitAt sz b let (chunk, b') = L.splitAt sz b
storer d chunk storer d chunk
storechunks sz (d:useddests) ds b' storechunks sz (d:useddests) ds b'
{- Writes a series of chunks to a file. The feeder is called to get
- each chunk.
-}
meteredWriteFileChunks :: MeterUpdate -> FilePath -> [v] -> (v -> IO L.ByteString) -> IO ()
meteredWriteFileChunks meterupdate dest chunks feeder =
withBinaryFile dest WriteMode $ \h ->
forM_ chunks $
meteredWrite meterupdate h <=< feeder
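{- A minimal usage sketch (fetchChunk is a hypothetical action that
 - downloads one chunk by its url; nullMeterUpdate discards progress):
 -
 - reassemble :: FilePath -> [String] -> IO ()
 - reassemble dest urls =
 -     meteredWriteFileChunks nullMeterUpdate dest urls fetchChunk
 -}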


@ -0,0 +1,200 @@
{- Remotes that support both chunking and encryption.
-
- Copyright 2014 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
{-# LANGUAGE RankNTypes #-}
module Remote.Helper.ChunkedEncryptable (
Preparer,
Storer,
Retriever,
simplyPrepare,
ContentSource,
checkPrepare,
fileStorer,
byteStorer,
fileRetriever,
byteRetriever,
storeKeyDummy,
retreiveKeyFileDummy,
chunkedEncryptableRemote,
module X
) where
import Common.Annex
import Types.StoreRetrieve
import Types.Remote
import Crypto
import Config.Cost
import Utility.Metered
import Remote.Helper.Chunked as X
import Remote.Helper.Encryptable as X
import Annex.Content
import Annex.Exception
import qualified Data.ByteString.Lazy as L
import Control.Exception (bracket)
-- Use when nothing needs to be done to prepare a helper.
simplyPrepare :: helper -> Preparer helper
simplyPrepare helper _ a = a $ Just helper
-- Use to run a check when preparing a helper.
checkPrepare :: (Key -> Annex Bool) -> helper -> Preparer helper
checkPrepare checker helper k a = ifM (checker k)
( a (Just helper)
, a Nothing
)
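{- For example (a sketch; checkquota and mystorer are assumed helpers,
 - not part of this module): hand out the storer only when a quota
 - check passes:
 -
 - prepare :: Preparer Storer
 - prepare = checkPrepare checkquota mystorer
 -}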
-- A Storer that expects to be provided with a file containing
-- the content of the key to store.
fileStorer :: (Key -> FilePath -> MeterUpdate -> Annex Bool) -> Storer
fileStorer a k (FileContent f) m = a k f m
fileStorer a k (ByteContent b) m = withTmp k $ \f -> do
liftIO $ L.writeFile f b
a k f m
-- A Storer that expects to be provided with a L.ByteString of
-- the content to store.
byteStorer :: (Key -> L.ByteString -> MeterUpdate -> Annex Bool) -> Storer
byteStorer a k c m = withBytes c $ \b -> a k b m
-- A Retriever that writes the content of a Key to a provided file.
-- It is responsible for updating the progress meter as it retrieves data.
fileRetriever :: (FilePath -> Key -> MeterUpdate -> Annex ()) -> Retriever
fileRetriever a k m callback = do
f <- prepTmp k
a f k m
callback (FileContent f)
-- A Retriever that generates a L.ByteString containing the Key's content.
byteRetriever :: (Key -> Annex L.ByteString) -> Retriever
byteRetriever a k _m callback = callback =<< (ByteContent <$> a k)
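{- For example (a sketch; fetchObject is an assumed
 - FilePath -> IO L.ByteString action): a whole-object retriever for a
 - simple API:
 -
 - retr :: Retriever
 - retr = byteRetriever $ \k -> liftIO (fetchObject (key2file k))
 -}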
{- The base Remote that is provided to chunkedEncryptableRemote
- needs to have storeKey and retrieveKeyFile methods, but they are
- never actually used (since chunkedEncryptableRemote replaces
- them). Here are some dummy ones.
-}
storeKeyDummy :: Key -> AssociatedFile -> MeterUpdate -> Annex Bool
storeKeyDummy _ _ _ = return False
retreiveKeyFileDummy :: Key -> AssociatedFile -> FilePath -> MeterUpdate -> Annex Bool
retreiveKeyFileDummy _ _ _ _ = return False
-- Modifies a base Remote to support both chunking and encryption.
chunkedEncryptableRemote
:: RemoteConfig
-> Preparer Storer
-> Preparer Retriever
-> Remote
-> Remote
chunkedEncryptableRemote c preparestorer prepareretriever baser = encr
where
encr = baser
{ storeKey = \k _f p -> cip >>= storeKeyGen k p
, retrieveKeyFile = \k _f d p -> cip >>= retrieveKeyFileGen k d p
, retrieveKeyFileCheap = \k d -> cip >>= maybe
(retrieveKeyFileCheap baser k d)
(\_ -> return False)
, removeKey = \k -> cip >>= removeKeyGen k
, hasKey = \k -> cip >>= hasKeyGen k
, cost = maybe
(cost baser)
(const $ cost baser + encryptedRemoteCostAdj)
(extractCipher c)
}
cip = cipherKey c
chunkconfig = chunkConfig c
gpgopts = getGpgEncParams encr
safely a = catchNonAsyncAnnex a (\e -> warning (show e) >> return False)
-- chunk, then encrypt, then feed to the storer
storeKeyGen k p enc =
safely $ preparestorer k $ safely . go
where
go (Just storer) = sendAnnex k rollback $ \src ->
metered (Just p) k $ \p' ->
storeChunks (uuid baser) chunkconfig k src p'
(storechunk enc storer)
(hasKey baser)
go Nothing = return False
rollback = void $ removeKey encr k
storechunk Nothing storer k content p = storer k content p
storechunk (Just (cipher, enck)) storer k content p =
withBytes content $ \b ->
encrypt gpgopts cipher (feedBytes b) $
readBytes $ \encb ->
storer (enck k) (ByteContent encb) p
-- call retriever to get chunks; decrypt them; stream to dest file
retrieveKeyFileGen k dest p enc =
safely $ prepareretriever k $ safely . go
where
go (Just retriever) = metered (Just p) k $ \p' ->
retrieveChunks retriever (uuid baser) chunkconfig
enck k dest p' (sink dest enc)
go Nothing = return False
enck = maybe id snd enc
removeKeyGen k enc = removeChunks remover (uuid baser) chunkconfig enck k
where
enck = maybe id snd enc
remover = removeKey baser
hasKeyGen k enc = hasKeyChunks checker (uuid baser) chunkconfig enck k
where
enck = maybe id snd enc
checker = hasKey baser
{- Sink callback for retrieveChunks. Stores the file content into the
- provided Handle, decrypting it first if necessary.
-
- If the remote did not store the content using chunks, no Handle
- will be provided, and it's up to us to open the destination file.
-
- Note that when neither chunking nor encryption is used, and the remote
- provides FileContent, that file only needs to be renamed
- into place. (And it may even already be in the right place..)
-}
sink
:: FilePath
-> Maybe (Cipher, EncKey)
-> Maybe Handle
-> Maybe MeterUpdate
-> ContentSource
-> Annex Bool
sink dest enc mh mp content = do
case (enc, mh, content) of
(Nothing, Nothing, FileContent f)
| f == dest -> noop
| otherwise -> liftIO $ moveFile f dest
(Just (cipher, _), _, ByteContent b) ->
decrypt cipher (feedBytes b) $
readBytes write
(Just (cipher, _), _, FileContent f) -> do
withBytes content $ \b ->
decrypt cipher (feedBytes b) $
readBytes write
liftIO $ nukeFile f
(Nothing, _, FileContent f) -> do
withBytes content write
liftIO $ nukeFile f
(Nothing, _, ByteContent b) -> write b
return True
where
write b = case mh of
Just h -> liftIO $ b `streamto` h
Nothing -> liftIO $ bracket opendest hClose (b `streamto`)
streamto b h = case mp of
Just p -> meteredWrite p h b
Nothing -> L.hPut h b
opendest = openBinaryFile dest WriteMode
withBytes :: ContentSource -> (L.ByteString -> Annex a) -> Annex a
withBytes (ByteContent b) a = a b
withBytes (FileContent f) a = a =<< liftIO (L.readFile f)
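{- A minimal wiring sketch (an illustration, not part of this module):
 - a hypothetical remote that keeps each key's content as a file under
 - a fixed directory could gain chunking and encryption support like
 - this. The base Remote is assumed to fill the replaced slots with
 - storeKeyDummy and retreiveKeyFileDummy; keyPath is an assumed
 - helper, and key2file (Types.Key), copyFile (System.Directory), and
 - (</>) are assumed to be in scope.
 -}
mkChunkedEncryptable :: RemoteConfig -> Remote -> Remote
mkChunkedEncryptable c base = chunkedEncryptableRemote c
	(simplyPrepare $ fileStorer store)
	(simplyPrepare $ fileRetriever retrieve)
	base
  where
	keyPath k = "/srv/myremote" </> key2file k
	store k f _p = liftIO $ do
		copyFile f (keyPath k)
		return True
	retrieve f k _p = liftIO $ copyFile (keyPath k) f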


@ -66,44 +66,45 @@ encryptionSetup c = maybe genCipher updateCipher $ extractCipher c
c' = foldr M.delete c c' = foldr M.delete c
-- git-annex used to remove 'encryption' as well, since -- git-annex used to remove 'encryption' as well, since
-- it was redundant; we now need to keep it for -- it was redundant; we now need to keep it for
-- public-key incryption, hence we leave it on newer -- public-key encryption, hence we leave it on newer
-- remotes (while being backward-compatible). -- remotes (while being backward-compatible).
[ "keyid", "keyid+", "keyid-", "highRandomQuality" ] [ "keyid", "keyid+", "keyid-", "highRandomQuality" ]
{- Modifies a Remote to support encryption. {- Modifies a Remote to support encryption. -}
- -- TODO: deprecated
- Two additional functions must be provided by the remote,
- to support storing and retrieving encrypted content. -}
encryptableRemote encryptableRemote
:: RemoteConfig :: RemoteConfig
-> ((Cipher, Key) -> Key -> MeterUpdate -> Annex Bool) -> ((Cipher, Key) -> Key -> MeterUpdate -> Annex Bool)
-> ((Cipher, Key) -> Key -> FilePath -> MeterUpdate -> Annex Bool) -> ((Cipher, Key) -> Key -> FilePath -> MeterUpdate -> Annex Bool)
-> Remote -> Remote
-> Remote -> Remote
encryptableRemote c storeKeyEncrypted retrieveKeyFileEncrypted r = encryptableRemote c storeKeyEncrypted retrieveKeyFileEncrypted r = r
r { { storeKey = \k f p -> cip k >>= maybe
storeKey = store,
retrieveKeyFile = retrieve,
retrieveKeyFileCheap = retrieveCheap,
removeKey = withkey $ removeKey r,
hasKey = withkey $ hasKey r,
cost = maybe
(cost r)
(const $ cost r + encryptedRemoteCostAdj)
(extractCipher c)
}
where
store k f p = cip k >>= maybe
(storeKey r k f p) (storeKey r k f p)
(\enck -> storeKeyEncrypted enck k p) (\v -> storeKeyEncrypted v k p)
retrieve k f d p = cip k >>= maybe , retrieveKeyFile = \k f d p -> cip k >>= maybe
(retrieveKeyFile r k f d p) (retrieveKeyFile r k f d p)
(\enck -> retrieveKeyFileEncrypted enck k d p) (\v -> retrieveKeyFileEncrypted v k d p)
retrieveCheap k d = cip k >>= maybe , retrieveKeyFileCheap = \k d -> cip k >>= maybe
(retrieveKeyFileCheap r k d) (retrieveKeyFileCheap r k d)
(\_ -> return False) (\_ -> return False)
withkey a k = cip k >>= maybe (a k) (a . snd) , removeKey = \k -> cip k >>= maybe
cip = cipherKey c (removeKey r k)
(\(_, enckey) -> removeKey r enckey)
, hasKey = \k -> cip k >>= maybe
(hasKey r k)
(\(_, enckey) -> hasKey r enckey)
, cost = maybe
(cost r)
(const $ cost r + encryptedRemoteCostAdj)
(extractCipher c)
}
where
cip k = do
v <- cipherKey c
return $ case v of
Nothing -> Nothing
Just (cipher, enck) -> Just (cipher, enck k)
{- Gets encryption Cipher. The decrypted Ciphers are cached in the Annex {- Gets encryption Cipher. The decrypted Ciphers are cached in the Annex
- state. -} - state. -}
@ -136,11 +137,11 @@ embedCreds c
| isJust (M.lookup "cipherkeys" c) && isJust (M.lookup "cipher" c) = True | isJust (M.lookup "cipherkeys" c) && isJust (M.lookup "cipher" c) = True
| otherwise = False | otherwise = False
{- Gets encryption Cipher, and encrypted version of Key. -} {- Gets encryption Cipher, and key encryptor. -}
cipherKey :: RemoteConfig -> Key -> Annex (Maybe (Cipher, Key)) cipherKey :: RemoteConfig -> Annex (Maybe (Cipher, EncKey))
cipherKey c k = fmap make <$> remoteCipher c cipherKey c = fmap make <$> remoteCipher c
where where
make ciphertext = (ciphertext, encryptKey mac ciphertext k) make ciphertext = (ciphertext, encryptKey mac ciphertext)
mac = fromMaybe defaultMac $ M.lookup "mac" c >>= readMac mac = fromMaybe defaultMac $ M.lookup "mac" c >>= readMac
{- Stores an StorableCipher in a remote's configuration. -} {- Stores an StorableCipher in a remote's configuration. -}


@ -5,7 +5,7 @@
- Licensed under the GNU GPL version 3 or higher. - Licensed under the GNU GPL version 3 or higher.
-} -}
{-# LANGUAGE ScopedTypeVariables, CPP #-} {-# LANGUAGE ScopedTypeVariables #-}
module Remote.WebDAV (remote, davCreds, configUrl) where module Remote.WebDAV (remote, davCreds, configUrl) where
@ -16,11 +16,7 @@ import qualified Data.ByteString.Lazy.UTF8 as L8
import qualified Data.ByteString.Lazy as L import qualified Data.ByteString.Lazy as L
import qualified Control.Exception as E import qualified Control.Exception as E
import qualified Control.Exception.Lifted as EL import qualified Control.Exception.Lifted as EL
#if MIN_VERSION_DAV(0,6,0)
import Network.HTTP.Client (HttpException(..)) import Network.HTTP.Client (HttpException(..))
#else
import Network.HTTP.Conduit (HttpException(..))
#endif
import Network.HTTP.Types import Network.HTTP.Types
import System.Log.Logger (debugM) import System.Log.Logger (debugM)
import System.IO.Error import System.IO.Error
@ -113,7 +109,7 @@ storeHelper :: Remote -> Key -> DavUrl -> DavUser -> DavPass -> L.ByteString ->
storeHelper r k baseurl user pass b = catchBoolIO $ do storeHelper r k baseurl user pass b = catchBoolIO $ do
mkdirRecursiveDAV tmpurl user pass mkdirRecursiveDAV tmpurl user pass
case chunkconfig of case chunkconfig of
NoChunks -> flip catchNonAsync (\e -> print e >> return False) $ do NoChunks -> flip catchNonAsync (\e -> warningIO (show e) >> return False) $ do
storehttp tmpurl b storehttp tmpurl b
finalizer tmpurl keyurl finalizer tmpurl keyurl
return True return True
@ -140,7 +136,7 @@ retrieve :: Remote -> Key -> AssociatedFile -> FilePath -> MeterUpdate -> Annex
retrieve r k _f d p = metered (Just p) k $ \meterupdate -> retrieve r k _f d p = metered (Just p) k $ \meterupdate ->
davAction r False $ \(baseurl, user, pass) -> liftIO $ catchBoolIO $ davAction r False $ \(baseurl, user, pass) -> liftIO $ catchBoolIO $
withStoredFiles r k baseurl user pass onerr $ \urls -> do withStoredFiles r k baseurl user pass onerr $ \urls -> do
meteredWriteFileChunks meterupdate d urls $ \url -> do Legacy.meteredWriteFileChunks meterupdate d urls $ \url -> do
mb <- getDAV url user pass mb <- getDAV url user pass
case mb of case mb of
Nothing -> throwIO "download failed" Nothing -> throwIO "download failed"
@ -308,57 +304,37 @@ debugDAV :: DavUrl -> String -> IO ()
debugDAV msg url = debugM "DAV" $ msg ++ " " ++ url debugDAV msg url = debugM "DAV" $ msg ++ " " ++ url
{--------------------------------------------------------------------- {---------------------------------------------------------------------
- Low-level DAV operations, using the new DAV monad when available. - Low-level DAV operations.
---------------------------------------------------------------------} ---------------------------------------------------------------------}
putDAV :: DavUrl -> DavUser -> DavPass -> L.ByteString -> IO () putDAV :: DavUrl -> DavUser -> DavPass -> L.ByteString -> IO ()
putDAV url user pass b = do putDAV url user pass b = do
debugDAV "PUT" url debugDAV "PUT" url
#if MIN_VERSION_DAV(0,6,0)
goDAV url user pass $ putContentM (contentType, b) goDAV url user pass $ putContentM (contentType, b)
#else
putContent url user pass (contentType, b)
#endif
getDAV :: DavUrl -> DavUser -> DavPass -> IO (Maybe L.ByteString) getDAV :: DavUrl -> DavUser -> DavPass -> IO (Maybe L.ByteString)
getDAV url user pass = do getDAV url user pass = do
debugDAV "GET" url debugDAV "GET" url
eitherToMaybe <$> tryNonAsync go eitherToMaybe <$> tryNonAsync go
where where
#if MIN_VERSION_DAV(0,6,0)
go = goDAV url user pass $ snd <$> getContentM go = goDAV url user pass $ snd <$> getContentM
#else
go = snd . snd <$> getPropsAndContent url user pass
#endif
deleteDAV :: DavUrl -> DavUser -> DavPass -> IO () deleteDAV :: DavUrl -> DavUser -> DavPass -> IO ()
deleteDAV url user pass = do deleteDAV url user pass = do
debugDAV "DELETE" url debugDAV "DELETE" url
#if MIN_VERSION_DAV(0,6,0)
goDAV url user pass delContentM goDAV url user pass delContentM
#else
deleteContent url user pass
#endif
moveDAV :: DavUrl -> DavUrl -> DavUser -> DavPass -> IO () moveDAV :: DavUrl -> DavUrl -> DavUser -> DavPass -> IO ()
moveDAV url newurl user pass = do moveDAV url newurl user pass = do
debugDAV ("MOVE to " ++ newurl ++ " from ") url debugDAV ("MOVE to " ++ newurl ++ " from ") url
#if MIN_VERSION_DAV(0,6,0)
goDAV url user pass $ moveContentM newurl' goDAV url user pass $ moveContentM newurl'
#else
moveContent url newurl' user pass
#endif
where where
newurl' = B8.fromString newurl newurl' = B8.fromString newurl
mkdirDAV :: DavUrl -> DavUser -> DavPass -> IO Bool mkdirDAV :: DavUrl -> DavUser -> DavPass -> IO Bool
mkdirDAV url user pass = do mkdirDAV url user pass = do
debugDAV "MKDIR" url debugDAV "MKDIR" url
#if MIN_VERSION_DAV(0,6,0)
goDAV url user pass mkCol goDAV url user pass mkCol
#else
makeCollection url user pass
#endif
existsDAV :: DavUrl -> DavUser -> DavPass -> IO (Either String Bool) existsDAV :: DavUrl -> DavUser -> DavPass -> IO (Either String Bool)
existsDAV url user pass = do existsDAV url user pass = do
@ -366,35 +342,19 @@ existsDAV url user pass = do
either (Left . show) id <$> tryNonAsync check either (Left . show) id <$> tryNonAsync check
where where
ispresent = return . Right ispresent = return . Right
#if MIN_VERSION_DAV(0,6,0)
check = goDAV url user pass $ do check = goDAV url user pass $ do
setDepth Nothing setDepth Nothing
EL.catchJust EL.catchJust
(matchStatusCodeException notFound404) (matchStatusCodeException notFound404)
(getPropsM >> ispresent True) (getPropsM >> ispresent True)
(const $ ispresent False) (const $ ispresent False)
#else
check = E.catchJust
(matchStatusCodeException notFound404)
#if ! MIN_VERSION_DAV(0,4,0)
(getProps url user pass >> ispresent True)
#else
(getProps url user pass Nothing >> ispresent True)
#endif
(const $ ispresent False)
#endif
matchStatusCodeException :: Status -> HttpException -> Maybe () matchStatusCodeException :: Status -> HttpException -> Maybe ()
#if MIN_VERSION_DAV(0,6,0)
matchStatusCodeException want (StatusCodeException s _ _) matchStatusCodeException want (StatusCodeException s _ _)
#else
matchStatusCodeException want (StatusCodeException s _)
#endif
| s == want = Just () | s == want = Just ()
| otherwise = Nothing | otherwise = Nothing
matchStatusCodeException _ _ = Nothing matchStatusCodeException _ _ = Nothing
#if MIN_VERSION_DAV(0,6,0)
goDAV :: DavUrl -> DavUser -> DavPass -> DAVT IO a -> IO a goDAV :: DavUrl -> DavUser -> DavPass -> DAVT IO a -> IO a
goDAV url user pass a = choke $ evalDAVT url $ do goDAV url user pass a = choke $ evalDAVT url $ do
setResponseTimeout Nothing -- disable default (5 second!) timeout setResponseTimeout Nothing -- disable default (5 second!) timeout
@ -407,4 +367,3 @@ goDAV url user pass a = choke $ evalDAVT url $ do
case x of case x of
Left e -> error e Left e -> error e
Right r -> return r Right r -> return r
#endif


@ -15,9 +15,16 @@ import Types.KeySource
data BackendA a = Backend data BackendA a = Backend
{ name :: String { name :: String
, getKey :: KeySource -> a (Maybe Key) , getKey :: KeySource -> a (Maybe Key)
-- Checks the content of a key.
, fsckKey :: Maybe (Key -> FilePath -> a Bool) , fsckKey :: Maybe (Key -> FilePath -> a Bool)
-- Checks if a key can be upgraded to a better form.
, canUpgradeKey :: Maybe (Key -> Bool) , canUpgradeKey :: Maybe (Key -> Bool)
-- Checks if there is a fast way to migrate a key to a different
-- backend (ie, without re-hashing).
, fastMigrate :: Maybe (Key -> BackendA a -> Maybe Key) , fastMigrate :: Maybe (Key -> BackendA a -> Maybe Key)
-- Checks if a key is known (or assumed) to always refer to the
-- same data.
, isStableKey :: Key -> Bool
} }
instance Show (BackendA a) where instance Show (BackendA a) where


@ -69,6 +69,7 @@ data CommandSection
| SectionMetaData | SectionMetaData
| SectionUtility | SectionUtility
| SectionPlumbing | SectionPlumbing
| SectionTesting
deriving (Eq, Ord, Enum, Bounded) deriving (Eq, Ord, Enum, Bounded)
descSection :: CommandSection -> String descSection :: CommandSection -> String
@ -79,3 +80,4 @@ descSection SectionQuery = "Query commands"
descSection SectionMetaData = "Metadata commands" descSection SectionMetaData = "Metadata commands"
descSection SectionUtility = "Utility commands" descSection SectionUtility = "Utility commands"
descSection SectionPlumbing = "Plumbing commands" descSection SectionPlumbing = "Plumbing commands"
descSection SectionTesting = "Testing commands"


@ -13,6 +13,8 @@ module Types.Key (
stubKey, stubKey,
key2file, key2file,
file2key, file2key,
nonChunkKey,
chunkKeyOffset,
prop_idempotent_key_encode, prop_idempotent_key_encode,
prop_idempotent_key_decode prop_idempotent_key_decode
@ -47,6 +49,19 @@ stubKey = Key
, keyChunkNum = Nothing , keyChunkNum = Nothing
} }
-- Gets the parent of a chunk key.
nonChunkKey :: Key -> Key
nonChunkKey k = k
{ keyChunkSize = Nothing
, keyChunkNum = Nothing
}
-- Where a chunk key is offset within its parent.
chunkKeyOffset :: Key -> Maybe Integer
chunkKeyOffset k = (*)
<$> keyChunkSize k
<*> (pred <$> keyChunkNum k)
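-- For example, a chunk key with keyChunkSize = Just 1048576 and
-- keyChunkNum = Just 3 has chunkKeyOffset = Just 2097152: chunks are
-- numbered from 1, so the third chunk starts after two full chunks.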
fieldSep :: Char fieldSep :: Char
fieldSep = '-' fieldSep = '-'


@ -56,7 +56,9 @@ data RemoteA a = Remote {
name :: RemoteName, name :: RemoteName,
-- Remotes have a use cost; higher is more expensive -- Remotes have a use cost; higher is more expensive
cost :: Cost, cost :: Cost,
-- Transfers a key to the remote. -- Transfers a key's contents from disk to the remote.
-- The key should not appear to be present on the remote until
-- all of its contents have been transferred.
storeKey :: Key -> AssociatedFile -> MeterUpdate -> a Bool, storeKey :: Key -> AssociatedFile -> MeterUpdate -> a Bool,
-- Retrieves a key's contents to a file. -- Retrieves a key's contents to a file.
-- (The MeterUpdate does not need to be used if it retrieves -- (The MeterUpdate does not need to be used if it retrieves
@ -64,7 +66,7 @@ data RemoteA a = Remote {
retrieveKeyFile :: Key -> AssociatedFile -> FilePath -> MeterUpdate -> a Bool, retrieveKeyFile :: Key -> AssociatedFile -> FilePath -> MeterUpdate -> a Bool,
-- retrieves a key's contents to a tmp file, if it can be done cheaply -- retrieves a key's contents to a tmp file, if it can be done cheaply
retrieveKeyFileCheap :: Key -> FilePath -> a Bool, retrieveKeyFileCheap :: Key -> FilePath -> a Bool,
-- removes a key's contents -- removes a key's contents (succeeds if the contents are not present)
removeKey :: Key -> a Bool, removeKey :: Key -> a Bool,
-- Checks if a key is present in the remote; if the remote -- Checks if a key is present in the remote; if the remote
-- cannot be accessed returns a Left error message. -- cannot be accessed returns a Left error message.

Types/StoreRetrieve.hs

@ -0,0 +1,37 @@
{- Types for Storer and Retriever actions for remotes.
-
- Copyright 2014 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
{-# LANGUAGE Rank2Types #-}
module Types.StoreRetrieve where
import Common.Annex
import Utility.Metered
import qualified Data.ByteString.Lazy as L
-- Prepares for and then runs an action that will act on a Key's
-- content, passing it a helper when the preparation is successful.
type Preparer helper = forall a. Key -> (Maybe helper -> Annex a) -> Annex a
-- A source of a Key's content.
data ContentSource
= FileContent FilePath
| ByteContent L.ByteString
isByteContent :: ContentSource -> Bool
isByteContent (ByteContent _) = True
isByteContent (FileContent _) = False
-- Action that stores a Key's content on a remote.
-- Can throw exceptions.
type Storer = Key -> ContentSource -> MeterUpdate -> Annex Bool
-- Action that retrieves a Key's content from a remote, passing it to a
-- callback.
-- Throws exception if key is not present, or remote is not accessible.
type Retriever = Key -> MeterUpdate -> (ContentSource -> Annex Bool) -> Annex Bool
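-- A minimal illustrative Storer (a sketch, not part of this module):
-- it writes a key's content under an assumed directory, handling both
-- ContentSource constructors and ignoring the progress meter. key2file
-- (Types.Key), copyFile (System.Directory), and (</>) are assumed to
-- be in scope.
storeToDir :: FilePath -> Storer
storeToDir dir k content _p = liftIO $ do
	let dest = dir </> key2file k
	case content of
		FileContent f -> copyFile f dest
		ByteContent b -> L.writeFile dest b
	return True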


@ -11,14 +11,15 @@ module Utility.Gpg where
import Control.Applicative import Control.Applicative
import Control.Concurrent import Control.Concurrent
import Control.Monad.IO.Class
import qualified Data.Map as M import qualified Data.Map as M
import Control.Monad.Catch (bracket, MonadMask)
import Common import Common
import qualified Build.SysConfig as SysConfig import qualified Build.SysConfig as SysConfig
#ifndef mingw32_HOST_OS #ifndef mingw32_HOST_OS
import System.Posix.Types import System.Posix.Types
import Control.Exception (bracket)
import System.Path import System.Path
import Utility.Env import Utility.Env
#else #else
@ -104,18 +105,18 @@ pipeStrict params input = do
- -
- Note that to avoid deadlock with the cleanup stage, - Note that to avoid deadlock with the cleanup stage,
- the reader must fully consume gpg's input before returning. -} - the reader must fully consume gpg's input before returning. -}
feedRead :: [CommandParam] -> String -> (Handle -> IO ()) -> (Handle -> IO a) -> IO a feedRead :: (MonadIO m, MonadMask m) => [CommandParam] -> String -> (Handle -> IO ()) -> (Handle -> m a) -> m a
feedRead params passphrase feeder reader = do feedRead params passphrase feeder reader = do
#ifndef mingw32_HOST_OS #ifndef mingw32_HOST_OS
-- pipe the passphrase into gpg on a fd -- pipe the passphrase into gpg on a fd
(frompipe, topipe) <- createPipe (frompipe, topipe) <- liftIO createPipe
void $ forkIO $ do liftIO $ void $ forkIO $ do
toh <- fdToHandle topipe toh <- fdToHandle topipe
hPutStrLn toh passphrase hPutStrLn toh passphrase
hClose toh hClose toh
let Fd pfd = frompipe let Fd pfd = frompipe
let passphrasefd = [Param "--passphrase-fd", Param $ show pfd] let passphrasefd = [Param "--passphrase-fd", Param $ show pfd]
closeFd frompipe `after` go (passphrasefd ++ params) liftIO (closeFd frompipe) `after` go (passphrasefd ++ params)
#else #else
-- store the passphrase in a temp file for gpg -- store the passphrase in a temp file for gpg
withTmpFile "gpg" $ \tmpfile h -> do withTmpFile "gpg" $ \tmpfile h -> do
@ -128,15 +129,24 @@ feedRead params passphrase feeder reader = do
go params' = pipeLazy params' feeder reader go params' = pipeLazy params' feeder reader
{- Like feedRead, but without passphrase. -} {- Like feedRead, but without passphrase. -}
pipeLazy :: [CommandParam] -> (Handle -> IO ()) -> (Handle -> IO a) -> IO a pipeLazy :: (MonadIO m, MonadMask m) => [CommandParam] -> (Handle -> IO ()) -> (Handle -> m a) -> m a
pipeLazy params feeder reader = do pipeLazy params feeder reader = do
params' <- stdParams $ Param "--batch" : params params' <- liftIO $ stdParams $ Param "--batch" : params
withBothHandles createProcessSuccess (proc gpgcmd params') let p = (proc gpgcmd params')
$ \(to, from) -> do { std_in = CreatePipe
void $ forkIO $ do , std_out = CreatePipe
feeder to , std_err = Inherit
hClose to }
reader from bracket (setup p) (cleanup p) go
where
setup = liftIO . createProcess
cleanup p (_, _, _, pid) = liftIO $ forceSuccessProcess p pid
go p = do
let (to, from) = bothHandles p
liftIO $ void $ forkIO $ do
feeder to
hClose to
reader from
{- Finds gpg public keys matching some string. (Could be an email address, {- Finds gpg public keys matching some string. (Could be an email address,
- a key id, or a name; See the section 'HOW TO SPECIFY A USER ID' of - a key id, or a name; See the section 'HOW TO SPECIFY A USER ID' of


@ -16,6 +16,7 @@ import qualified Data.ByteString as S
import System.IO.Unsafe import System.IO.Unsafe
import Foreign.Storable (Storable(sizeOf)) import Foreign.Storable (Storable(sizeOf))
import System.Posix.Types import System.Posix.Types
import Data.Int
{- An action that can be run repeatedly, updating it on the bytes processed. {- An action that can be run repeatedly, updating it on the bytes processed.
- -
@ -23,6 +24,9 @@ import System.Posix.Types
- far, *not* an incremental amount since the last call. -} - far, *not* an incremental amount since the last call. -}
type MeterUpdate = (BytesProcessed -> IO ()) type MeterUpdate = (BytesProcessed -> IO ())
nullMeterUpdate :: MeterUpdate
nullMeterUpdate _ = return ()
{- Total number of bytes processed so far. -} {- Total number of bytes processed so far. -}
newtype BytesProcessed = BytesProcessed Integer newtype BytesProcessed = BytesProcessed Integer
deriving (Eq, Ord, Show) deriving (Eq, Ord, Show)
@ -31,6 +35,10 @@ class AsBytesProcessed a where
toBytesProcessed :: a -> BytesProcessed toBytesProcessed :: a -> BytesProcessed
fromBytesProcessed :: BytesProcessed -> a fromBytesProcessed :: BytesProcessed -> a
instance AsBytesProcessed BytesProcessed where
toBytesProcessed = id
fromBytesProcessed = id
instance AsBytesProcessed Integer where instance AsBytesProcessed Integer where
toBytesProcessed i = BytesProcessed i toBytesProcessed i = BytesProcessed i
fromBytesProcessed (BytesProcessed i) = i fromBytesProcessed (BytesProcessed i) = i
@ -39,6 +47,10 @@ instance AsBytesProcessed Int where
toBytesProcessed i = BytesProcessed $ toInteger i toBytesProcessed i = BytesProcessed $ toInteger i
fromBytesProcessed (BytesProcessed i) = fromInteger i fromBytesProcessed (BytesProcessed i) = fromInteger i
instance AsBytesProcessed Int64 where
toBytesProcessed i = BytesProcessed $ toInteger i
fromBytesProcessed (BytesProcessed i) = fromInteger i
instance AsBytesProcessed FileOffset where instance AsBytesProcessed FileOffset where
toBytesProcessed sz = BytesProcessed $ toInteger sz toBytesProcessed sz = BytesProcessed $ toInteger sz
fromBytesProcessed (BytesProcessed sz) = fromInteger sz fromBytesProcessed (BytesProcessed sz) = fromInteger sz
@ -77,6 +89,13 @@ meteredWriteFile :: MeterUpdate -> FilePath -> L.ByteString -> IO ()
meteredWriteFile meterupdate f b = withBinaryFile f WriteMode $ \h -> meteredWriteFile meterupdate f b = withBinaryFile f WriteMode $ \h ->
meteredWrite meterupdate h b meteredWrite meterupdate h b
{- Applies an offset to a MeterUpdate. This can be useful when
- performing a sequence of actions, such as multiple meteredWriteFiles,
- that all update a common meter progressively. Or when resuming.
-}
offsetMeterUpdate :: MeterUpdate -> BytesProcessed -> MeterUpdate
offsetMeterUpdate base offset = \n -> base (offset `addBytesProcessed` n)
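{- For example (a sketch): when resuming a transfer at byte offset o,
 - the meter for the remainder can be built with
 -
 - p' = offsetMeterUpdate basemeter (toBytesProcessed o)
 -
 - so that p' zeroBytesProcessed immediately reports o bytes done.
 -}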
{- This is like L.hGetContents, but after each chunk is read, a meter {- This is like L.hGetContents, but after each chunk is read, a meter
- is updated based on the size of the chunk. - is updated based on the size of the chunk.
- -


@ -31,6 +31,7 @@ module Utility.Process (
stdinHandle, stdinHandle,
stdoutHandle, stdoutHandle,
stderrHandle, stderrHandle,
bothHandles,
processHandle, processHandle,
devNull, devNull,
) where ) where


@ -9,11 +9,12 @@
module Utility.Tmp where module Utility.Tmp where
import Control.Exception (bracket)
import System.IO import System.IO
import System.Directory import System.Directory
import Control.Monad.IfElse import Control.Monad.IfElse
import System.FilePath import System.FilePath
import Control.Monad.IO.Class
import Control.Monad.Catch (bracket, MonadMask)
import Utility.Exception import Utility.Exception
import Utility.FileSystemEncoding import Utility.FileSystemEncoding
@ -42,18 +43,18 @@ viaTmp a file content = bracket setup cleanup use
{- Runs an action with a tmp file located in the system's tmp directory {- Runs an action with a tmp file located in the system's tmp directory
- (or in "." if there is none) then removes the file. -} - (or in "." if there is none) then removes the file. -}
withTmpFile :: Template -> (FilePath -> Handle -> IO a) -> IO a withTmpFile :: (MonadIO m, MonadMask m) => Template -> (FilePath -> Handle -> m a) -> m a
withTmpFile template a = do withTmpFile template a = do
tmpdir <- catchDefaultIO "." getTemporaryDirectory tmpdir <- liftIO $ catchDefaultIO "." getTemporaryDirectory
withTmpFileIn tmpdir template a withTmpFileIn tmpdir template a
{- Runs an action with a tmp file located in the specified directory, {- Runs an action with a tmp file located in the specified directory,
- then removes the file. -} - then removes the file. -}
withTmpFileIn :: FilePath -> Template -> (FilePath -> Handle -> IO a) -> IO a withTmpFileIn :: (MonadIO m, MonadMask m) => FilePath -> Template -> (FilePath -> Handle -> m a) -> m a
withTmpFileIn tmpdir template a = bracket create remove use withTmpFileIn tmpdir template a = bracket create remove use
where where
create = openTempFile tmpdir template create = liftIO $ openTempFile tmpdir template
remove (name, handle) = do remove (name, handle) = liftIO $ do
hClose handle hClose handle
catchBoolIO (removeFile name >> return True) catchBoolIO (removeFile name >> return True)
use (name, handle) = a name handle use (name, handle) = a name handle

debian/changelog

@ -1,5 +1,12 @@
git-annex (5.20140718) UNRELEASED; urgency=medium git-annex (5.20140718) UNRELEASED; urgency=medium
* New chunk= option to chunk files stored in special remotes.
Currently supported by: directory, and all external special remotes.
* Partially transferred files are automatically resumed when using
chunked remotes!
* The old chunksize= option is deprecated. Do not use for new remotes.
* Legacy code for directory remotes using the old chunksize= option
will keep them working, but more slowly than before.
* webapp: Automatically install Konqueror integration scripts * webapp: Automatically install Konqueror integration scripts
to get and drop files. to get and drop files.
* repair: Removing bad objects could leave fsck finding no more * repair: Removing bad objects could leave fsck finding no more
@ -8,6 +15,8 @@ git-annex (5.20140718) UNRELEASED; urgency=medium
were incompletely repaired before. were incompletely repaired before.
* Fix cost calculation for non-encrypted remotes. * Fix cost calculation for non-encrypted remotes.
* Display exception message when a transfer fails due to an exception. * Display exception message when a transfer fails due to an exception.
* WebDAV: Dropped support for DAV before 0.6.1.
* testremote: New command to test uploads/downloads to a remote.
-- Joey Hess <joeyh@debian.org> Mon, 21 Jul 2014 14:41:26 -0400 -- Joey Hess <joeyh@debian.org> Mon, 21 Jul 2014 14:41:26 -0400

doc/chunking.mdwn

@ -0,0 +1,31 @@
Some [[special_remotes]] have support for breaking large files up into
chunks that are stored on the remote.
This can be useful to work around limitations on the size of files
on the remote.
Chunking also allows for resuming interrupted downloads and uploads.
Note that git-annex has to buffer chunks in memory before they are sent to
a remote. So, using a large chunk size will make it use more memory.
To enable chunking, pass a `chunk=nnMiB` parameter to `git annex
initremote`, specifying the chunk size.
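
For example, this would create a hypothetical directory special remote
that stores content in 1 MiB chunks (the remote name and directory are
placeholders):

    git annex initremote usbdrive type=directory directory=/media/usb chunk=1MiB encryption=none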
Good chunk sizes will depend on the remote, but a good starting place
is probably `1MiB`. Very large chunks are problematic, both because
git-annex needs to buffer one chunk in memory when uploading, and because
a larger chunk will make resuming interrupted transfers less efficient.
On the other hand, when a file is split into a great many chunks,
there can be increased overhead of making many requests to the remote.
To disable chunking of a remote that was using chunking,
pass `chunk=0` to `git annex enableremote`. Any content already stored on
the remote using chunks will continue to be accessed via chunks; this
just prevents using chunks when storing new content.
To change the chunk size, pass a `chunk=nnMiB` parameter to
`git annex enableremote`. This only affects the chunk size used when
storing new content.
See also: [[design document|design/assistant/chunks]]


@ -231,6 +231,15 @@ cannot check exact file sizes.
If padding is enabled, gpg compression should be disabled, to not leak If padding is enabled, gpg compression should be disabled, to not leak
clues about how well the files compress and so what kind of file it is. clues about how well the files compress and so what kind of file it is.
## chunk key hashing
A chunk key should hash into the same directory structure as its parent
key. This will avoid lots of extra hash directories when using chunking
with non-encrypted keys.
Won't happen when the key is encrypted, but that is good; hashing to the
same bucket then would allow statistical correlation.
## resuming interrupted transfers ## resuming interrupted transfers
Resuming interrupted downloads, and uploads are both possible. Resuming interrupted downloads, and uploads are both possible.


@ -101,12 +101,14 @@ The following requests *must* all be supported by the special remote.
Tells the special remote it's time to prepare itself to be used. Tells the special remote it's time to prepare itself to be used.
Only INITREMOTE can come before this. Only INITREMOTE can come before this.
* `TRANSFER STORE|RETRIEVE Key File` * `TRANSFER STORE|RETRIEVE Key File`
Requests the transfer of a key. For Send, the File is the file to upload; Requests the transfer of a key. For STORE, the File is the file to upload;
for Receive the File is where to store the download. for RETRIEVE the File is where to store the download.
Note that the File should not influence the filename used on the remote. Note that the File should not influence the filename used on the remote.
The filename will not contain any whitespace. The filename will not contain any whitespace.
Note that it's important that, while a Key is being stored, CHECKPRESENT
not indicate it's present until all the data has been transferred.
Multiple transfers might be requested by git-annex, but it's fine for the Multiple transfers might be requested by git-annex, but it's fine for the
program to serialize them and only do one at a time. program to serialize them and only do one at a time.
* `CHECKPRESENT Key` * `CHECKPRESENT Key`
Requests the remote check if a key is present in it. Requests the remote check if a key is present in it.
* `REMOVE Key` * `REMOVE Key`
@ -286,7 +288,6 @@ start a new process the next time it needs to use a remote.
the remote. However, \n and probably \0 need to be escaped somehow in the the remote. However, \n and probably \0 need to be escaped somehow in the
file data, which adds complication. file data, which adds complication.
* uuid discovery during INITREMOTE. * uuid discovery during INITREMOTE.
* Support for splitting files into chunks.
* Support for getting and setting the list of urls that can be associated * Support for getting and setting the list of urls that can be associated
with a key. with a key.
* Hook into webapp. Needs a way to provide some kind of prompt to the user * Hook into webapp. Needs a way to provide some kind of prompt to the user


@ -949,12 +949,6 @@ subdirectories).
Merge conflicts between two files that are not annexed will not be Merge conflicts between two files that are not annexed will not be
automatically resolved. automatically resolved.
* `test`
This runs git-annex's built-in test suite.
There are several parameters, provided by Haskell's tasty test framework.
* `remotedaemon` * `remotedaemon`
Detects when network remotes have received git pushes and fetches from them. Detects when network remotes have received git pushes and fetches from them.
@ -963,6 +957,35 @@ subdirectories).
This command is used internally to perform git pulls over XMPP. This command is used internally to perform git pulls over XMPP.
# TESTING COMMANDS
* `test`
This runs git-annex's built-in test suite.
There are several parameters, provided by Haskell's tasty test framework.
Pass --help for details.
* `testremote remote`
This tests a remote by generating some random objects and sending them to
the remote, then redownloading them, removing them from the remote, etc.
It's safe to run in an existing repository (the repository contents are
not altered), although it may perform expensive data transfers.
The --size option can be used to tune the size of the generated objects.
Testing a single remote will use the remote's configuration,
automatically varying the chunk sizes, and with simple shared encryption
enabled and disabled.
* `fuzztest`
Generates random changes to files in the current repository,
for use in testing the assistant. This is dangerous, so it will not
do anything unless --forced.
# OPTIONS # OPTIONS
* `--force` * `--force`


@ -36,3 +36,8 @@ string, but where that would normally encode the bits using the 16 characters
0-9a-f, this instead uses the 32 characters "0123456789zqjxkmvwgpfZQJXKMVWGPF". 0-9a-f, this instead uses the 32 characters "0123456789zqjxkmvwgpfZQJXKMVWGPF".
The first 2 letters of the resulting string are the first directory, and the The first 2 letters of the resulting string are the first directory, and the
second 2 are the second directory. second 2 are the second directory.
## chunk keys
The same hash directory is used for a chunk key as would be used for the
key that it's a chunk of.


@ -25,13 +25,11 @@ remote:
* `keyid` - Specifies the gpg key to use for [[encryption]]. * `keyid` - Specifies the gpg key to use for [[encryption]].
* `chunksize` - Avoid storing files larger than the specified size in the * `chunk` - Enables [[chunking]] when storing large files.
directory. For use on directories on mount points that have file size
limitations. The default is to never chunk files. * `chunksize` - Deprecated version of chunk parameter above.
The value can use specified using any commonly used units. Do not use for new remotes. It is not safe to change the chunksize
Example: `chunksize=100 megabytes` setting of an existing remote.
Note that enabling chunking on an existing remote with non-chunked
files is not recommended; nor is changing the chunksize.
Setup example: Setup example:


@ -128,14 +128,25 @@ while read line; do
STORE) STORE)
# Store the file to a location # Store the file to a location
# based on the key. # based on the key.
# XXX when possible, send PROGRESS # XXX when at all possible, send PROGRESS
calclocation "$key" calclocation "$key"
mkdir -p "$(dirname "$LOC")" mkdir -p "$(dirname "$LOC")"
if runcmd cp "$file" "$LOC"; then # Store in temp file first, so that
# CHECKPRESENT does not see it
# until it is all stored.
mkdir -p "$mydirectory/tmp"
tmp="$mydirectory/tmp/$key"
if runcmd cp "$file" "$tmp" \
&& runcmd mv -f "$tmp" "$LOC"; then
echo TRANSFER-SUCCESS STORE "$key" echo TRANSFER-SUCCESS STORE "$key"
else else
echo TRANSFER-FAILURE STORE "$key" echo TRANSFER-FAILURE STORE "$key"
fi fi
mkdir -p "$(dirname "$LOC")"
# The file may already exist, so
# make sure we can overwrite it.
chmod 644 "$LOC" 2>/dev/null || true
;; ;;
RETRIEVE) RETRIEVE)
# Retrieve from a location based on # Retrieve from a location based on


@ -29,13 +29,11 @@ the webdav remote.
be created as needed. Use of a https URL is strongly be created as needed. Use of a https URL is strongly
encouraged, since HTTP basic authentication is used. encouraged, since HTTP basic authentication is used.
* `chunksize` - Avoid storing files larger than the specified size in * `chunk` - Enables [[chunking]] when storing large files.
WebDAV. For use when the WebDAV server has file size
limitations. The default is to never chunk files. * `chunksize` - Deprecated version of chunk parameter above.
The value can use specified using any commonly used units. Do not use for new remotes. It is not safe to change the chunksize
Example: `chunksize=75 megabytes` setting of an existing remote.
Note that enabling chunking on an existing remote with non-chunked
files is not recommended, nor is changing the chunksize.
Setup example: Setup example:


@ -5,9 +5,9 @@ for providing 50 gb of free storage if you sign up with its Android client.
git-annex can use Box as a [[special remote|special_remotes]]. git-annex can use Box as a [[special remote|special_remotes]].
Recent versions of git-annex make this very easy to set up: Recent versions of git-annex make this very easy to set up:
WEBDAV_USERNAME=you@example.com WEBDAV_PASSWORD=xxxxxxx git annex initremote box.com type=webdav url=https://dav.box.com/dav/git-annex chunksize=75mb encryption=shared WEBDAV_USERNAME=you@example.com WEBDAV_PASSWORD=xxxxxxx git annex initremote box.com type=webdav url=https://dav.box.com/dav/git-annex chunk=50mb encryption=shared
Note the use of chunksize; Box has a 100 mb maximum file size, and this Note the use of [[chunking]]; Box has a 100 mb maximum file size, and this
breaks up large files into chunks before that limit is reached. breaks up large files into chunks before that limit is reached.
# old davfs2 method # old davfs2 method
@ -58,7 +58,7 @@ Create the special remote, in your git-annex repository.
** This example is non-encrypted; fill in your gpg key ID for a securely ** This example is non-encrypted; fill in your gpg key ID for a securely
encrypted special remote! ** encrypted special remote! **
git annex initremote box.com type=directory directory=/media/box.com chunksize=2mb encryption=none git annex initremote box.com type=directory directory=/media/box.com chunk=2mb encryption=none
Now git-annex can copy files to box.com, get files from it, etc, just like Now git-annex can copy files to box.com, get files from it, etc, just like
with any other special remote. with any other special remote.


@ -124,7 +124,7 @@ Executable git-annex
if flag(TestSuite) if flag(TestSuite)
Build-Depends: tasty (>= 0.7), tasty-hunit, tasty-quickcheck, tasty-rerun, Build-Depends: tasty (>= 0.7), tasty-hunit, tasty-quickcheck, tasty-rerun,
optparse-applicative optparse-applicative, crypto-api
CPP-Options: -DWITH_TESTSUITE CPP-Options: -DWITH_TESTSUITE
if flag(TDFA) if flag(TDFA)
@ -142,7 +142,7 @@ Executable git-annex
CPP-Options: -DWITH_S3 CPP-Options: -DWITH_S3
if flag(WebDAV) if flag(WebDAV)
Build-Depends: DAV ((>= 0.3 && < 0.6) || > 0.6), Build-Depends: DAV (> 0.6),
http-client, http-conduit, http-types, lifted-base http-client, http-conduit, http-types, lifted-base
CPP-Options: -DWITH_WEBDAV CPP-Options: -DWITH_WEBDAV