Merge branch 'master' into assistant

This commit is contained in:
Joey Hess 2012-07-01 21:00:43 -04:00
commit 7625319c2c
35 changed files with 526 additions and 93 deletions

View file

@ -128,7 +128,7 @@ newState gitrepo = AnnexState
{- Makes an Annex state object for the specified git repo.
- Ensures the config is read, if it was not already. -}
new :: Git.Repo -> IO AnnexState
new gitrepo = newState <$> Git.Config.read gitrepo
new = newState <$$> Git.Config.read
{- performs an action in the Annex monad -}
run :: AnnexState -> Annex a -> IO (a, AnnexState)

View file

@ -94,7 +94,7 @@ performRemote key file backend numcopies remote =
( return True
, ifM (Annex.getState Annex.fast)
( return False
, Remote.retrieveKeyFile remote key tmp
, Remote.retrieveKeyFile remote key Nothing tmp
)
)

View file

@ -12,6 +12,7 @@ import Command
import qualified Remote
import Annex.Content
import qualified Command.Move
import Logs.Transfer
def :: [Command]
def = [withOptions [Command.Move.fromOption] $ command "get" paramPaths seek
@ -25,24 +26,24 @@ start :: Maybe Remote -> FilePath -> (Key, Backend) -> CommandStart
start from file (key, _) = stopUnless (not <$> inAnnex key) $
autoCopies file key (<) $ \_numcopies ->
case from of
Nothing -> go $ perform key
Nothing -> go $ perform key file
Just src ->
-- get --from = copy --from
stopUnless (Command.Move.fromOk src key) $
go $ Command.Move.fromPerform src False key
go $ Command.Move.fromPerform src False key file
where
go a = do
showStart "get" file
next a
next a
perform :: Key -> CommandPerform
perform key = stopUnless (getViaTmp key $ getKeyFile key) $
perform :: Key -> FilePath -> CommandPerform
perform key file = stopUnless (getViaTmp key $ getKeyFile key file) $
next $ return True -- no cleanup needed
{- Try to find a copy of the file in one of the remotes,
- and copy it to here. -}
getKeyFile :: Key -> FilePath -> Annex Bool
getKeyFile key file = dispatch =<< Remote.keyPossibilities key
getKeyFile :: Key -> FilePath -> FilePath -> Annex Bool
getKeyFile key file dest = dispatch =<< Remote.keyPossibilities key
where
dispatch [] = do
showNote "not available"
@ -64,7 +65,7 @@ getKeyFile key file = dispatch =<< Remote.keyPossibilities key
| Remote.hasKeyCheap r =
either (const False) id <$> Remote.hasKey r key
| otherwise = return True
docopy r continue = do
docopy r continue = download (Remote.uuid r) key (Just file) $ do
showAction $ "from " ++ Remote.name r
ifM (Remote.retrieveKeyFile r key file)
ifM (Remote.retrieveKeyFile r key (Just file) dest)
( return True , continue)

View file

@ -16,6 +16,7 @@ import qualified Remote
import Annex.UUID
import qualified Option
import Logs.Presence
import Logs.Transfer
def :: [Command]
def = [withOptions options $ command "move" paramPaths seek
@ -68,9 +69,9 @@ toStart dest move file key = do
then stop -- not here, so nothing to do
else do
showMoveAction move file
next $ toPerform dest move key
toPerform :: Remote -> Bool -> Key -> CommandPerform
toPerform dest move key = moveLock move key $ do
next $ toPerform dest move key file
toPerform :: Remote -> Bool -> Key -> FilePath -> CommandPerform
toPerform dest move key file = moveLock move key $ do
-- Checking the remote is expensive, so not done in the start step.
-- In fast mode, location tracking is assumed to be correct,
-- and an explicit check is not done, when copying. When moving,
@ -88,7 +89,8 @@ toPerform dest move key = moveLock move key $ do
stop
Right False -> do
showAction $ "to " ++ Remote.name dest
ok <- Remote.storeKey dest key
ok <- upload (Remote.uuid dest) key (Just file) $
Remote.storeKey dest key (Just file)
if ok
then finish
else do
@ -118,7 +120,7 @@ fromStart src move file key
where
go = stopUnless (fromOk src key) $ do
showMoveAction move file
next $ fromPerform src move key
next $ fromPerform src move key file
fromOk :: Remote -> Key -> Annex Bool
fromOk src key
| Remote.hasKeyCheap src =
@ -129,13 +131,14 @@ fromOk src key
u <- getUUID
remotes <- Remote.keyPossibilities key
return $ u /= Remote.uuid src && elem src remotes
fromPerform :: Remote -> Bool -> Key -> CommandPerform
fromPerform src move key = moveLock move key $
fromPerform :: Remote -> Bool -> Key -> FilePath -> CommandPerform
fromPerform src move key file = moveLock move key $
ifM (inAnnex key)
( handle move True
, do
, download (Remote.uuid src) key (Just file) $ do
showAction $ "from " ++ Remote.name src
ok <- getViaTmp key $ Remote.retrieveKeyFile src key
ok <- getViaTmp key $
Remote.retrieveKeyFile src key (Just file)
handle move ok
)
where

View file

@ -31,6 +31,7 @@ import Logs.Trust
import Remote
import Config
import Utility.Percentage
import Logs.Transfer
-- a named computation that produces a statistic
type Stat = StatState (Maybe (String, StatState String))
@ -70,6 +71,7 @@ fast_stats =
, remote_list SemiTrusted "semitrusted"
, remote_list UnTrusted "untrusted"
, remote_list DeadTrusted "dead"
, transfer_list
, disk_size
]
slow_stats :: [Stat]
@ -170,6 +172,24 @@ bloom_info = stat "bloom filter size" $ json id $ do
return $ size ++ note
transfer_list :: Stat
transfer_list = stat "transfers in progress" $ nojson $ lift $ do
uuidmap <- Remote.remoteMap id
ts <- getTransfers
if null ts
then return "none"
else return $ pp uuidmap "" $ sort ts
where
pp _ c [] = c
pp uuidmap c ((t, i):xs) = "\n\t" ++ line uuidmap t i ++ pp uuidmap c xs
line uuidmap t i = unwords
[ show (transferDirection t) ++ "ing"
, fromMaybe (show $ transferKey t) (associatedFile i)
, if transferDirection t == Upload then "to" else "from"
, maybe (fromUUID $ transferRemote t) Remote.name $
M.lookup (transferRemote t) uuidmap
]
disk_size :: Stat
disk_size = stat "available local disk space" $ json id $ lift $
calcfree

View file

@ -26,6 +26,7 @@ import Utility.SafeCommand as X
import Utility.Path as X
import Utility.Directory as X
import Utility.Monad as X
import Utility.Applicative as X
import Utility.FileSystemEncoding as X
import Utility.PartialPrelude as X

View file

@ -18,6 +18,7 @@ module Locations (
gitAnnexBadDir,
gitAnnexBadLocation,
gitAnnexUnusedLog,
gitAnnexTransferDir,
gitAnnexJournalDir,
gitAnnexJournalLock,
gitAnnexIndex,
@ -127,6 +128,11 @@ gitAnnexBadLocation key r = gitAnnexBadDir r </> keyFile key
gitAnnexUnusedLog :: FilePath -> Git.Repo -> FilePath
gitAnnexUnusedLog prefix r = gitAnnexDir r </> (prefix ++ "unused")
{- .git/annex/transfer/ is used is used to record keys currently
- being transferred. -}
gitAnnexTransferDir :: Git.Repo -> FilePath
gitAnnexTransferDir r = addTrailingPathSeparator $ gitAnnexDir r </> "transfer"
{- .git/annex/journal/ is used to journal changes made to the git-annex
- branch -}
gitAnnexJournalDir :: Git.Repo -> FilePath

View file

@ -48,7 +48,7 @@ addLog file line = Annex.Branch.change file $ \s ->
{- Reads a log file.
- Note that the LogLines returned may be in any order. -}
readLog :: FilePath -> Annex [LogLine]
readLog file = parseLog <$> Annex.Branch.get file
readLog = parseLog <$$> Annex.Branch.get
{- Parses a log file. Unparseable lines are ignored. -}
parseLog :: String -> [LogLine]

167
Logs/Transfer.hs Normal file
View file

@ -0,0 +1,167 @@
{- git-annex transfer information files
-
- Copyright 2012 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Logs.Transfer where
import Common.Annex
import Annex.Perms
import Annex.Exception
import qualified Git
import Types.Remote
import Control.Concurrent
import System.Posix.Types
import Data.Time.Clock
{- Enough information to uniquely identify a transfer, used as the filename
- of the transfer information file. -}
data Transfer = Transfer
{ transferDirection :: Direction
, transferRemote :: UUID
, transferKey :: Key
}
deriving (Show, Eq, Ord)
{- Information about a Transfer, stored in the transfer information file. -}
data TransferInfo = TransferInfo
{ startedTime :: UTCTime
, transferPid :: Maybe ProcessID
, transferThread :: Maybe ThreadId
, bytesComplete :: Maybe Integer
, associatedFile :: Maybe FilePath
}
deriving (Show, Eq, Ord)
data Direction = Upload | Download
deriving (Eq, Ord)
instance Show Direction where
show Upload = "upload"
show Download = "download"
readDirection :: String -> Maybe Direction
readDirection "upload" = Just Upload
readDirection "download" = Just Download
readDirection _ = Nothing
upload :: UUID -> Key -> AssociatedFile -> Annex a -> Annex a
upload u key file a = transfer (Transfer Upload u key) file a
download :: UUID -> Key -> AssociatedFile -> Annex a -> Annex a
download u key file a = transfer (Transfer Download u key) file a
{- Runs a transfer action. Creates and locks the transfer information file
- while the action is running. Will throw an error if the transfer is
- already in progress.
-}
transfer :: Transfer -> Maybe FilePath -> Annex a -> Annex a
transfer t file a = do
tfile <- fromRepo $ transferFile t
createAnnexDirectory $ takeDirectory tfile
mode <- annexFileMode
info <- liftIO $ TransferInfo
<$> getCurrentTime
<*> pure Nothing -- pid not stored in file, so omitted for speed
<*> pure Nothing -- threadid not stored in file, so omitted for speed
<*> pure Nothing -- not 0; transfer may be resuming
<*> pure file
bracketIO (prep tfile mode info) (cleanup tfile) a
where
prep tfile mode info = do
fd <- openFd tfile ReadWrite (Just mode)
defaultFileFlags { trunc = True }
locked <- catchMaybeIO $
setLock fd (WriteLock, AbsoluteSeek, 0, 0)
when (locked == Nothing) $
error $ "transfer already in progress"
h <- fdToHandle fd
hPutStr h $ writeTransferInfo info
hFlush h
return h
cleanup tfile h = do
removeFile tfile
hClose h
{- If a transfer is still running, returns its TransferInfo. -}
checkTransfer :: Transfer -> Annex (Maybe TransferInfo)
checkTransfer t = do
mode <- annexFileMode
tfile <- fromRepo $ transferFile t
mfd <- liftIO $ catchMaybeIO $
openFd tfile ReadOnly (Just mode) defaultFileFlags
case mfd of
Nothing -> return Nothing -- failed to open file; not running
Just fd -> do
locked <- liftIO $
getLock fd (WriteLock, AbsoluteSeek, 0, 0)
case locked of
Nothing -> do
liftIO $ closeFd fd
return Nothing
Just (pid, _) -> liftIO $ do
h <- fdToHandle fd
info <- readTransferInfo pid
<$> hGetContentsStrict h
hClose h
return info
{- Gets all currently running transfers. -}
getTransfers :: Annex [(Transfer, TransferInfo)]
getTransfers = do
transfers <- catMaybes . map parseTransferFile <$> findfiles
infos <- mapM checkTransfer transfers
return $ map (\(t, Just i) -> (t, i)) $
filter running $ zip transfers infos
where
findfiles = liftIO . dirContentsRecursive
=<< fromRepo gitAnnexTransferDir
running (_, i) = isJust i
{- The transfer information file to use for a given Transfer. -}
transferFile :: Transfer -> Git.Repo -> FilePath
transferFile (Transfer direction u key) r = gitAnnexTransferDir r
</> show direction
</> fromUUID u
</> keyFile key
{- Parses a transfer information filename to a Transfer. -}
parseTransferFile :: FilePath -> Maybe Transfer
parseTransferFile file =
case drop (length bits - 3) bits of
[direction, u, key] -> Transfer
<$> readDirection direction
<*> pure (toUUID u)
<*> fileKey key
_ -> Nothing
where
bits = splitDirectories file
writeTransferInfo :: TransferInfo -> String
writeTransferInfo info = unlines
-- transferPid is not included; instead obtained by looking at
-- the process that locks the file.
-- transferThread is not included; not relevant for other processes
[ show $ startedTime info
-- bytesComplete is not included; changes too fast
, fromMaybe "" $ associatedFile info -- comes last; arbitrary content
]
readTransferInfo :: ProcessID -> String -> Maybe TransferInfo
readTransferInfo pid s =
case bits of
[time] -> TransferInfo
<$> readish time
<*> pure (Just pid)
<*> pure Nothing
<*> pure Nothing
<*> pure filename
_ -> Nothing
where
(bits, filebits) = splitAt 1 $ lines s
filename
| null filebits = Nothing
| otherwise = Just $ join "\n" filebits

View file

@ -108,8 +108,8 @@ bupSplitParams r buprepo k src = do
return $ bupParams "split" buprepo
(os ++ [Param "-n", Param (bupRef k), src])
store :: Git.Repo -> BupRepo -> Key -> Annex Bool
store r buprepo k = do
store :: Git.Repo -> BupRepo -> Key -> AssociatedFile -> Annex Bool
store r buprepo k _f = do
src <- inRepo $ gitAnnexLocation k
params <- bupSplitParams r buprepo k (File src)
liftIO $ boolSystem "bup" params
@ -122,11 +122,11 @@ storeEncrypted r buprepo (cipher, enck) k = do
withEncryptedHandle cipher (L.readFile src) $ \h ->
pipeBup params (Just h) Nothing
retrieve :: BupRepo -> Key -> FilePath -> Annex Bool
retrieve buprepo k f = do
retrieve :: BupRepo -> Key -> AssociatedFile -> FilePath -> Annex Bool
retrieve buprepo k _f d = do
let params = bupParams "join" buprepo [Param $ bupRef k]
liftIO $ catchBoolIO $ do
tofile <- openFile f WriteMode
tofile <- openFile d WriteMode
pipeBup params Nothing (Just tofile)
retrieveCheap :: BupRepo -> Key -> FilePath -> Annex Bool

View file

@ -122,8 +122,8 @@ withCheckedFiles check (Just _) d k a = go $ locations d k
withStoredFiles :: ChunkSize -> FilePath -> Key -> ([FilePath] -> IO Bool) -> IO Bool
withStoredFiles = withCheckedFiles doesFileExist
store :: FilePath -> ChunkSize -> Key -> Annex Bool
store d chunksize k = do
store :: FilePath -> ChunkSize -> Key -> AssociatedFile -> Annex Bool
store d chunksize k _f = do
src <- inRepo $ gitAnnexLocation k
metered k $ \meterupdate ->
storeHelper d chunksize k $ \dests ->
@ -242,8 +242,8 @@ storeHelper d chunksize key a = prep <&&> check <&&> go
preventWrite dir
return (not $ null stored)
retrieve :: FilePath -> ChunkSize -> Key -> FilePath -> Annex Bool
retrieve d chunksize k f = metered k $ \meterupdate ->
retrieve :: FilePath -> ChunkSize -> Key -> AssociatedFile -> FilePath -> Annex Bool
retrieve d chunksize k _ f = metered k $ \meterupdate ->
liftIO $ withStoredFiles chunksize d k $ \files ->
catchBoolIO $ do
meteredWriteFile' meterupdate f files feeder
@ -272,7 +272,7 @@ retrieveCheap d _ k f = liftIO $ withStoredFiles Nothing d k go
remove :: FilePath -> ChunkSize -> Key -> Annex Bool
remove d chunksize k = liftIO $ withStoredFiles chunksize d k go
where
go files = all id <$> mapM removefile files
go = all id <$$> mapM removefile
removefile file = catchBoolIO $ do
let dir = parentDir file
allowWrite dir

View file

@ -21,6 +21,7 @@ import qualified Git.Config
import qualified Git.Construct
import qualified Annex
import Logs.Presence
import Logs.Transfer
import Annex.UUID
import qualified Annex.Content
import qualified Annex.BranchState
@ -219,14 +220,19 @@ dropKey r key
]
{- Tries to copy a key's content from a remote's annex to a file. -}
copyFromRemote :: Git.Repo -> Key -> FilePath -> Annex Bool
copyFromRemote r key file
copyFromRemote :: Git.Repo -> Key -> AssociatedFile -> FilePath -> Annex Bool
copyFromRemote r key file dest
| not $ Git.repoIsUrl r = guardUsable r False $ do
params <- rsyncParams r
loc <- liftIO $ gitAnnexLocation key r
rsyncOrCopyFile params loc file
| Git.repoIsSsh r = rsyncHelper =<< rsyncParamsRemote r True key file
| Git.repoIsHttp r = Annex.Content.downloadUrl (keyUrls r key) file
u <- getUUID
-- run copy from perspective of remote
liftIO $ onLocal r $ do
ensureInitialized
loc <- inRepo $ gitAnnexLocation key
upload u key file $
rsyncOrCopyFile params loc dest
| Git.repoIsSsh r = rsyncHelper =<< rsyncParamsRemote r True key dest
| Git.repoIsHttp r = Annex.Content.downloadUrl (keyUrls r key) dest
| otherwise = error "copying from non-ssh, non-http repo not supported"
copyFromRemoteCheap :: Git.Repo -> Key -> FilePath -> Annex Bool
@ -236,23 +242,25 @@ copyFromRemoteCheap r key file
liftIO $ catchBoolIO $ createSymbolicLink loc file >> return True
| Git.repoIsSsh r =
ifM (Annex.Content.preseedTmp key file)
( copyFromRemote r key file
( copyFromRemote r key Nothing file
, return False
)
| otherwise = return False
{- Tries to copy a key's content to a remote's annex. -}
copyToRemote :: Git.Repo -> Key -> Annex Bool
copyToRemote r key
copyToRemote :: Git.Repo -> Key -> AssociatedFile -> Annex Bool
copyToRemote r key file
| not $ Git.repoIsUrl r = guardUsable r False $ commitOnCleanup r $ do
keysrc <- inRepo $ gitAnnexLocation key
params <- rsyncParams r
u <- getUUID
-- run copy from perspective of remote
liftIO $ onLocal r $ do
ensureInitialized
Annex.Content.saveState True `after`
Annex.Content.getViaTmp key
(rsyncOrCopyFile params keysrc)
download u key file $
Annex.Content.saveState True `after`
Annex.Content.getViaTmp key
(rsyncOrCopyFile params keysrc)
| Git.repoIsSsh r = commitOnCleanup r $ do
keysrc <- inRepo $ gitAnnexLocation key
rsyncHelper =<< rsyncParamsRemote r False key keysrc

View file

@ -59,14 +59,14 @@ encryptableRemote c storeKeyEncrypted retrieveKeyFileEncrypted r =
cost = cost r + encryptedRemoteCostAdj
}
where
store k = cip k >>= maybe
(storeKey r k)
store k f = cip k >>= maybe
(storeKey r k f)
(`storeKeyEncrypted` k)
retrieve k f = cip k >>= maybe
(retrieveKeyFile r k f)
(\enck -> retrieveKeyFileEncrypted enck k f)
retrieveCheap k f = cip k >>= maybe
(retrieveKeyFileCheap r k f)
retrieve k f d = cip k >>= maybe
(retrieveKeyFile r k f d)
(\enck -> retrieveKeyFileEncrypted enck k d)
retrieveCheap k d = cip k >>= maybe
(retrieveKeyFileCheap r k d)
(\_ -> return False)
withkey a k = cip k >>= maybe (a k) (a . snd)
cip = cipherKey c

View file

@ -27,8 +27,8 @@ addHooks' r Nothing Nothing = r
addHooks' r starthook stophook = r'
where
r' = r
{ storeKey = \k -> wrapper $ storeKey r k
, retrieveKeyFile = \k f -> wrapper $ retrieveKeyFile r k f
{ storeKey = \k f -> wrapper $ storeKey r k f
, retrieveKeyFile = \k f d -> wrapper $ retrieveKeyFile r k f d
, retrieveKeyFileCheap = \k f -> wrapper $ retrieveKeyFileCheap r k f
, removeKey = \k -> wrapper $ removeKey r k
, hasKey = \k -> wrapper $ hasKey r k

View file

@ -101,8 +101,8 @@ runHook hooktype hook k f a = maybe (return False) run =<< lookupHook hooktype h
return False
)
store :: String -> Key -> Annex Bool
store h k = do
store :: String -> Key -> AssociatedFile -> Annex Bool
store h k _f = do
src <- inRepo $ gitAnnexLocation k
runHook h "store" k (Just src) $ return True
@ -112,8 +112,8 @@ storeEncrypted h (cipher, enck) k = withTmp enck $ \tmp -> do
liftIO $ withEncryptedContent cipher (L.readFile src) $ L.writeFile tmp
runHook h "store" enck (Just tmp) $ return True
retrieve :: String -> Key -> FilePath -> Annex Bool
retrieve h k f = runHook h "retrieve" k (Just f) $ return True
retrieve :: String -> Key -> AssociatedFile -> FilePath -> Annex Bool
retrieve h k _f d = runHook h "retrieve" k (Just d) $ return True
retrieveCheap :: String -> Key -> FilePath -> Annex Bool
retrieveCheap _ _ _ = return False

View file

@ -99,8 +99,8 @@ rsyncUrls o k = map use annexHashes
use h = rsyncUrl o </> h k </> rsyncEscape o (f </> f)
f = keyFile k
store :: RsyncOpts -> Key -> Annex Bool
store o k = rsyncSend o k <=< inRepo $ gitAnnexLocation k
store :: RsyncOpts -> Key -> AssociatedFile -> Annex Bool
store o k _f = rsyncSend o k <=< inRepo $ gitAnnexLocation k
storeEncrypted :: RsyncOpts -> (Cipher, Key) -> Key -> Annex Bool
storeEncrypted o (cipher, enck) k = withTmp enck $ \tmp -> do
@ -108,8 +108,8 @@ storeEncrypted o (cipher, enck) k = withTmp enck $ \tmp -> do
liftIO $ withEncryptedContent cipher (L.readFile src) $ L.writeFile tmp
rsyncSend o enck tmp
retrieve :: RsyncOpts -> Key -> FilePath -> Annex Bool
retrieve o k f = untilTrue (rsyncUrls o k) $ \u -> rsyncRemote o
retrieve :: RsyncOpts -> Key -> AssociatedFile -> FilePath -> Annex Bool
retrieve o k _ f = untilTrue (rsyncUrls o k) $ \u -> rsyncRemote o
-- use inplace when retrieving to support resuming
[ Param "--inplace"
, Param u
@ -117,11 +117,11 @@ retrieve o k f = untilTrue (rsyncUrls o k) $ \u -> rsyncRemote o
]
retrieveCheap :: RsyncOpts -> Key -> FilePath -> Annex Bool
retrieveCheap o k f = ifM (preseedTmp k f) ( retrieve o k f , return False )
retrieveCheap o k f = ifM (preseedTmp k f) ( retrieve o k undefined f , return False )
retrieveEncrypted :: RsyncOpts -> (Cipher, Key) -> Key -> FilePath -> Annex Bool
retrieveEncrypted o (cipher, enck) _ f = withTmp enck $ \tmp -> do
ifM (retrieve o enck tmp)
ifM (retrieve o enck undefined tmp)
( liftIO $ catchBoolIO $ do
withDecryptedContent cipher (L.readFile tmp) $ L.writeFile f
return True

View file

@ -113,8 +113,8 @@ s3Setup u c = handlehost $ M.lookup "host" c
-- be human-readable
M.delete "bucket" defaults
store :: Remote -> Key -> Annex Bool
store r k = s3Action r False $ \(conn, bucket) -> do
store :: Remote -> Key -> AssociatedFile -> Annex Bool
store r k _f = s3Action r False $ \(conn, bucket) -> do
dest <- inRepo $ gitAnnexLocation k
res <- liftIO $ storeHelper (conn, bucket) r k dest
s3Bool res
@ -149,12 +149,12 @@ storeHelper (conn, bucket) r k file = do
xheaders = filter isxheader $ M.assocs $ fromJust $ config r
isxheader (h, _) = "x-amz-" `isPrefixOf` h
retrieve :: Remote -> Key -> FilePath -> Annex Bool
retrieve r k f = s3Action r False $ \(conn, bucket) -> do
retrieve :: Remote -> Key -> AssociatedFile -> FilePath -> Annex Bool
retrieve r k _f d = s3Action r False $ \(conn, bucket) -> do
res <- liftIO $ getObject conn $ bucketKey r bucket k
case res of
Right o -> do
liftIO $ L.writeFile f $ obj_data o
liftIO $ L.writeFile d $ obj_data o
return True
Left e -> s3Warning e

View file

@ -51,21 +51,21 @@ gen r _ _ =
remotetype = remote
}
downloadKey :: Key -> FilePath -> Annex Bool
downloadKey key file = get =<< getUrls key
downloadKey :: Key -> AssociatedFile -> FilePath -> Annex Bool
downloadKey key _file dest = get =<< getUrls key
where
get [] = do
warning "no known url"
return False
get urls = do
showOutput -- make way for download progress bar
downloadUrl urls file
downloadUrl urls dest
downloadKeyCheap :: Key -> FilePath -> Annex Bool
downloadKeyCheap _ _ = return False
uploadKey :: Key -> Annex Bool
uploadKey _ = do
uploadKey :: Key -> AssociatedFile -> Annex Bool
uploadKey _ _ = do
warning "upload to web not supported"
return False

View file

@ -33,6 +33,9 @@ data RemoteTypeA a = RemoteType {
instance Eq (RemoteTypeA a) where
x == y = typename x == typename y
{- A filename associated with a Key, for display to user. -}
type AssociatedFile = Maybe FilePath
{- An individual remote. -}
data RemoteA a = Remote {
-- each Remote has a unique uuid
@ -42,9 +45,9 @@ data RemoteA a = Remote {
-- Remotes have a use cost; higher is more expensive
cost :: Int,
-- Transfers a key to the remote.
storeKey :: Key -> a Bool,
storeKey :: Key -> AssociatedFile -> a Bool,
-- retrieves a key's contents to a file
retrieveKeyFile :: Key -> FilePath -> a Bool,
retrieveKeyFile :: Key -> AssociatedFile -> FilePath -> a Bool,
-- retrieves a key's contents to a tmp file, if it can be done cheaply
retrieveKeyFileCheap :: Key -> FilePath -> a Bool,
-- removes a key's contents

16
Utility/Applicative.hs Normal file
View file

@ -0,0 +1,16 @@
{- applicative stuff
-
- Copyright 2012 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Utility.Applicative where
{- Like <$> , but supports one level of currying.
-
- foo v = bar <$> action v == foo = bar <$$> action
-}
(<$$>) :: Functor f => (a -> b) -> (c -> f a) -> c -> f b
f <$$> v = fmap f . v
infixr 4 <$$>

12
debian/changelog vendored
View file

@ -1,4 +1,12 @@
git-annex (3.20120625) UNRELEASED; urgency=low
git-annex (3.20120630) UNRELEASED; urgency=low
* get, move, copy: Now refuse to do anything when the requested file
transfer is already in progress by another process.
* status: Lists transfers that are currently in progress.
-- Joey Hess <joeyh@debian.org> Sun, 01 Jul 2012 15:04:37 -0400
git-annex (3.20120629) unstable; urgency=low
* cabal: Only try to use inotify on Linux.
* Version build dependency on STM, and allow building without it,
@ -11,7 +19,7 @@ git-annex (3.20120625) UNRELEASED; urgency=low
in their names.
* sync: Automatically resolves merge conflicts.
-- Joey Hess <joeyh@debian.org> Mon, 25 Jun 2012 11:38:12 -0400
-- Joey Hess <joeyh@debian.org> Fri, 29 Jun 2012 10:17:49 -0400
git-annex (3.20120624) unstable; urgency=low

View file

@ -0,0 +1,12 @@
[[!comment format=mdwn
username="https://www.google.com/accounts/o8/id?id=AItOawkSq2FDpK2n66QRUxtqqdbyDuwgbQmUWus"
nickname="Jimmy"
subject="comment 2"
date="2012-06-29T12:02:48Z"
content="""
Doing,
sudo sysctl -w kern.maxfilesperproc=400000
Somewhat works for me, git-annex watch at least starts up and takes a while to scan the directory, but it's not ideal. Also, creating files seems to work okay, when I remove a file the changes don't seem to get pushed across my other repos, running a sync on the remote repo fixes things.
"""]]

View file

@ -0,0 +1,28 @@
When having "git annex watch" running, unlocking files causes the watcher
to immediately lock/commit them.
----
Possible approaches:
* The watcher could detect unlocked files by checking if newly added files
are a typechange of a file already in git. But this would add git overhead
to every file add.
* `git annex unlock` could add some type of flag file, which the assistant
could check. This would work fine, for users who want to use `git annex
unlock` with the assistant. That's probably not simple enough for most
users, though.
* There could be a UI in the assistant to pick a file and unlock it.
The assistant would have its own list of files it knows are unlocked.
But I'm trying to avoid mandatory UI to use the assistant.
* Perhaps instead, have a directory, like "edit". The assistant could notice
when files move into this special directory, and automatically unlock them.
Then when they're moved out, automatically commit them.
* Alternatively, files that are moved out of the repository entirely could be
automatically unlocked, and then when they're moved back in, it would
automatically do the right thing. This may be worth implementing in
combination with the "edit" directory, as different use cases would work
better with one or the other. However, I don't currently get inotify
events when files are moved out of the repository (well, I do, but it
just says "file moved", with no forwarding address, so I don't know
how to find the file to unlock it.

View file

@ -0,0 +1,8 @@
[[!comment format=mdwn
username="https://www.google.com/accounts/o8/id?id=AItOawkSq2FDpK2n66QRUxtqqdbyDuwgbQmUWus"
nickname="Jimmy"
subject="comment 1"
date="2012-06-28T13:39:18Z"
content="""
That is a known problem/bug which is listed at [[design/assistant/inotify]]
"""]]

View file

@ -0,0 +1,22 @@
Today is a planning day. I have only a few days left before I'm off to
Nicaragua for [DebConf](http://debconf12.debconf.org/), where I'll only
have smaller chunks of time without interruptions. So it's important to get
some well-defined smallish chunks designed that I can work on later. See
bulleted action items below (now moved to [[syncing]]. Each
should be around 1-2 hours unless it turns out to be 8 hours... :)
First, worked on writing down a design, and some data types, for data transfer
tracking (see [[syncing]] page). Found that writing down these simple data
types before I started slinging code has clarified things a lot for me.
Most importantly, I realized that I will need to modify `git-annex-shell`
to record on disk what transfers it's doing, so the assistant can get that
information and use it to both avoid redundant transfers (potentially a big
problem!), and later to allow the user to control them using the web app.
While eventually the user will be able to use the web app to prioritize
transfers, stop and start, throttle, etc, it's important to get the default
behavior right. So I'm thinking about things like how to prioritize uploads
vs downloads, when it's appropriate to have multiple downloads running at
once, etc.

View file

@ -0,0 +1,28 @@
Worked today on two action items from my last blog post:
* on-disk transfers in progress information files (read/write/enumerate)
* locking for the files, so redundant transfer races can be detected,
and failed transfers noticed
That's all done, and used by the `get`, `copy`, and `move` subcommands.
Also, I made `git-annex status` use that information to display any
file transfers that are currently in progress:
joey@gnu:~/lib/sound/misc>git annex status
[...]
transfers in progress:
downloading Vic-303.mp3 from leech
(Webapp, here we come!)
However... Files being sent or received by `git-annex-shell` don't yet
have this transfer info recorded. The problem is that to do so,
`git-annex-shell` will need to be run with a `--remote=` parameter. But
old versions will of course fail when run with such an unknown parameter.
This is a problem I last faced in December 2011 when adding the `--uuid=`
parameter. That time I punted and required the remote `git-annex-shell` be
updated to a new enough version to accept it. But as git-annex gets more widely
used and packaged, that's becoming less an option. I need to find a real
solution to this problem.

View file

@ -8,13 +8,15 @@ available!
* If a file is checked into git as a normal file and gets modified
(or merged, etc), it will be converted into an annexed file.
See [[blog/day_7__bugfixes]]
See [[blog/day_7__bugfixes]].
* When you `git annex unlock` a file, it will immediately be re-locked.
See [[bugs/watcher_commits_unlocked_files]].
* Kqueue has to open every directory it watches, so too many directories
will run it out of the max number of open files (typically 1024), and fail.
I may need to fork off multiple watcher processes to handle this.
See [[bugs/Issue_on_OSX_with_some_system_limits]].
## beyond Linux
@ -42,6 +44,8 @@ I'd also like to support OSX and if possible the BSDs.
* [man page](http://www.freebsd.org/cgi/man.cgi?query=kqueue&apropos=0&sektion=0&format=html)
* <https://github.com/gorakhargosh/watchdog/blob/master/src/watchdog/observers/kqueue.py> (good example program)
*kqueue is now supported*
* hfsevents ([haskell bindings](http://hackage.haskell.org/package/hfsevents))
is OSX specific.
@ -71,9 +75,6 @@ I'd also like to support OSX and if possible the BSDs.
- honor .gitignore, not adding files it excludes (difficult, probably
needs my own .gitignore parser to avoid excessive running of git commands
to check for ignored files)
- Possibly, when a directory is moved out of the annex location,
unannex its contents. (Does inotify tell us where the directory moved
to so we can access it?)
## the races
@ -125,6 +126,17 @@ Many races need to be dealt with by this code. Here are some of them.
Not a problem; The removal event removes the old file from the index, and
the add event adds the new one.
* Symlink appears, but is then deleted before it can be processed.
Leads to an ugly message, otherwise no problem:
./me: readSymbolicLink: does not exist (No such file or directory)
Here `me` is a file that was in a conflicted merge, which got
removed as part of the resolution. This is probably coming from the watcher
thread, which sees the newly added symlink (created by the git merge),
but finds it deleted (by the conflict resolver) by the time it processes it.
## done
- on startup, add any files that have appeared since last run **done**

View file

@ -9,6 +9,6 @@ To get this info for downloads, git-annex can watch the file as it arrives
and use its size.
TODO: What about uploads? Will i have to parse rsync's progresss output?
Feed it via a named pipe? Ugh.
Feed it via a named pipe? Ugh. Check into librsync.
This is one of those potentially hidden but time consuming problems.

View file

@ -1,6 +1,37 @@
Once files are added (or removed or moved), need to send those changes to
all the other git clones, at both the git level and the key/value level.
## action items
* on-disk transfers in progress information files (read/write/enumerate)
**done**
* locking for the files, so redundant transfer races can be detected,
and failed transfers noticed **done**
* transfer info for git-annex-shell (problem: how to add a switch
with the necessary info w/o breaking backwards compatability?)
* update files as transfers proceed. See [[progressbars]]
(updating for downloads is easy; for uploads is hard)
* add Transfer queue TChan
* enqueue Transfers (Uploads) as new files are added to the annex by
Watcher.
* enqueue Tranferrs (Downloads) as new dangling symlinks are noticed by
Watcher.
* add TransferInfo Map to DaemonStatus for tracking transfers in progress.
* Poll transfer in progress info files for changes (use inotify again!
wow! hammer, meet nail..), and update the TransferInfo Map
* Write basic Transfer handling thread. Multiple such threads need to be
able to be run at once. Each will need its own independant copy of the
Annex state monad.
* Write transfer control thread, which decides when to launch transfers.
* At startup, and possibly periodically, look for files we have that
location tracking indicates remotes do not, and enqueue Uploads for
them. Also, enqueue Downloads for any files we're missing.
* Find a way to probe available outgoing bandwidth, to throttle so
we don't bufferbloat the network to death.
* git-annex needs a simple speed control knob, which can be plumbed
through to, at least, rsync. A good job for an hour in an
airport somewhere.
## git syncing
1. Can use `git annex sync`, which already handles bidirectional syncing.
@ -45,6 +76,46 @@ and with appropriate rate limiting and control facilities.
This probably will need lots of refinements to get working well.
### first pass: flood syncing
Before mapping the network, the best we can do is flood all files out to every
reachable remote. This is worth doing first, since it's the simplest way to
get the basic functionality of the assistant to work. And we'll need this
anyway.
### transfer tracking
* Upload added to queue by the watcher thread when it adds content.
* Download added to queue by the watcher thread when it seens new symlinks
that lack content.
* Transfer threads started/stopped as necessary to move data.
(May sometimes want multiple threads downloading, or uploading, or even both.)
type TransferQueue = TChan [Transfer]
-- add (M.Map Transfer TransferInfo) to DaemonStatus
startTransfer :: Transfer -> Annex TransferID
stopTransfer :: TransferID -> IO ()
The assistant needs to find out when `git-annex-shell` is receiving or
sending (triggered by another remote), so it can add data for those too.
This is important to avoid uploading content to a remote that is already
downloading it from us, or vice versa, as well as to in future let the web
app manage transfers as user desires.
For files being received, it can see the temp file, but other than lsof
there's no good way to find the pid (and I'd rather not kill blindly).
For files being sent, there's no filesystem indication. So git-annex-shell
(and other git-annex transfer processes) should write a status file to disk.
Can use file locking on these status files to claim upload/download rights,
which will avoid races.
This status file can also be updated periodically to show amount of transfer
complete (necessary for tracking uploads).
## other considerations
This assumes the network is connected. It's often not, so the

View file

@ -18,6 +18,7 @@ others need some manual work. See [[install]] for details.
The git repository has some branches:
* `assistant` contains the new change-tracking daemon
* `ghc7.0` supports versions of ghc older than 7.4, which
had a major change to filename encoding.
* `old-monad-control` is for systems that don't have a newer monad-control
@ -25,6 +26,7 @@ The git repository has some branches:
* `no-ifelse` avoids using the IFelse library
(merge it into master if you need it)
* `no-bloom` avoids using bloom filters. (merge it into master if you need it)
* `no-s3` avoids using the S3 library (merge it into master if you need it)
* `debian-stable` contains the latest backport of git-annex to Debian
stable.
* `tweak-fetch` adds support for the git tweak-fetch hook, which has

View file

@ -0,0 +1,8 @@
[[!comment format=mdwn
username="https://www.google.com/accounts/o8/id?id=AItOawnHrjHxJAm39x8DR4bnbazQO6H0nMNuY9c"
nickname="Damien"
subject="sha256 alternative"
date="2012-06-30T14:34:11Z"
content="""
in reply to comment 6: On my Mac (10.7.4) there's `/usr/bin/shasum -a 256 <file>` command that will produce the same output as `sha256sum <file>`.
"""]]

View file

@ -0,0 +1,8 @@
[[!comment format=mdwn
username="https://www.google.com/accounts/o8/id?id=AItOawnHrjHxJAm39x8DR4bnbazQO6H0nMNuY9c"
nickname="Damien"
subject="gnu commands"
date="2012-07-01T17:03:57Z"
content="""
…and another approach to the same problem: apparently git-annex also relies on the GNU coreutils (for instance, when doing `git annex get .`, `cp` complains about `illegal option -- -`). I do have the GNU coreutils installed with Homebrew, but they are all prefixed with `g`. So maybe you should try `gsha256sum` and `gcp` before `sha256sum` and `cp`, that seems like a more general solution.
"""]]

View file

@ -1,11 +0,0 @@
git-annex 3.20120605 released with [[!toggle text="these changes"]]
[[!toggleable text="""
* sync: Show a nicer message if a user tries to sync to a special remote.
* lock: Reset unlocked file to index, rather than to branch head.
* import: New subcommand, pulls files from a directory outside the annex
and adds them.
* Fix display of warning message when encountering a file that uses an
unsupported backend.
* Require that the SHA256 backend can be used when building, since it's the
default.
* Preserve parent environment when running hooks of the hook special remote."""]]

View file

@ -0,0 +1,12 @@
git-annex 3.20120629 released with [[!toggle text="these changes"]]
[[!toggleable text="""
* cabal: Only try to use inotify on Linux.
* Version build dependency on STM, and allow building without it,
which disables the watch command.
* Avoid ugly failure mode when moving content from a local repository
that is not available.
* Got rid of the last place that did utf8 decoding.
* Accept arbitrarily encoded repository filepaths etc when reading
git config output. This fixes support for remotes with unusual characters
in their names.
* sync: Automatically resolves merge conflicts."""]]

View file

@ -1,5 +1,5 @@
Name: git-annex
Version: 3.20120625
Version: 3.20120629
Cabal-Version: >= 1.8
License: GPL
Maintainer: Joey Hess <joey@kitenet.net>