multicast: New command, uses uftp to multicast annexed files, for eg a classroom setting.

This commit was supported by the NSF-funded DataLad project.
This commit is contained in:
Joey Hess 2017-03-30 19:32:58 -04:00
parent 39e8433d46
commit c3970f6c1a
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
13 changed files with 454 additions and 2 deletions

41
Annex/Multicast.hs Normal file
View file

@ -0,0 +1,41 @@
{- git-annex multicast receive callback
-
- Copyright 2017 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Annex.Multicast where
import Config.Files
import Utility.Env
import Utility.PartialPrelude
import System.Process
import System.IO
import GHC.IO.Handle.FD
multicastReceiveEnv :: String
multicastReceiveEnv = "GIT_ANNEX_MULTICAST_RECEIVE"
multicastCallbackEnv :: IO (FilePath, [(String, String)], Handle)
multicastCallbackEnv = do
gitannex <- readProgramFile
(rfd, wfd) <- createPipeFd
rh <- fdToHandle rfd
environ <- addEntry multicastReceiveEnv (show wfd) <$> getEnvironment
return (gitannex, environ, rh)
-- This is run when uftpd has received a file. Rather than move
-- the file into the annex here, which would require starting up the
-- Annex monad, parsing git config, and verifying the content, simply
-- output to the specified FD the filename. This keeps the time
-- that uftpd is not receiving the next file as short as possible.
runMulticastReceive :: [String] -> String -> IO ()
runMulticastReceive ("-I":_sessionid:fs) hs = case readish hs of
Just fd -> do
h <- fdToHandle fd
mapM_ (hPutStrLn h) fs
hClose h
Nothing -> return ()
runMulticastReceive _ _ = return ()

View file

@ -2,6 +2,8 @@ git-annex (6.20170322) UNRELEASED; urgency=medium
* When a http remote does not expose an annex.uuid config, only warn * When a http remote does not expose an annex.uuid config, only warn
about it once, not every time git-annex is run. about it once, not every time git-annex is run.
* multicast: New command, uses uftp to multicast annexed files, for eg
a classroom setting.
-- Joey Hess <id@joeyh.name> Wed, 29 Mar 2017 12:41:46 -0400 -- Joey Hess <id@joeyh.name> Wed, 29 Mar 2017 12:41:46 -0400

View file

@ -14,6 +14,7 @@ import CmdLine
import Command import Command
import Utility.Env import Utility.Env
import Annex.Ssh import Annex.Ssh
import Annex.Multicast
import Types.Test import Types.Test
import qualified Command.Help import qualified Command.Help
@ -53,6 +54,7 @@ import qualified Command.Describe
import qualified Command.InitRemote import qualified Command.InitRemote
import qualified Command.EnableRemote import qualified Command.EnableRemote
import qualified Command.EnableTor import qualified Command.EnableTor
import qualified Command.Multicast
import qualified Command.Expire import qualified Command.Expire
import qualified Command.Repair import qualified Command.Repair
import qualified Command.Unused import qualified Command.Unused
@ -144,6 +146,7 @@ cmds testoptparser testrunner =
, Command.InitRemote.cmd , Command.InitRemote.cmd
, Command.EnableRemote.cmd , Command.EnableRemote.cmd
, Command.EnableTor.cmd , Command.EnableTor.cmd
, Command.Multicast.cmd
, Command.Reinject.cmd , Command.Reinject.cmd
, Command.Unannex.cmd , Command.Unannex.cmd
, Command.Uninit.cmd , Command.Uninit.cmd
@ -242,4 +245,5 @@ run testoptparser testrunner args = go envmodes
envmodes = envmodes =
[ (sshOptionsEnv, runSshOptions args) [ (sshOptionsEnv, runSshOptions args)
, (sshAskPassEnv, runSshAskPass) , (sshAskPassEnv, runSshAskPass)
, (multicastReceiveEnv, runMulticastReceive args)
] ]

253
Command/Multicast.hs Normal file
View file

@ -0,0 +1,253 @@
{- git-annex command
-
- Copyright 2017 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
{-# LANGUAGE CPP #-}
module Command.Multicast where
import Command
import Logs.Multicast
import Annex.Multicast
import Annex.WorkTree
import Annex.Content
import Annex.UUID
#ifndef mingw32_HOST_OS
import Creds
import Annex.Perms
import Utility.FileMode
#endif
import qualified Limit
import Types.FileMatcher
import qualified Git.LsFiles as LsFiles
import Utility.Hash
import Utility.Tmp
import Config
import Data.Char
import qualified Data.ByteString.Lazy.UTF8 as B8
import qualified Data.Map as M
import Control.Concurrent.Async
cmd :: Command
cmd = command "multicast" SectionCommon "multicast file distribution"
paramNothing (seek <$$> optParser)
data MultiCastAction
= GenAddress
| Send
| Receive
deriving (Show)
data MultiCastOptions = MultiCastOptions MultiCastAction [CommandParam] [FilePath]
deriving (Show)
optParser :: CmdParamsDesc -> Parser MultiCastOptions
optParser _ = MultiCastOptions
<$> (genaddressp <|> sendp <|> receivep)
<*> many uftpopt
<*> cmdParams paramPaths
where
genaddressp = flag' GenAddress
( long "gen-address"
<> help "generate multicast encryption key and store address in git-annex branch"
)
sendp = flag' Send
( long "send"
<> help "multicast files"
)
receivep = flag' Receive
( long "receive"
<> help "listen for multicast files and store in repository"
)
uftpopt = Param <$> strOption
( long "uftp-opt"
<> short 'U'
<> help "passed on to uftp/uftpd"
<> metavar "OPTION"
)
seek :: MultiCastOptions -> CommandSeek
seek (MultiCastOptions GenAddress _ _) = commandAction genAddress
seek (MultiCastOptions Send ups fs) = commandAction $ send ups fs
seek (MultiCastOptions Receive ups []) = commandAction $ receive ups
seek (MultiCastOptions Receive _ _) = giveup "Cannot specify list of files with --receive; this receives whatever files the sender chooses to send."
genAddress :: CommandStart
genAddress = do
showStart "gen-address" ""
k <- uftpKey
(s, ok) <- case k of
KeyContainer s -> liftIO $ genkey (Param s)
KeyFile f -> do
createAnnexDirectory (takeDirectory f)
liftIO $ nukeFile f
liftIO $ protectedOutput $ genkey (File f)
case (ok, parseFingerprint s) of
(False, _) -> giveup $ "uftp_keymgt failed: " ++ s
(_, Nothing) -> giveup $ "Failed to find fingerprint in uftp_keymgt output: " ++ s
(True, Just fp) -> next $ next $ do
recordFingerprint fp =<< getUUID
return True
where
-- Annoyingly, the fingerprint is output to stderr.
genkey p = processTranscript "uftp_keymgt" ps Nothing
where
ps = toCommand $
[ Param "-g"
, keyparam
, p
]
-- uftp only supports rsa up to 2048 which is on the lower
-- limit of secure RSA key sizes. Instead, use an EC curve.
-- Except for on Windows XP, secp521r1 is supported on all
-- platforms by uftp. DJB thinks it's pretty good compared
-- with other NIST curves: "there's one standard NIST curve
-- using a nice prime, namely 25211 but the sheer size of this
-- prime makes it much slower than NIST P-256"
-- (http://blog.cr.yp.to/20140323-ecdsa.html)
-- Since this key is only used to set up the block encryption,
-- its slow speed is ok.
keyparam = Param "ec:secp521r1"
parseFingerprint :: String -> Maybe Fingerprint
parseFingerprint = Fingerprint <$$> lastMaybe . filter isfingerprint . words
where
isfingerprint s =
let os = filter (all isHexDigit) (splitc ':' s)
in length os == 20
send :: [CommandParam] -> [FilePath] -> CommandStart
send ups fs = withTmpFile "send" $ \t h -> do
-- Need to be able to send files with the names of git-annex
-- keys, and uftp does not allow renaming the files that are sent.
-- In a direct mode repository, the annex objects do not have
-- the names of keys, and would have to be copied, which is too
-- expensive.
whenM isDirect $
giveup "Sorry, multicast send cannot be done from a direct mode repository."
showStart "generating file list" ""
fs' <- seekHelper LsFiles.inRepo fs
matcher <- Limit.getMatcher
let addlist f o = whenM (matcher $ MatchingFile $ FileInfo f f) $
liftIO $ hPutStrLn h o
forM_ fs' $ \f -> do
mk <- lookupFile f
case mk of
Nothing -> noop
Just k -> withObjectLoc k (addlist f) (const noop)
liftIO $ hClose h
showEndOk
showStart "sending files" ""
showOutput
serverkey <- uftpKey
u <- getUUID
withAuthList $ \authlist -> do
let ps =
-- Force client authentication.
[ Param "-c"
, Param "-Y", Param "aes256-cbc"
, Param "-h", Param "sha512"
-- Picked ecdh_ecdsa for perfect forward secrecy,
-- and because a EC key exchange algorithm is
-- needed since all keys are EC.
, Param "-e", Param "ecdh_ecdsa"
, Param "-k", uftpKeyParam serverkey
, Param "-U", Param (uftpUID u)
-- only allow clients on the authlist
, Param "-H", Param ("@"++authlist)
-- pass in list of files to send
, Param "-i", File t
] ++ ups
liftIO (boolSystem "uftp" ps) >>= showEndResult
stop
receive :: [CommandParam] -> CommandStart
receive ups = do
showStart "receiving multicast files" ""
showNote "Will continue to run until stopped by ctrl-c"
showOutput
clientkey <- uftpKey
u <- getUUID
(callback, environ, statush) <- liftIO multicastCallbackEnv
tmpobjdir <- fromRepo gitAnnexTmpObjectDir
createAnnexDirectory tmpobjdir
withTmpDirIn tmpobjdir "multicast" $ \tmpdir -> withAuthList $ \authlist ->
abstmpdir <- liftIO $ absPath tmpdir
abscallback <- liftIO $ searchPath callback
let ps =
-- Avoid it running as a daemon.
[ Param "-d"
-- Require encryption.
, Param "-E"
, Param "-k", uftpKeyParam clientkey
, Param "-U", Param (uftpUID u)
-- Only allow servers on the authlist
, Param "-S", Param authlist
-- Receive files into tmpdir
-- (it needs an absolute path)
, Param "-D", File abstmpdir
-- Run callback after each file received
-- (it needs an absolute path)
, Param "-s", Param (fromMaybe callback abscallback)
] ++ ups
runner <- liftIO $ async $
hClose statush
`after` boolSystemEnv "uftpd" ps (Just environ)
mapM_ storeReceived . lines =<< liftIO (hGetContents statush)
showEndResult =<< liftIO (wait runner)
stop
storeReceived :: FilePath -> Annex ()
storeReceived f = do
case file2key (takeFileName f) of
Nothing -> do
warning $ "Received a file " ++ f ++ " that is not a git-annex key. Deleting this file."
liftIO $ nukeFile f
Just k -> void $
getViaTmp' AlwaysVerify k $ \dest -> unVerified $
liftIO $ catchBoolIO $ do
rename f dest
return True
-- Under Windows, uftp uses key containers, which are not files on the
-- filesystem.
data UftpKey = KeyFile FilePath | KeyContainer String
uftpKeyParam :: UftpKey -> CommandParam
uftpKeyParam (KeyFile f) = File f
uftpKeyParam (KeyContainer s) = Param s
uftpKey :: Annex UftpKey
#ifdef mingw32_HOST_OS
uftpKey = do
u <- getUUID
return $ KeyContainer $ "annex-" ++ fromUUID u
#else
uftpKey = KeyFile <$> cacheCredsFile "multicast"
#endif
-- uftp needs a unique UID for each client and server, which
-- is a 8 digit hex number in the form "0xnnnnnnnn"
-- Derive it from the UUID.
uftpUID :: UUID -> String
uftpUID u = "0x" ++ (take 8 $ show $ sha2_256 $ B8.fromString (fromUUID u))
withAuthList :: (FilePath -> Annex a) -> Annex a
withAuthList a = do
m <- knownFingerPrints
withTmpFile "authlist" $ \t h -> do
liftIO $ hPutStr h (genAuthList m)
liftIO $ hClose h
a t
genAuthList :: M.Map UUID Fingerprint -> String
genAuthList = unlines . map fmt . M.toList
where
fmt (u, Fingerprint f) = uftpUID u ++ "|" ++ f

View file

@ -41,6 +41,7 @@ topLevelUUIDBasedLogs =
, scheduleLog , scheduleLog
, activityLog , activityLog
, differenceLog , differenceLog
, multicastLog
] ]
{- All the ways to get a key from a presence log file -} {- All the ways to get a key from a presence log file -}
@ -93,6 +94,8 @@ activityLog = "activity.log"
differenceLog :: FilePath differenceLog :: FilePath
differenceLog = "difference.log" differenceLog = "difference.log"
multicastLog :: FilePath
multicastLog = "multicast.log"
{- The pathname of the location log file for a given key. -} {- The pathname of the location log file for a given key. -}
locationLogFile :: GitConfig -> Key -> String locationLogFile :: GitConfig -> Key -> String

33
Logs/Multicast.hs Normal file
View file

@ -0,0 +1,33 @@
{- git-annex multicast fingerprint log
-
- Copyright 2017 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Logs.Multicast (
Fingerprint(..),
recordFingerprint,
knownFingerPrints,
) where
import Data.Time.Clock.POSIX
import Annex.Common
import qualified Annex.Branch
import Logs
import Logs.UUIDBased
import qualified Data.Map as M
newtype Fingerprint = Fingerprint String
deriving (Eq, Read, Show)
recordFingerprint :: Fingerprint -> UUID -> Annex ()
recordFingerprint fp uuid = do
ts <- liftIO getPOSIXTime
Annex.Branch.change multicastLog $
showLog show . changeLog ts uuid fp . parseLog readish
knownFingerPrints :: Annex (M.Map UUID Fingerprint)
knownFingerPrints = simpleMap . parseLog readish <$> Annex.Branch.get activityLog

View file

@ -177,7 +177,10 @@ writeFileProtected file content = writeFileProtected' file
(\h -> hPutStr h content) (\h -> hPutStr h content)
writeFileProtected' :: FilePath -> (Handle -> IO ()) -> IO () writeFileProtected' :: FilePath -> (Handle -> IO ()) -> IO ()
writeFileProtected' file writer = withUmask 0o0077 $ writeFileProtected' file writer = protectedOutput $
withFile file WriteMode $ \h -> do withFile file WriteMode $ \h -> do
void $ tryIO $ modifyFileMode file $ removeModes otherGroupModes void $ tryIO $ modifyFileMode file $ removeModes otherGroupModes
writer h writer h
protectedOutput :: IO a -> IO a
protectedOutput = withUmask 0o0077

1
debian/control vendored
View file

@ -116,6 +116,7 @@ Suggests:
magic-wormhole, magic-wormhole,
tahoe-lafs, tahoe-lafs,
libnss-mdns, libnss-mdns,
uftp,
Description: manage files with git, without checking their contents into git Description: manage files with git, without checking their contents into git
git-annex allows managing files with git, without checking the file git-annex allows managing files with git, without checking the file
contents into git. While that may seem paradoxical, it is useful when contents into git. While that may seem paradoxical, it is useful when

View file

@ -0,0 +1,95 @@
# NAME
git-annex multicast - multicast file distribution
# SYNOPSIS
git annex multicast [options]
# DESCRIPTION
Multicast allows files to be broadcast to multiple receivers,
typically on a single local network.
The uftp program is used for multicast.
<http://uftp-multicast.sourceforge.net/>
# OPTIONS
* `--gen-address`
Generates a multicast encryption key and stores a corresponding multicast
address to the git-annex branch.
* `--send [file]`
Sends the specified files to any receivers whose multicast addresses
are stored in the git-annex branch.
When no files are specified, all annexed files in the current directory
and subdirectories are sent.
The [[git-annex-matching-options]] can be used to control which files to
send. For example:
git annex multicast send . --not --copies 2
* `--receive`
Receives files from senders whose multicast addresses
are stored in the git-annex brach.
As each file is received, its filename is displayed. This is the filename
that the sender used; the local working tree may use a different name
for the file, or not contain a link to the file.
This command continues running, until it is interrupted by you pressing
ctrl-c.
Note that the configured annex.diskreserve is not honored by this
command, because `uftpd` receives the actual files, and can receive
any size file.
* `--uftp-opt=option` `-Uoption`
Pass an option on to the uftp/uftpd command. May be specified multiple
times.
For example, to broadcast at 50 Mbps:
git annex multicast send -U-R -U50000
# EXAMPLE
Suppose a teacher wants to multicast files to students in a classroom.
This assumes that the teacher and students have cloned a git-annex
repository, and both can push changes to its git-annex branch,
or otherwise push changes to each-other.
First, the teacher runs `git annex multicast --gen-address; git annex sync`
Next, students each run `git annex multicast --gen-address; git annex sync`
Once all the students have generated addresses, the teacher runs
`git annex sync` once more. (Now the students all have received the
teacher's address, and the teacher has received all the student's addresses.)
Next students each run `git annex multicast --receive`
Finally, once the students are all listening (ahem), teacher runs
`git annex multicast --send`
# SEE ALSO
[[git-annex]](1)
uftp(1)
uftpd(1)
# AUTHOR
Joey Hess <id@joeyh.name>
Warning: Automatically converted into a man page by mdwn2man. Edit with care.

View file

@ -164,6 +164,12 @@ subdirectories).
See [[git-annex-undo]](1) for details. See [[git-annex-undo]](1) for details.
* `multicast`
Multicast file distribution.
See [[git-annex-multicast]](1) for details.
* `watch` * `watch`
Watch for changes and autocommit. Watch for changes and autocommit.

View file

@ -288,3 +288,7 @@ that should prevent merging.
Example: Example:
e605dca6-446a-11e0-8b2a-002170d25c55 [ObjectHashLower] timestamp=1422387398.30395s e605dca6-446a-11e0-8b2a-002170d25c55 [ObjectHashLower] timestamp=1422387398.30395s
## `multicast.log`
Records uftp public key fingerprints, for use by [[git-annex-multicast]].

View file

@ -5,3 +5,6 @@ Although I haven't remembered that "hash thing" to perform the job, we looked ar
What do you think? What do you think?
[[!meta name=yoh]] [[!meta name=yoh]]
> [[done]]! I've only tested it with sender and receiver on the same
> laptop, but it seems to work. --[[Joey]]

View file

@ -90,6 +90,7 @@ Extra-Source-Files:
doc/git-annex-migrate.mdwn doc/git-annex-migrate.mdwn
doc/git-annex-mirror.mdwn doc/git-annex-mirror.mdwn
doc/git-annex-move.mdwn doc/git-annex-move.mdwn
doc/git-annex-multicast.mdwn
doc/git-annex-numcopies.mdwn doc/git-annex-numcopies.mdwn
doc/git-annex-p2p.mdwn doc/git-annex-p2p.mdwn
doc/git-annex-pre-commit.mdwn doc/git-annex-pre-commit.mdwn
@ -318,7 +319,7 @@ Executable git-annex
stm (>= 2.3), stm (>= 2.3),
mtl (>= 2), mtl (>= 2),
uuid (>= 1.2.6), uuid (>= 1.2.6),
process, process (>= 1.4.2.0),
data-default, data-default,
case-insensitive, case-insensitive,
random, random,
@ -518,6 +519,7 @@ Executable git-annex
Annex.MakeRepo Annex.MakeRepo
Annex.MetaData Annex.MetaData
Annex.MetaData.StandardFields Annex.MetaData.StandardFields
Annex.Multicast
Annex.Notification Annex.Notification
Annex.NumCopies Annex.NumCopies
Annex.Path Annex.Path
@ -732,6 +734,7 @@ Executable git-annex
Command.Migrate Command.Migrate
Command.Mirror Command.Mirror
Command.Move Command.Move
Command.Multicast
Command.NotifyChanges Command.NotifyChanges
Command.NumCopies Command.NumCopies
Command.P2P Command.P2P
@ -857,6 +860,7 @@ Executable git-annex
Logs.Location Logs.Location
Logs.MapLog Logs.MapLog
Logs.MetaData Logs.MetaData
Logs.Multicast
Logs.NumCopies Logs.NumCopies
Logs.PreferredContent Logs.PreferredContent
Logs.PreferredContent.Raw Logs.PreferredContent.Raw