multicast: New command, uses uftp to multicast annexed files, for eg a classroom setting.

This commit was supported by the NSF-funded DataLad project.
This commit is contained in:
Joey Hess 2017-03-30 19:32:58 -04:00
parent 39e8433d46
commit c3970f6c1a
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
13 changed files with 454 additions and 2 deletions

41
Annex/Multicast.hs Normal file
View file

@ -0,0 +1,41 @@
{- git-annex multicast receive callback
-
- Copyright 2017 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Annex.Multicast where
import Config.Files
import Utility.Env
import Utility.PartialPrelude
import System.Process
import System.IO
import GHC.IO.Handle.FD
multicastReceiveEnv :: String
multicastReceiveEnv = "GIT_ANNEX_MULTICAST_RECEIVE"
multicastCallbackEnv :: IO (FilePath, [(String, String)], Handle)
multicastCallbackEnv = do
gitannex <- readProgramFile
(rfd, wfd) <- createPipeFd
rh <- fdToHandle rfd
environ <- addEntry multicastReceiveEnv (show wfd) <$> getEnvironment
return (gitannex, environ, rh)
-- This is run when uftpd has received a file. Rather than move
-- the file into the annex here, which would require starting up the
-- Annex monad, parsing git config, and verifying the content, simply
-- output to the specified FD the filename. This keeps the time
-- that uftpd is not receiving the next file as short as possible.
runMulticastReceive :: [String] -> String -> IO ()
runMulticastReceive ("-I":_sessionid:fs) hs = case readish hs of
Just fd -> do
h <- fdToHandle fd
mapM_ (hPutStrLn h) fs
hClose h
Nothing -> return ()
runMulticastReceive _ _ = return ()

View file

@ -2,6 +2,8 @@ git-annex (6.20170322) UNRELEASED; urgency=medium
* When a http remote does not expose an annex.uuid config, only warn
about it once, not every time git-annex is run.
* multicast: New command, uses uftp to multicast annexed files, for eg
a classroom setting.
-- Joey Hess <id@joeyh.name> Wed, 29 Mar 2017 12:41:46 -0400

View file

@ -14,6 +14,7 @@ import CmdLine
import Command
import Utility.Env
import Annex.Ssh
import Annex.Multicast
import Types.Test
import qualified Command.Help
@ -53,6 +54,7 @@ import qualified Command.Describe
import qualified Command.InitRemote
import qualified Command.EnableRemote
import qualified Command.EnableTor
import qualified Command.Multicast
import qualified Command.Expire
import qualified Command.Repair
import qualified Command.Unused
@ -144,6 +146,7 @@ cmds testoptparser testrunner =
, Command.InitRemote.cmd
, Command.EnableRemote.cmd
, Command.EnableTor.cmd
, Command.Multicast.cmd
, Command.Reinject.cmd
, Command.Unannex.cmd
, Command.Uninit.cmd
@ -242,4 +245,5 @@ run testoptparser testrunner args = go envmodes
envmodes =
[ (sshOptionsEnv, runSshOptions args)
, (sshAskPassEnv, runSshAskPass)
, (multicastReceiveEnv, runMulticastReceive args)
]

253
Command/Multicast.hs Normal file
View file

@ -0,0 +1,253 @@
{- git-annex command
-
- Copyright 2017 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
{-# LANGUAGE CPP #-}
module Command.Multicast where
import Command
import Logs.Multicast
import Annex.Multicast
import Annex.WorkTree
import Annex.Content
import Annex.UUID
#ifndef mingw32_HOST_OS
import Creds
import Annex.Perms
import Utility.FileMode
#endif
import qualified Limit
import Types.FileMatcher
import qualified Git.LsFiles as LsFiles
import Utility.Hash
import Utility.Tmp
import Config
import Data.Char
import qualified Data.ByteString.Lazy.UTF8 as B8
import qualified Data.Map as M
import Control.Concurrent.Async
cmd :: Command
cmd = command "multicast" SectionCommon "multicast file distribution"
paramNothing (seek <$$> optParser)
data MultiCastAction
= GenAddress
| Send
| Receive
deriving (Show)
data MultiCastOptions = MultiCastOptions MultiCastAction [CommandParam] [FilePath]
deriving (Show)
optParser :: CmdParamsDesc -> Parser MultiCastOptions
optParser _ = MultiCastOptions
<$> (genaddressp <|> sendp <|> receivep)
<*> many uftpopt
<*> cmdParams paramPaths
where
genaddressp = flag' GenAddress
( long "gen-address"
<> help "generate multicast encryption key and store address in git-annex branch"
)
sendp = flag' Send
( long "send"
<> help "multicast files"
)
receivep = flag' Receive
( long "receive"
<> help "listen for multicast files and store in repository"
)
uftpopt = Param <$> strOption
( long "uftp-opt"
<> short 'U'
<> help "passed on to uftp/uftpd"
<> metavar "OPTION"
)
seek :: MultiCastOptions -> CommandSeek
seek (MultiCastOptions GenAddress _ _) = commandAction genAddress
seek (MultiCastOptions Send ups fs) = commandAction $ send ups fs
seek (MultiCastOptions Receive ups []) = commandAction $ receive ups
seek (MultiCastOptions Receive _ _) = giveup "Cannot specify list of files with --receive; this receives whatever files the sender chooses to send."
genAddress :: CommandStart
genAddress = do
showStart "gen-address" ""
k <- uftpKey
(s, ok) <- case k of
KeyContainer s -> liftIO $ genkey (Param s)
KeyFile f -> do
createAnnexDirectory (takeDirectory f)
liftIO $ nukeFile f
liftIO $ protectedOutput $ genkey (File f)
case (ok, parseFingerprint s) of
(False, _) -> giveup $ "uftp_keymgt failed: " ++ s
(_, Nothing) -> giveup $ "Failed to find fingerprint in uftp_keymgt output: " ++ s
(True, Just fp) -> next $ next $ do
recordFingerprint fp =<< getUUID
return True
where
-- Annoyingly, the fingerprint is output to stderr.
genkey p = processTranscript "uftp_keymgt" ps Nothing
where
ps = toCommand $
[ Param "-g"
, keyparam
, p
]
-- uftp only supports rsa up to 2048 which is on the lower
-- limit of secure RSA key sizes. Instead, use an EC curve.
-- Except for on Windows XP, secp521r1 is supported on all
-- platforms by uftp. DJB thinks it's pretty good compared
-- with other NIST curves: "there's one standard NIST curve
-- using a nice prime, namely 25211 but the sheer size of this
-- prime makes it much slower than NIST P-256"
-- (http://blog.cr.yp.to/20140323-ecdsa.html)
-- Since this key is only used to set up the block encryption,
-- its slow speed is ok.
keyparam = Param "ec:secp521r1"
parseFingerprint :: String -> Maybe Fingerprint
parseFingerprint = Fingerprint <$$> lastMaybe . filter isfingerprint . words
where
isfingerprint s =
let os = filter (all isHexDigit) (splitc ':' s)
in length os == 20
send :: [CommandParam] -> [FilePath] -> CommandStart
send ups fs = withTmpFile "send" $ \t h -> do
-- Need to be able to send files with the names of git-annex
-- keys, and uftp does not allow renaming the files that are sent.
-- In a direct mode repository, the annex objects do not have
-- the names of keys, and would have to be copied, which is too
-- expensive.
whenM isDirect $
giveup "Sorry, multicast send cannot be done from a direct mode repository."
showStart "generating file list" ""
fs' <- seekHelper LsFiles.inRepo fs
matcher <- Limit.getMatcher
let addlist f o = whenM (matcher $ MatchingFile $ FileInfo f f) $
liftIO $ hPutStrLn h o
forM_ fs' $ \f -> do
mk <- lookupFile f
case mk of
Nothing -> noop
Just k -> withObjectLoc k (addlist f) (const noop)
liftIO $ hClose h
showEndOk
showStart "sending files" ""
showOutput
serverkey <- uftpKey
u <- getUUID
withAuthList $ \authlist -> do
let ps =
-- Force client authentication.
[ Param "-c"
, Param "-Y", Param "aes256-cbc"
, Param "-h", Param "sha512"
-- Picked ecdh_ecdsa for perfect forward secrecy,
-- and because a EC key exchange algorithm is
-- needed since all keys are EC.
, Param "-e", Param "ecdh_ecdsa"
, Param "-k", uftpKeyParam serverkey
, Param "-U", Param (uftpUID u)
-- only allow clients on the authlist
, Param "-H", Param ("@"++authlist)
-- pass in list of files to send
, Param "-i", File t
] ++ ups
liftIO (boolSystem "uftp" ps) >>= showEndResult
stop
receive :: [CommandParam] -> CommandStart
receive ups = do
showStart "receiving multicast files" ""
showNote "Will continue to run until stopped by ctrl-c"
showOutput
clientkey <- uftpKey
u <- getUUID
(callback, environ, statush) <- liftIO multicastCallbackEnv
tmpobjdir <- fromRepo gitAnnexTmpObjectDir
createAnnexDirectory tmpobjdir
withTmpDirIn tmpobjdir "multicast" $ \tmpdir -> withAuthList $ \authlist ->
abstmpdir <- liftIO $ absPath tmpdir
abscallback <- liftIO $ searchPath callback
let ps =
-- Avoid it running as a daemon.
[ Param "-d"
-- Require encryption.
, Param "-E"
, Param "-k", uftpKeyParam clientkey
, Param "-U", Param (uftpUID u)
-- Only allow servers on the authlist
, Param "-S", Param authlist
-- Receive files into tmpdir
-- (it needs an absolute path)
, Param "-D", File abstmpdir
-- Run callback after each file received
-- (it needs an absolute path)
, Param "-s", Param (fromMaybe callback abscallback)
] ++ ups
runner <- liftIO $ async $
hClose statush
`after` boolSystemEnv "uftpd" ps (Just environ)
mapM_ storeReceived . lines =<< liftIO (hGetContents statush)
showEndResult =<< liftIO (wait runner)
stop
storeReceived :: FilePath -> Annex ()
storeReceived f = do
case file2key (takeFileName f) of
Nothing -> do
warning $ "Received a file " ++ f ++ " that is not a git-annex key. Deleting this file."
liftIO $ nukeFile f
Just k -> void $
getViaTmp' AlwaysVerify k $ \dest -> unVerified $
liftIO $ catchBoolIO $ do
rename f dest
return True
-- Under Windows, uftp uses key containers, which are not files on the
-- filesystem.
data UftpKey = KeyFile FilePath | KeyContainer String
uftpKeyParam :: UftpKey -> CommandParam
uftpKeyParam (KeyFile f) = File f
uftpKeyParam (KeyContainer s) = Param s
uftpKey :: Annex UftpKey
#ifdef mingw32_HOST_OS
uftpKey = do
u <- getUUID
return $ KeyContainer $ "annex-" ++ fromUUID u
#else
uftpKey = KeyFile <$> cacheCredsFile "multicast"
#endif
-- uftp needs a unique UID for each client and server, which
-- is a 8 digit hex number in the form "0xnnnnnnnn"
-- Derive it from the UUID.
uftpUID :: UUID -> String
uftpUID u = "0x" ++ (take 8 $ show $ sha2_256 $ B8.fromString (fromUUID u))
withAuthList :: (FilePath -> Annex a) -> Annex a
withAuthList a = do
m <- knownFingerPrints
withTmpFile "authlist" $ \t h -> do
liftIO $ hPutStr h (genAuthList m)
liftIO $ hClose h
a t
genAuthList :: M.Map UUID Fingerprint -> String
genAuthList = unlines . map fmt . M.toList
where
fmt (u, Fingerprint f) = uftpUID u ++ "|" ++ f

View file

@ -41,6 +41,7 @@ topLevelUUIDBasedLogs =
, scheduleLog
, activityLog
, differenceLog
, multicastLog
]
{- All the ways to get a key from a presence log file -}
@ -93,6 +94,8 @@ activityLog = "activity.log"
differenceLog :: FilePath
differenceLog = "difference.log"
multicastLog :: FilePath
multicastLog = "multicast.log"
{- The pathname of the location log file for a given key. -}
locationLogFile :: GitConfig -> Key -> String

33
Logs/Multicast.hs Normal file
View file

@ -0,0 +1,33 @@
{- git-annex multicast fingerprint log
-
- Copyright 2017 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Logs.Multicast (
Fingerprint(..),
recordFingerprint,
knownFingerPrints,
) where
import Data.Time.Clock.POSIX
import Annex.Common
import qualified Annex.Branch
import Logs
import Logs.UUIDBased
import qualified Data.Map as M
newtype Fingerprint = Fingerprint String
deriving (Eq, Read, Show)
recordFingerprint :: Fingerprint -> UUID -> Annex ()
recordFingerprint fp uuid = do
ts <- liftIO getPOSIXTime
Annex.Branch.change multicastLog $
showLog show . changeLog ts uuid fp . parseLog readish
knownFingerPrints :: Annex (M.Map UUID Fingerprint)
knownFingerPrints = simpleMap . parseLog readish <$> Annex.Branch.get activityLog

View file

@ -177,7 +177,10 @@ writeFileProtected file content = writeFileProtected' file
(\h -> hPutStr h content)
writeFileProtected' :: FilePath -> (Handle -> IO ()) -> IO ()
writeFileProtected' file writer = withUmask 0o0077 $
writeFileProtected' file writer = protectedOutput $
withFile file WriteMode $ \h -> do
void $ tryIO $ modifyFileMode file $ removeModes otherGroupModes
writer h
protectedOutput :: IO a -> IO a
protectedOutput = withUmask 0o0077

1
debian/control vendored
View file

@ -116,6 +116,7 @@ Suggests:
magic-wormhole,
tahoe-lafs,
libnss-mdns,
uftp,
Description: manage files with git, without checking their contents into git
git-annex allows managing files with git, without checking the file
contents into git. While that may seem paradoxical, it is useful when

View file

@ -0,0 +1,95 @@
# NAME
git-annex multicast - multicast file distribution
# SYNOPSIS
git annex multicast [options]
# DESCRIPTION
Multicast allows files to be broadcast to multiple receivers,
typically on a single local network.
The uftp program is used for multicast.
<http://uftp-multicast.sourceforge.net/>
# OPTIONS
* `--gen-address`
Generates a multicast encryption key and stores a corresponding multicast
address to the git-annex branch.
* `--send [file]`
Sends the specified files to any receivers whose multicast addresses
are stored in the git-annex branch.
When no files are specified, all annexed files in the current directory
and subdirectories are sent.
The [[git-annex-matching-options]] can be used to control which files to
send. For example:
git annex multicast send . --not --copies 2
* `--receive`
Receives files from senders whose multicast addresses
are stored in the git-annex brach.
As each file is received, its filename is displayed. This is the filename
that the sender used; the local working tree may use a different name
for the file, or not contain a link to the file.
This command continues running, until it is interrupted by you pressing
ctrl-c.
Note that the configured annex.diskreserve is not honored by this
command, because `uftpd` receives the actual files, and can receive
any size file.
* `--uftp-opt=option` `-Uoption`
Pass an option on to the uftp/uftpd command. May be specified multiple
times.
For example, to broadcast at 50 Mbps:
git annex multicast send -U-R -U50000
# EXAMPLE
Suppose a teacher wants to multicast files to students in a classroom.
This assumes that the teacher and students have cloned a git-annex
repository, and both can push changes to its git-annex branch,
or otherwise push changes to each-other.
First, the teacher runs `git annex multicast --gen-address; git annex sync`
Next, students each run `git annex multicast --gen-address; git annex sync`
Once all the students have generated addresses, the teacher runs
`git annex sync` once more. (Now the students all have received the
teacher's address, and the teacher has received all the student's addresses.)
Next students each run `git annex multicast --receive`
Finally, once the students are all listening (ahem), teacher runs
`git annex multicast --send`
# SEE ALSO
[[git-annex]](1)
uftp(1)
uftpd(1)
# AUTHOR
Joey Hess <id@joeyh.name>
Warning: Automatically converted into a man page by mdwn2man. Edit with care.

View file

@ -164,6 +164,12 @@ subdirectories).
See [[git-annex-undo]](1) for details.
* `multicast`
Multicast file distribution.
See [[git-annex-multicast]](1) for details.
* `watch`
Watch for changes and autocommit.

View file

@ -288,3 +288,7 @@ that should prevent merging.
Example:
e605dca6-446a-11e0-8b2a-002170d25c55 [ObjectHashLower] timestamp=1422387398.30395s
## `multicast.log`
Records uftp public key fingerprints, for use by [[git-annex-multicast]].

View file

@ -5,3 +5,6 @@ Although I haven't remembered that "hash thing" to perform the job, we looked ar
What do you think?
[[!meta name=yoh]]
> [[done]]! I've only tested it with sender and receiver on the same
> laptop, but it seems to work. --[[Joey]]

View file

@ -90,6 +90,7 @@ Extra-Source-Files:
doc/git-annex-migrate.mdwn
doc/git-annex-mirror.mdwn
doc/git-annex-move.mdwn
doc/git-annex-multicast.mdwn
doc/git-annex-numcopies.mdwn
doc/git-annex-p2p.mdwn
doc/git-annex-pre-commit.mdwn
@ -318,7 +319,7 @@ Executable git-annex
stm (>= 2.3),
mtl (>= 2),
uuid (>= 1.2.6),
process,
process (>= 1.4.2.0),
data-default,
case-insensitive,
random,
@ -518,6 +519,7 @@ Executable git-annex
Annex.MakeRepo
Annex.MetaData
Annex.MetaData.StandardFields
Annex.Multicast
Annex.Notification
Annex.NumCopies
Annex.Path
@ -732,6 +734,7 @@ Executable git-annex
Command.Migrate
Command.Mirror
Command.Move
Command.Multicast
Command.NotifyChanges
Command.NumCopies
Command.P2P
@ -857,6 +860,7 @@ Executable git-annex
Logs.Location
Logs.MapLog
Logs.MetaData
Logs.Multicast
Logs.NumCopies
Logs.PreferredContent
Logs.PreferredContent.Raw