From c3970f6c1a156a74b137f1d12c27cd70eed613c8 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Thu, 30 Mar 2017 19:32:58 -0400 Subject: [PATCH] multicast: New command, uses uftp to multicast annexed files, for eg a classroom setting. This commit was supported by the NSF-funded DataLad project. --- Annex/Multicast.hs | 41 +++ CHANGELOG | 2 + CmdLine/GitAnnex.hs | 4 + Command/Multicast.hs | 253 ++++++++++++++++++ Logs.hs | 3 + Logs/Multicast.hs | 33 +++ Utility/FileMode.hs | 5 +- debian/control | 1 + doc/git-annex-multicast.mdwn | 95 +++++++ doc/git-annex.mdwn | 6 + doc/internals.mdwn | 4 + ...casting__34___of_content_on_local_net.mdwn | 3 + git-annex.cabal | 6 +- 13 files changed, 454 insertions(+), 2 deletions(-) create mode 100644 Annex/Multicast.hs create mode 100644 Command/Multicast.hs create mode 100644 Logs/Multicast.hs create mode 100644 doc/git-annex-multicast.mdwn diff --git a/Annex/Multicast.hs b/Annex/Multicast.hs new file mode 100644 index 0000000000..16aa1bd335 --- /dev/null +++ b/Annex/Multicast.hs @@ -0,0 +1,41 @@ +{- git-annex multicast receive callback + - + - Copyright 2017 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Annex.Multicast where + +import Config.Files +import Utility.Env +import Utility.PartialPrelude + +import System.Process +import System.IO +import GHC.IO.Handle.FD + +multicastReceiveEnv :: String +multicastReceiveEnv = "GIT_ANNEX_MULTICAST_RECEIVE" + +multicastCallbackEnv :: IO (FilePath, [(String, String)], Handle) +multicastCallbackEnv = do + gitannex <- readProgramFile + (rfd, wfd) <- createPipeFd + rh <- fdToHandle rfd + environ <- addEntry multicastReceiveEnv (show wfd) <$> getEnvironment + return (gitannex, environ, rh) + +-- This is run when uftpd has received a file. Rather than move +-- the file into the annex here, which would require starting up the +-- Annex monad, parsing git config, and verifying the content, simply +-- output to the specified FD the filename. This keeps the time +-- that uftpd is not receiving the next file as short as possible. +runMulticastReceive :: [String] -> String -> IO () +runMulticastReceive ("-I":_sessionid:fs) hs = case readish hs of + Just fd -> do + h <- fdToHandle fd + mapM_ (hPutStrLn h) fs + hClose h + Nothing -> return () +runMulticastReceive _ _ = return () diff --git a/CHANGELOG b/CHANGELOG index c58c3cf8a6..f69aadedd9 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -2,6 +2,8 @@ git-annex (6.20170322) UNRELEASED; urgency=medium * When a http remote does not expose an annex.uuid config, only warn about it once, not every time git-annex is run. + * multicast: New command, uses uftp to multicast annexed files, for eg + a classroom setting. -- Joey Hess Wed, 29 Mar 2017 12:41:46 -0400 diff --git a/CmdLine/GitAnnex.hs b/CmdLine/GitAnnex.hs index 0e472005c0..be5f56ba0a 100644 --- a/CmdLine/GitAnnex.hs +++ b/CmdLine/GitAnnex.hs @@ -14,6 +14,7 @@ import CmdLine import Command import Utility.Env import Annex.Ssh +import Annex.Multicast import Types.Test import qualified Command.Help @@ -53,6 +54,7 @@ import qualified Command.Describe import qualified Command.InitRemote import qualified Command.EnableRemote import qualified Command.EnableTor +import qualified Command.Multicast import qualified Command.Expire import qualified Command.Repair import qualified Command.Unused @@ -144,6 +146,7 @@ cmds testoptparser testrunner = , Command.InitRemote.cmd , Command.EnableRemote.cmd , Command.EnableTor.cmd + , Command.Multicast.cmd , Command.Reinject.cmd , Command.Unannex.cmd , Command.Uninit.cmd @@ -242,4 +245,5 @@ run testoptparser testrunner args = go envmodes envmodes = [ (sshOptionsEnv, runSshOptions args) , (sshAskPassEnv, runSshAskPass) + , (multicastReceiveEnv, runMulticastReceive args) ] diff --git a/Command/Multicast.hs b/Command/Multicast.hs new file mode 100644 index 0000000000..cd74c3ebcf --- /dev/null +++ b/Command/Multicast.hs @@ -0,0 +1,253 @@ +{- git-annex command + - + - Copyright 2017 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +{-# LANGUAGE CPP #-} + +module Command.Multicast where + +import Command +import Logs.Multicast +import Annex.Multicast +import Annex.WorkTree +import Annex.Content +import Annex.UUID +#ifndef mingw32_HOST_OS +import Creds +import Annex.Perms +import Utility.FileMode +#endif +import qualified Limit +import Types.FileMatcher +import qualified Git.LsFiles as LsFiles +import Utility.Hash +import Utility.Tmp +import Config + +import Data.Char +import qualified Data.ByteString.Lazy.UTF8 as B8 +import qualified Data.Map as M +import Control.Concurrent.Async + +cmd :: Command +cmd = command "multicast" SectionCommon "multicast file distribution" + paramNothing (seek <$$> optParser) + +data MultiCastAction + = GenAddress + | Send + | Receive + deriving (Show) + +data MultiCastOptions = MultiCastOptions MultiCastAction [CommandParam] [FilePath] + deriving (Show) + +optParser :: CmdParamsDesc -> Parser MultiCastOptions +optParser _ = MultiCastOptions + <$> (genaddressp <|> sendp <|> receivep) + <*> many uftpopt + <*> cmdParams paramPaths + where + genaddressp = flag' GenAddress + ( long "gen-address" + <> help "generate multicast encryption key and store address in git-annex branch" + ) + sendp = flag' Send + ( long "send" + <> help "multicast files" + ) + receivep = flag' Receive + ( long "receive" + <> help "listen for multicast files and store in repository" + ) + uftpopt = Param <$> strOption + ( long "uftp-opt" + <> short 'U' + <> help "passed on to uftp/uftpd" + <> metavar "OPTION" + ) + +seek :: MultiCastOptions -> CommandSeek +seek (MultiCastOptions GenAddress _ _) = commandAction genAddress +seek (MultiCastOptions Send ups fs) = commandAction $ send ups fs +seek (MultiCastOptions Receive ups []) = commandAction $ receive ups +seek (MultiCastOptions Receive _ _) = giveup "Cannot specify list of files with --receive; this receives whatever files the sender chooses to send." + +genAddress :: CommandStart +genAddress = do + showStart "gen-address" "" + k <- uftpKey + (s, ok) <- case k of + KeyContainer s -> liftIO $ genkey (Param s) + KeyFile f -> do + createAnnexDirectory (takeDirectory f) + liftIO $ nukeFile f + liftIO $ protectedOutput $ genkey (File f) + case (ok, parseFingerprint s) of + (False, _) -> giveup $ "uftp_keymgt failed: " ++ s + (_, Nothing) -> giveup $ "Failed to find fingerprint in uftp_keymgt output: " ++ s + (True, Just fp) -> next $ next $ do + recordFingerprint fp =<< getUUID + return True + where + -- Annoyingly, the fingerprint is output to stderr. + genkey p = processTranscript "uftp_keymgt" ps Nothing + where + ps = toCommand $ + [ Param "-g" + , keyparam + , p + ] + -- uftp only supports rsa up to 2048 which is on the lower + -- limit of secure RSA key sizes. Instead, use an EC curve. + -- Except for on Windows XP, secp521r1 is supported on all + -- platforms by uftp. DJB thinks it's pretty good compared + -- with other NIST curves: "there's one standard NIST curve + -- using a nice prime, namely 2521−1 but the sheer size of this + -- prime makes it much slower than NIST P-256" + -- (http://blog.cr.yp.to/20140323-ecdsa.html) + -- Since this key is only used to set up the block encryption, + -- its slow speed is ok. + keyparam = Param "ec:secp521r1" + +parseFingerprint :: String -> Maybe Fingerprint +parseFingerprint = Fingerprint <$$> lastMaybe . filter isfingerprint . words + where + isfingerprint s = + let os = filter (all isHexDigit) (splitc ':' s) + in length os == 20 + +send :: [CommandParam] -> [FilePath] -> CommandStart +send ups fs = withTmpFile "send" $ \t h -> do + -- Need to be able to send files with the names of git-annex + -- keys, and uftp does not allow renaming the files that are sent. + -- In a direct mode repository, the annex objects do not have + -- the names of keys, and would have to be copied, which is too + -- expensive. + whenM isDirect $ + giveup "Sorry, multicast send cannot be done from a direct mode repository." + + showStart "generating file list" "" + fs' <- seekHelper LsFiles.inRepo fs + matcher <- Limit.getMatcher + let addlist f o = whenM (matcher $ MatchingFile $ FileInfo f f) $ + liftIO $ hPutStrLn h o + forM_ fs' $ \f -> do + mk <- lookupFile f + case mk of + Nothing -> noop + Just k -> withObjectLoc k (addlist f) (const noop) + liftIO $ hClose h + showEndOk + + showStart "sending files" "" + showOutput + serverkey <- uftpKey + u <- getUUID + withAuthList $ \authlist -> do + let ps = + -- Force client authentication. + [ Param "-c" + , Param "-Y", Param "aes256-cbc" + , Param "-h", Param "sha512" + -- Picked ecdh_ecdsa for perfect forward secrecy, + -- and because a EC key exchange algorithm is + -- needed since all keys are EC. + , Param "-e", Param "ecdh_ecdsa" + , Param "-k", uftpKeyParam serverkey + , Param "-U", Param (uftpUID u) + -- only allow clients on the authlist + , Param "-H", Param ("@"++authlist) + -- pass in list of files to send + , Param "-i", File t + ] ++ ups + liftIO (boolSystem "uftp" ps) >>= showEndResult + stop + +receive :: [CommandParam] -> CommandStart +receive ups = do + showStart "receiving multicast files" "" + showNote "Will continue to run until stopped by ctrl-c" + + showOutput + clientkey <- uftpKey + u <- getUUID + (callback, environ, statush) <- liftIO multicastCallbackEnv + tmpobjdir <- fromRepo gitAnnexTmpObjectDir + createAnnexDirectory tmpobjdir + withTmpDirIn tmpobjdir "multicast" $ \tmpdir -> withAuthList $ \authlist -> + abstmpdir <- liftIO $ absPath tmpdir + abscallback <- liftIO $ searchPath callback + let ps = + -- Avoid it running as a daemon. + [ Param "-d" + -- Require encryption. + , Param "-E" + , Param "-k", uftpKeyParam clientkey + , Param "-U", Param (uftpUID u) + -- Only allow servers on the authlist + , Param "-S", Param authlist + -- Receive files into tmpdir + -- (it needs an absolute path) + , Param "-D", File abstmpdir + -- Run callback after each file received + -- (it needs an absolute path) + , Param "-s", Param (fromMaybe callback abscallback) + ] ++ ups + runner <- liftIO $ async $ + hClose statush + `after` boolSystemEnv "uftpd" ps (Just environ) + mapM_ storeReceived . lines =<< liftIO (hGetContents statush) + showEndResult =<< liftIO (wait runner) + stop + +storeReceived :: FilePath -> Annex () +storeReceived f = do + case file2key (takeFileName f) of + Nothing -> do + warning $ "Received a file " ++ f ++ " that is not a git-annex key. Deleting this file." + liftIO $ nukeFile f + Just k -> void $ + getViaTmp' AlwaysVerify k $ \dest -> unVerified $ + liftIO $ catchBoolIO $ do + rename f dest + return True + +-- Under Windows, uftp uses key containers, which are not files on the +-- filesystem. +data UftpKey = KeyFile FilePath | KeyContainer String + +uftpKeyParam :: UftpKey -> CommandParam +uftpKeyParam (KeyFile f) = File f +uftpKeyParam (KeyContainer s) = Param s + +uftpKey :: Annex UftpKey +#ifdef mingw32_HOST_OS +uftpKey = do + u <- getUUID + return $ KeyContainer $ "annex-" ++ fromUUID u +#else +uftpKey = KeyFile <$> cacheCredsFile "multicast" +#endif + +-- uftp needs a unique UID for each client and server, which +-- is a 8 digit hex number in the form "0xnnnnnnnn" +-- Derive it from the UUID. +uftpUID :: UUID -> String +uftpUID u = "0x" ++ (take 8 $ show $ sha2_256 $ B8.fromString (fromUUID u)) + +withAuthList :: (FilePath -> Annex a) -> Annex a +withAuthList a = do + m <- knownFingerPrints + withTmpFile "authlist" $ \t h -> do + liftIO $ hPutStr h (genAuthList m) + liftIO $ hClose h + a t + +genAuthList :: M.Map UUID Fingerprint -> String +genAuthList = unlines . map fmt . M.toList + where + fmt (u, Fingerprint f) = uftpUID u ++ "|" ++ f diff --git a/Logs.hs b/Logs.hs index 38bd1c0683..716520af44 100644 --- a/Logs.hs +++ b/Logs.hs @@ -41,6 +41,7 @@ topLevelUUIDBasedLogs = , scheduleLog , activityLog , differenceLog + , multicastLog ] {- All the ways to get a key from a presence log file -} @@ -93,6 +94,8 @@ activityLog = "activity.log" differenceLog :: FilePath differenceLog = "difference.log" +multicastLog :: FilePath +multicastLog = "multicast.log" {- The pathname of the location log file for a given key. -} locationLogFile :: GitConfig -> Key -> String diff --git a/Logs/Multicast.hs b/Logs/Multicast.hs new file mode 100644 index 0000000000..386899fdf6 --- /dev/null +++ b/Logs/Multicast.hs @@ -0,0 +1,33 @@ +{- git-annex multicast fingerprint log + - + - Copyright 2017 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Logs.Multicast ( + Fingerprint(..), + recordFingerprint, + knownFingerPrints, +) where + +import Data.Time.Clock.POSIX + +import Annex.Common +import qualified Annex.Branch +import Logs +import Logs.UUIDBased + +import qualified Data.Map as M + +newtype Fingerprint = Fingerprint String + deriving (Eq, Read, Show) + +recordFingerprint :: Fingerprint -> UUID -> Annex () +recordFingerprint fp uuid = do + ts <- liftIO getPOSIXTime + Annex.Branch.change multicastLog $ + showLog show . changeLog ts uuid fp . parseLog readish + +knownFingerPrints :: Annex (M.Map UUID Fingerprint) +knownFingerPrints = simpleMap . parseLog readish <$> Annex.Branch.get activityLog diff --git a/Utility/FileMode.hs b/Utility/FileMode.hs index fe9cbf56a3..d9a269448f 100644 --- a/Utility/FileMode.hs +++ b/Utility/FileMode.hs @@ -177,7 +177,10 @@ writeFileProtected file content = writeFileProtected' file (\h -> hPutStr h content) writeFileProtected' :: FilePath -> (Handle -> IO ()) -> IO () -writeFileProtected' file writer = withUmask 0o0077 $ +writeFileProtected' file writer = protectedOutput $ withFile file WriteMode $ \h -> do void $ tryIO $ modifyFileMode file $ removeModes otherGroupModes writer h + +protectedOutput :: IO a -> IO a +protectedOutput = withUmask 0o0077 diff --git a/debian/control b/debian/control index 7706d943f7..fa68c87fe7 100644 --- a/debian/control +++ b/debian/control @@ -116,6 +116,7 @@ Suggests: magic-wormhole, tahoe-lafs, libnss-mdns, + uftp, Description: manage files with git, without checking their contents into git git-annex allows managing files with git, without checking the file contents into git. While that may seem paradoxical, it is useful when diff --git a/doc/git-annex-multicast.mdwn b/doc/git-annex-multicast.mdwn new file mode 100644 index 0000000000..87b6310b74 --- /dev/null +++ b/doc/git-annex-multicast.mdwn @@ -0,0 +1,95 @@ +# NAME + +git-annex multicast - multicast file distribution + +# SYNOPSIS + +git annex multicast [options] + +# DESCRIPTION + +Multicast allows files to be broadcast to multiple receivers, +typically on a single local network. + +The uftp program is used for multicast. + + +# OPTIONS + +* `--gen-address` + + Generates a multicast encryption key and stores a corresponding multicast + address to the git-annex branch. + +* `--send [file]` + + Sends the specified files to any receivers whose multicast addresses + are stored in the git-annex branch. + + When no files are specified, all annexed files in the current directory + and subdirectories are sent. + + The [[git-annex-matching-options]] can be used to control which files to + send. For example: + + git annex multicast send . --not --copies 2 + +* `--receive` + + Receives files from senders whose multicast addresses + are stored in the git-annex brach. + + As each file is received, its filename is displayed. This is the filename + that the sender used; the local working tree may use a different name + for the file, or not contain a link to the file. + + This command continues running, until it is interrupted by you pressing + ctrl-c. + + Note that the configured annex.diskreserve is not honored by this + command, because `uftpd` receives the actual files, and can receive + any size file. + +* `--uftp-opt=option` `-Uoption` + + Pass an option on to the uftp/uftpd command. May be specified multiple + times. + + For example, to broadcast at 50 Mbps: + + git annex multicast send -U-R -U50000 + +# EXAMPLE + +Suppose a teacher wants to multicast files to students in a classroom. + +This assumes that the teacher and students have cloned a git-annex +repository, and both can push changes to its git-annex branch, +or otherwise push changes to each-other. + +First, the teacher runs `git annex multicast --gen-address; git annex sync` + +Next, students each run `git annex multicast --gen-address; git annex sync` + +Once all the students have generated addresses, the teacher runs +`git annex sync` once more. (Now the students all have received the +teacher's address, and the teacher has received all the student's addresses.) + +Next students each run `git annex multicast --receive` + +Finally, once the students are all listening (ahem), teacher runs +`git annex multicast --send` + +# SEE ALSO + +[[git-annex]](1) + +uftp(1) + +uftpd(1) + +# AUTHOR + +Joey Hess + +Warning: Automatically converted into a man page by mdwn2man. Edit with care. diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn index 0add5a5371..07b8b19e19 100644 --- a/doc/git-annex.mdwn +++ b/doc/git-annex.mdwn @@ -164,6 +164,12 @@ subdirectories). See [[git-annex-undo]](1) for details. +* `multicast` + + Multicast file distribution. + + See [[git-annex-multicast]](1) for details. + * `watch` Watch for changes and autocommit. diff --git a/doc/internals.mdwn b/doc/internals.mdwn index 78d0c8d472..00b65d2d13 100644 --- a/doc/internals.mdwn +++ b/doc/internals.mdwn @@ -288,3 +288,7 @@ that should prevent merging. Example: e605dca6-446a-11e0-8b2a-002170d25c55 [ObjectHashLower] timestamp=1422387398.30395s + +## `multicast.log` + +Records uftp public key fingerprints, for use by [[git-annex-multicast]]. diff --git a/doc/todo/multicast___34__broadcasting__34___of_content_on_local_net.mdwn b/doc/todo/multicast___34__broadcasting__34___of_content_on_local_net.mdwn index fa240bdf5b..bd34770724 100644 --- a/doc/todo/multicast___34__broadcasting__34___of_content_on_local_net.mdwn +++ b/doc/todo/multicast___34__broadcasting__34___of_content_on_local_net.mdwn @@ -5,3 +5,6 @@ Although I haven't remembered that "hash thing" to perform the job, we looked ar What do you think? [[!meta name=yoh]] + +> [[done]]! I've only tested it with sender and receiver on the same +> laptop, but it seems to work. --[[Joey]] diff --git a/git-annex.cabal b/git-annex.cabal index db868d1587..200ea30ae4 100644 --- a/git-annex.cabal +++ b/git-annex.cabal @@ -90,6 +90,7 @@ Extra-Source-Files: doc/git-annex-migrate.mdwn doc/git-annex-mirror.mdwn doc/git-annex-move.mdwn + doc/git-annex-multicast.mdwn doc/git-annex-numcopies.mdwn doc/git-annex-p2p.mdwn doc/git-annex-pre-commit.mdwn @@ -318,7 +319,7 @@ Executable git-annex stm (>= 2.3), mtl (>= 2), uuid (>= 1.2.6), - process, + process (>= 1.4.2.0), data-default, case-insensitive, random, @@ -518,6 +519,7 @@ Executable git-annex Annex.MakeRepo Annex.MetaData Annex.MetaData.StandardFields + Annex.Multicast Annex.Notification Annex.NumCopies Annex.Path @@ -732,6 +734,7 @@ Executable git-annex Command.Migrate Command.Mirror Command.Move + Command.Multicast Command.NotifyChanges Command.NumCopies Command.P2P @@ -857,6 +860,7 @@ Executable git-annex Logs.Location Logs.MapLog Logs.MetaData + Logs.Multicast Logs.NumCopies Logs.PreferredContent Logs.PreferredContent.Raw