reorg and add a start of a design for requests and ad-hoc routing with TTL in git-annex branch

This commit is contained in:
Joey Hess 2014-05-06 15:25:48 -03:00
parent b6c52568a8
commit 5230c78bb6
3 changed files with 131 additions and 58 deletions

View file

@ -74,21 +74,4 @@ cloud for a while.
## syncing only requested content
In some situations, nodes only want particular files, and not everything.
(Or don't have the bandwidth to get everything.) A way to handle this,
that should work in a fully ad-hoc, offline distributed network,
suggested by Vincenzo Tozzi:
* Nodes generate a request for a specific file they want, committed
to git somewhere.
* This request has a TTL (of eg 3).
* When syncing, copy the requests that a node has, and decrease their TTL
by 1. Requests with a TTL of 0 have timed out and are not copied.
(So, requests are stored in git, but on eg, per-node branches.)
* Only copy content to nodes that have a request for it (either one
originating with them, or one they copied from another node).
* Each request indicates the requesting node, so once no nodes have an
active request for a particular file, it's ok to drop it from the
transfer nodes (honoring numcopies etc of course).
A simulation of a network using this method is in [[simroutes.hs]]
See [[adhoc_routing]]

View file

@ -0,0 +1,59 @@
## requesting content
In some situations, nodes only want particular files, and not everything.
(Or don't have the bandwidth to get everything.) A way to handle this,
that should work in a fully ad-hoc, offline distributed network,
suggested by Vincenzo Tozzi:
* Nodes generate a request for a specific file they want, committed
to git somewhere.
* This request has a TTL (of eg 3 or 4).
* When syncing, copy the requests that a node has, and decrease their TTL
by 1. Requests with a TTL of 0 have timed out and are not copied.
(So, requests are stored in git, but on eg, per-node branches.)
* Only copy content to nodes that have a request for it (either one
originating with them, or one they copied from another node).
* Each request indicates the requesting node, so once no nodes have an
active request for a particular file, it's ok to drop it from the
transfer nodes (honoring numcopies etc of course).
A simulation of a network using this method is in [[simroutes.hs]]
## storing requests
Requests could be stored in the location tracking file.
Currently:
time 0|1 uuid1
time 0|1 uuid2
* Use negative numbers for the TTL of a request.
* To propigate a request, set -1 * (TTL+1) in the line
for the uuid of the repository that is propigating it.
This should be done as part of the git-annex branch merging,
so if a location tracking file is merged, any open requests
get propigated to the current repository.
* When a requested file reaches a node that requested it,
the location is set to 1; this automatically clears the
request.
## generating requests
git annex request [file...]
Indicates that the file is wanted in the current repository.
(git annex get could also do this on failure, or suggest doing this)
## acting on requests
Add a preferred content expression that looks at request data:
requestedby=N
Matches files that have been requested by at least N nodes.
requested
Matches files that the current node has requested.

View file

@ -9,6 +9,7 @@ import Control.Applicative
import Data.Ratio
import Data.Ord
import Data.List
import Data.Maybe
import qualified Data.Set as S
import qualified Data.Map.Strict as M
@ -38,11 +39,9 @@ transferMoveFrequencyRange = (0.10, 1.00)
-- counts both immobile and transfer nodes as hops, so double Vince's
-- theoretical TTL of 3.
-- (30% loss on mocambos network w/o ttl of 4!)
maxTTL :: TTL
maxTTL = TTL 6
minTTL :: TTL
minTTL = TTL 1
maxTTL = TTL (4 * 2)
numImmobileNodes :: Int
numImmobileNodes = 10
@ -54,11 +53,17 @@ numSteps :: Int
numSteps = 100
-- IO code
--main = putStrLn . summarize =<< evalRandIO (simulate numSteps =<< genNetwork)
main :: IO ()
main = do
initialnetwork <- evalRandIO mocambos
putStrLn . summarize initialnetwork
=<< evalRandIO (simulate numSteps initialnetwork)
-- initialnetwork <- evalRandIO (seedFiles totalFiles =<< genNetwork)
initialnetwork <- evalRandIO (seedFiles totalFiles =<< mocambosNetwork)
networks <- evalRandIO (simulate numSteps initialnetwork)
let finalnetwork = last networks
putStrLn $ summarize initialnetwork finalnetwork
putStrLn "location history of file 1:"
print $ trace (traceHaveFile (File 1)) networks
putStrLn "request history of file 1:"
print $ trace (traceWantFile (File 1)) networks
-- Only pure code below :)
data Network = Network (M.Map NodeName ImmobileNode) [TransferNode]
@ -131,9 +136,14 @@ type Probability = Float
randomProbability :: (RandomGen g) => Rand g Probability
randomProbability = getRandomR (0, 1)
simulate :: (RandomGen g) => Int -> Network -> Rand g Network
simulate 0 net = return net
simulate c net = simulate (c - 1) =<< step net
-- Returns the state of the network at each step of the simulation.
simulate :: (RandomGen g) => Int -> Network -> Rand g [Network]
simulate n net = go n [net]
where
go 0 nets = return (reverse nets)
go c (prev:nets) = do
new <- step prev
go (c - 1) (new:prev:nets)
-- Each step of the simulation, check if each TransferNode wants to move,
-- and if so:
@ -197,30 +207,12 @@ move t = do
genNetwork :: (RandomGen g) => Rand g Network
genNetwork = do
l <- sequence (replicate numImmobileNodes mkImmobile)
let immobiles = M.fromList (zip (map show [1..]) l)
let immobiles = M.fromList (zip (map show [1..]) (replicate numImmobileNodes emptyImmobile))
transfers <- sequence (replicate numTransferNodes (mkTransfer $ M.keys immobiles))
return $ Network immobiles transfers
mkImmobile :: (RandomGen g) => Rand g ImmobileNode
mkImmobile = ImmobileNode <$> mkImmobileRepo
mkImmobileRepo :: (RandomGen g) => Rand g NodeRepo
mkImmobileRepo = NodeRepo
-- The files this node wants.
-- Currently assumes each file is equally popular.
<$> sequence (replicate (truncate (fromIntegral totalFiles * probabilityFilesWanted)) randomRequest)
-- The files this node already has.
--
-- We'll assume equal production, so split the total
-- number of files amoung the immobile nodes.
-- (This will produce some duplication of files
-- (consider birthday paradox), and some missing files.)
--
-- TODO: Some immobile nodes are internet connected,
-- and these should all share their files automatically)
-- (Also when running the sim.)
<*> (S.fromList <$> sequence (replicate (totalFiles `div` numImmobileNodes) randomFile))
emptyImmobile :: ImmobileNode
emptyImmobile = ImmobileNode (NodeRepo [] S.empty)
mkTransfer :: (RandomGen g) => [NodeName] -> Rand g TransferNode
mkTransfer immobiles = do
@ -243,6 +235,27 @@ randomfrom l = do
i <- getRandomR (1, length l)
return $ l !! (i - 1)
-- Seeds the network with the given number of files. Each file is added to
-- one of the immobile nodes of the network at random. And, one other node,
-- at random, is selected which wants to get the file.
seedFiles :: (RandomGen g) => Int -> Network -> Rand g Network
seedFiles 0 network = return network
seedFiles n network@(Network m t) = do
(origink, ImmobileNode originr) <- randnode
(destinationk, ImmobileNode destinationr) <- randnode
let file = File n
let origin = ImmobileNode $ originr
{ haveFiles = S.insert file (haveFiles originr) }
let destination = ImmobileNode $ destinationr
{ wantFiles = Request file originTTL : wantFiles destinationr }
let m' = M.insert origink origin $
M.insert destinationk destination m
seedFiles (n - 1) (Network m' t)
where
randnode = do
k <- randomfrom (M.keys m)
return (k, fromJust $ M.lookup k m)
summarize :: Network -> Network -> String
summarize _initial@(Network origis _) _final@(Network is _ts) = format
[ ("Total wanted files",
@ -270,19 +283,33 @@ summarize _initial@(Network origis _) _final@(Network is _ts) = format
concatMap (\(k, v) -> map (\f -> (f, k)) (S.toList $ haveFiles $ repo v)) $
M.toList origis
mocambos :: (RandomGen g) => Rand g Network
mocambos = do
major <- mapM (immobilenamed . fst) communities
minor <- mapM immobilenamed (concatMap snd communities)
trace :: (Network -> S.Set NodeName) -> [Network] -> String
trace tracer networks = show $ go [] S.empty $ map tracer networks
where
go c old [] = reverse c
go c old (new:l) = go ((S.toList $ new `S.difference` old):c) new l
traceHaveFile :: File -> Network -> S.Set NodeName
traceHaveFile f (Network m _) = S.fromList $ M.keys $
M.filter (\(ImmobileNode r) -> f `S.member` haveFiles r) m
traceWantFile :: File -> Network -> S.Set NodeName
traceWantFile f (Network m _) = S.fromList $ M.keys $
M.filter (\(ImmobileNode r) -> any wantf (wantFiles r)) m
where
wantf (Request rf _ttl) = rf == f
mocambosNetwork :: (RandomGen g) => Rand g Network
mocambosNetwork = do
let major = map (immobilenamed . fst) communities
let minor = map immobilenamed (concatMap snd communities)
majortransfer <- mapM mkTransferBetween majorroutes
minortransfer <- mapM mkTransferBetween (concatMap minorroutes communities)
minortransfer <- mapM mkTransferBetween (concatMap minorroutes (concat (replicate 5 communities)))
return $ Network
(M.fromList (major++minor))
(majortransfer ++ minortransfer)
where
immobilenamed name = do
node <- mkImmobile
return (name, node)
immobilenamed name = (name, emptyImmobile)
-- As a simplification, this only makes 2 hop routes, between minor
-- and major communities; no 3-legged routes.
@ -353,4 +380,8 @@ majorroutes =
, ["Boneco/BA", "Coco/PE"]
-- internet connections
, ["Tainá/SP", "MercadoSul/DF", "Coco/PE", "Purarue/PA", "Odomode/RS", "Madiba/NET"]
, ["Tainá/SP", "MercadoSul/DF", "Coco/PE", "Purarue/PA", "Odomode/RS", "Madiba/NET"]
, ["Tainá/SP", "MercadoSul/DF", "Coco/PE", "Purarue/PA", "Odomode/RS", "Madiba/NET"]
, ["Tainá/SP", "MercadoSul/DF", "Coco/PE", "Purarue/PA", "Odomode/RS", "Madiba/NET"]
, ["Tainá/SP", "MercadoSul/DF", "Coco/PE", "Purarue/PA", "Odomode/RS", "Madiba/NET"]
]