add web special remote

Generalized LocationLog to PresenceLog, and use a presence log to record
urls for the web special remote.
This commit is contained in:
Joey Hess 2011-07-01 15:24:07 -04:00
parent ceb887d826
commit cdbcd6f495
17 changed files with 272 additions and 139 deletions

View file

@ -135,8 +135,8 @@ showLocations key exclude = do
untrusteduuids <- trustGet UnTrusted
let uuidswanted = filteruuids uuids (u:exclude++untrusteduuids)
let uuidsskipped = filteruuids uuids (u:exclude++uuidswanted)
ppuuidswanted <- prettyPrintUUIDs uuidswanted
ppuuidsskipped <- prettyPrintUUIDs uuidsskipped
ppuuidswanted <- Remote.prettyPrintUUIDs uuidswanted
ppuuidsskipped <- Remote.prettyPrintUUIDs uuidsskipped
showLongNote $ message ppuuidswanted ppuuidsskipped
where
filteruuids list x = filter (`notElem` x) list
@ -195,7 +195,7 @@ checkKeyNumCopies key file numcopies = do
let present = length safelocations
if present < needed
then do
ppuuids <- prettyPrintUUIDs untrustedlocations
ppuuids <- Remote.prettyPrintUUIDs untrustedlocations
warning $ missingNote (filename file key) present needed ppuuids
return False
else return True

View file

@ -51,7 +51,7 @@ perform (file, backend) = do
cleanup :: FilePath -> Key -> CommandCleanup
cleanup file key = do
logStatus key ValuePresent
logStatus key InfoPresent
link <- calcGitLink file key
liftIO $ createSymbolicLink link file

View file

@ -45,5 +45,5 @@ perform key backend numcopies = do
cleanup :: Key -> CommandCleanup
cleanup key = do
whenM (inAnnex key) $ removeAnnex key
logStatus key ValueMissing
logStatus key InfoMissing
return True

View file

@ -40,5 +40,5 @@ perform key = do
cleanup :: Key -> CommandCleanup
cleanup key = do
logStatus key ValueMissing
logStatus key InfoMissing
return True

View file

@ -64,11 +64,11 @@ verifyLocationLog key file = do
case (present, u `elem` uuids) of
(True, False) -> do
fix g u ValuePresent
fix g u InfoPresent
-- There is no data loss, so do not fail.
return True
(False, True) -> do
fix g u ValueMissing
fix g u InfoMissing
warning $
"** Based on the location log, " ++ file
++ "\n** was expected to be present, " ++

View file

@ -58,7 +58,7 @@ remoteHasKey remote key present = do
g <- Annex.gitRepo
logChange g key remoteuuid status
where
status = if present then ValuePresent else ValueMissing
status = if present then InfoPresent else InfoMissing
{- Moves (or copies) the content of an annexed file to a remote.
-

View file

@ -46,5 +46,5 @@ perform file = do
cleanup :: CommandCleanup
cleanup = do
key <- cmdlineKey
logStatus key ValuePresent
logStatus key InfoPresent
return True

View file

@ -65,7 +65,7 @@ cleanup file key = do
liftIO $ createDirectoryIfMissing True (parentDir file)
fromAnnex key file
logStatus key ValueMissing
logStatus key InfoMissing
-- Commit staged changes at end to avoid confusing the
-- pre-commit hook if this file is later added back to

View file

@ -10,7 +10,7 @@ module Command.Whereis where
import LocationLog
import Command
import Messages
import UUID
import Remote
import Types
command :: [Command]

View file

@ -65,12 +65,7 @@ calcGitLink file key = do
whoops = error $ "unable to normalize " ++ file
{- Updates the LocationLog when a key's presence changes in the current
- repository.
-
- Note that the LocationLog is not updated in bare repositories.
- Operations that change a bare repository should be done from
- a non-bare repository, and the LocationLog in that repository be
- updated instead. -}
- repository. -}
logStatus :: Key -> LogStatus -> Annex ()
logStatus key status = do
g <- Annex.gitRepo
@ -112,7 +107,7 @@ getViaTmpUnchecked key action = do
if success
then do
moveAnnex key tmp
logStatus key ValuePresent
logStatus key InfoPresent
return True
else do
-- the tmp file is left behind, in case caller wants
@ -240,7 +235,7 @@ moveBad key = do
allowWrite (parentDir src)
renameFile src dest
removeDirectory (parentDir src)
logStatus key ValueMissing
logStatus key InfoMissing
return dest
{- List of keys whose content exists in .git/annex/objects/ -}

View file

@ -5,11 +5,6 @@
-
- Repositories record their UUID and the date when they --get or --drop
- a value.
-
- A line of the log will look like: "date N UUID"
- Where N=1 when the repo has the file, and 0 otherwise.
- (After the UUID can optionally come a white space and other data,
- for future expansion.)
-
- Copyright 2010-2011 Joey Hess <joey@kitenet.net>
-
@ -25,61 +20,16 @@ module LocationLog (
loggedKeys
) where
import Data.Time.Clock.POSIX
import Data.Time
import System.Locale
import System.FilePath
import qualified Data.Map as Map
import Control.Monad (when)
import Data.Maybe
import Control.Monad.State (liftIO)
import qualified Git
import qualified Branch
import UUID
import Types
import Locations
data LogLine = LogLine {
date :: POSIXTime,
status :: LogStatus,
uuid :: UUID
} deriving (Eq)
data LogStatus = ValuePresent | ValueMissing | Undefined
deriving (Eq)
instance Show LogStatus where
show ValuePresent = "1"
show ValueMissing = "0"
show Undefined = "undefined"
instance Read LogStatus where
readsPrec _ "1" = [(ValuePresent, "")]
readsPrec _ "0" = [(ValueMissing, "")]
readsPrec _ _ = [(Undefined, "")]
instance Show LogLine where
show (LogLine d s u) = unwords [show d, show s, u]
instance Read LogLine where
-- This parser is robust in that even unparsable log lines are
-- read without an exception being thrown.
-- Such lines have a status of Undefined.
readsPrec _ string =
if length w >= 3
then maybe bad good pdate
else bad
where
w = words string
s = read $ w !! 1
u = w !! 2
pdate :: Maybe UTCTime
pdate = parseTime defaultTimeLocale "%s%Qs" $ head w
good v = ret $ LogLine (utcTimeToPOSIXSeconds v) s u
bad = ret $ LogLine 0 Undefined ""
ret v = [(v, "")]
import PresenceLog
{- Log a change in the presence of a key's value in a repository. -}
logChange :: Git.Repo -> Key -> UUID -> LogStatus -> Annex ()
@ -92,59 +42,10 @@ logChange repo key u s = do
ls <- readLog f
writeLog f (compactLog $ line:ls)
{- Reads a log file.
- Note that the LogLines returned may be in any order. -}
readLog :: FilePath -> Annex [LogLine]
readLog file = return . parseLog =<< Branch.get file
parseLog :: String -> [LogLine]
parseLog s = filter parsable $ map read $ lines s
where
-- some lines may be unparseable, avoid them
parsable l = status l /= Undefined
{- Stores a set of lines in a log file -}
writeLog :: FilePath -> [LogLine] -> Annex ()
writeLog file ls = Branch.change file (unlines $ map show ls)
{- Generates a new LogLine with the current date. -}
logNow :: LogStatus -> UUID -> Annex LogLine
logNow s u = do
now <- liftIO $ getPOSIXTime
return $ LogLine now s u
{- Returns a list of repository UUIDs that, according to the log, have
- the value of a key. -}
keyLocations :: Key -> Annex [UUID]
keyLocations key = do
ls <- readLog $ logFile key
return $ map uuid $ filterPresent ls
{- Filters the list of LogLines to find ones where the value
- is (or should still be) present. -}
filterPresent :: [LogLine] -> [LogLine]
filterPresent ls = filter (\l -> ValuePresent == status l) $ compactLog ls
type LogMap = Map.Map UUID LogLine
{- Compacts a set of logs, returning a subset that contains the current
- status. -}
compactLog :: [LogLine] -> [LogLine]
compactLog ls = compactLog' Map.empty ls
compactLog' :: LogMap -> [LogLine] -> [LogLine]
compactLog' m [] = Map.elems m
compactLog' m (l:ls) = compactLog' (mapLog m l) ls
{- Inserts a log into a map of logs, if the log has better (ie, newer)
- information about a repo than the other logs in the map -}
mapLog :: LogMap -> LogLine -> LogMap
mapLog m l =
if better
then Map.insert u l m
else m
where
better = maybe True (\l' -> date l' <= date l) $ Map.lookup u m
u = uuid l
keyLocations key = currentLog $ logFile key
{- Finds all keys that have location log information.
- (There may be duplicate keys in the list.) -}

123
PresenceLog.hs Normal file
View file

@ -0,0 +1,123 @@
{- git-annex presence log
-
- This is used to store presence information in the git-annex branch in
- a way that can be union merged.
-
- A line of the log will look like: "date N INFO"
- Where N=1 when the INFO is present, and 0 otherwise.
-
- Copyright 2010-2011 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module PresenceLog (
LogStatus(..),
readLog,
writeLog,
logNow,
compactLog,
currentLog
) where
import Data.Time.Clock.POSIX
import Data.Time
import System.Locale
import qualified Data.Map as Map
import Control.Monad.State (liftIO)
import qualified Branch
import Types
data LogLine = LogLine {
date :: POSIXTime,
status :: LogStatus,
info :: String
} deriving (Eq)
data LogStatus = InfoPresent | InfoMissing | Undefined
deriving (Eq)
instance Show LogStatus where
show InfoPresent = "1"
show InfoMissing = "0"
show Undefined = "undefined"
instance Read LogStatus where
readsPrec _ "1" = [(InfoPresent, "")]
readsPrec _ "0" = [(InfoMissing, "")]
readsPrec _ _ = [(Undefined, "")]
instance Show LogLine where
show (LogLine d s i) = unwords [show d, show s, i]
instance Read LogLine where
-- This parser is robust in that even unparsable log lines are
-- read without an exception being thrown.
-- Such lines have a status of Undefined.
readsPrec _ string =
if length w >= 3
then maybe bad good pdate
else bad
where
w = words string
s = read $ w !! 1
i = w !! 2
pdate :: Maybe UTCTime
pdate = parseTime defaultTimeLocale "%s%Qs" $ head w
good v = ret $ LogLine (utcTimeToPOSIXSeconds v) s i
bad = ret $ LogLine 0 Undefined ""
ret v = [(v, "")]
{- Reads a log file.
- Note that the LogLines returned may be in any order. -}
readLog :: FilePath -> Annex [LogLine]
readLog file = return . parseLog =<< Branch.get file
parseLog :: String -> [LogLine]
parseLog s = filter parsable $ map read $ lines s
where
-- some lines may be unparseable, avoid them
parsable l = status l /= Undefined
{- Stores a set of lines in a log file -}
writeLog :: FilePath -> [LogLine] -> Annex ()
writeLog file ls = Branch.change file (unlines $ map show ls)
{- Generates a new LogLine with the current date. -}
logNow :: LogStatus -> String -> Annex LogLine
logNow s i = do
now <- liftIO $ getPOSIXTime
return $ LogLine now s i
{- Reads a log and returns only the info that is still in effect. -}
currentLog :: FilePath -> Annex [String]
currentLog file = do
ls <- readLog file
return $ map info $ filterPresent ls
{- Returns the info from LogLines that are in effect. -}
filterPresent :: [LogLine] -> [LogLine]
filterPresent ls = filter (\l -> InfoPresent == status l) $ compactLog ls
type LogMap = Map.Map String LogLine
{- Compacts a set of logs, returning a subset that contains the current
- status. -}
compactLog :: [LogLine] -> [LogLine]
compactLog ls = compactLog' Map.empty ls
compactLog' :: LogMap -> [LogLine] -> [LogLine]
compactLog' m [] = Map.elems m
compactLog' m (l:ls) = compactLog' (mapLog m l) ls
{- Inserts a log into a map of logs, if the log has better (ie, newer)
- information than the other logs in the map -}
mapLog :: LogMap -> LogLine -> LogMap
mapLog m l =
if better
then Map.insert i l m
else m
where
better = maybe True (\l' -> date l' <= date l) $ Map.lookup i m
i = info l

View file

@ -24,6 +24,7 @@ module Remote (
nameToUUID,
remotesWithUUID,
remotesWithoutUUID,
prettyPrintUUIDs,
remoteLog,
readRemoteLog,
@ -34,7 +35,7 @@ module Remote (
prop_idempotent_configEscape
) where
import Control.Monad (filterM)
import Control.Monad (filterM, liftM2)
import Data.List
import qualified Data.Map as M
import Data.Maybe
@ -54,6 +55,7 @@ import qualified Remote.S3
import qualified Remote.Bup
import qualified Remote.Directory
import qualified Remote.Rsync
import qualified Remote.Web
import qualified Remote.Hook
remoteTypes :: [RemoteType Annex]
@ -63,6 +65,7 @@ remoteTypes =
, Remote.Bup.remote
, Remote.Directory.remote
, Remote.Rsync.remote
, Remote.Web.remote
, Remote.Hook.remote
]
@ -120,6 +123,24 @@ nameToUUID n = do
invertMap = M.fromList . map swap . M.toList
swap (a, b) = (b, a)
{- Pretty-prints a list of UUIDs of remotes. -}
prettyPrintUUIDs :: [UUID] -> Annex String
prettyPrintUUIDs uuids = do
here <- getUUID =<< Annex.gitRepo
-- Show descriptions from the uuid log, falling back to remote names,
-- as some remotes may not be in the uuid log.
m <- liftM2 M.union uuidMap $
return . M.fromList . map (\r -> (uuid r, name r)) =<< genList
return $ unwords $ map (\u -> "\t" ++ prettify m u here ++ "\n") uuids
where
prettify m u here = base ++ ishere
where
base = if not $ null $ findlog m u
then u ++ " -- " ++ findlog m u
else u
ishere = if here == u then " <-- here" else ""
findlog m u = M.findWithDefault "" u m
{- Filters a list of remotes to ones that have the listed uuids. -}
remotesWithUUID :: [Remote Annex] -> [UUID] -> [Remote Annex]
remotesWithUUID rs us = filter (\r -> uuid r `elem` us) rs

107
Remote/Web.hs Normal file
View file

@ -0,0 +1,107 @@
{- Web remotes.
-
- Copyright 2011 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Remote.Web (
remote
) where
import Control.Monad.State (liftIO)
import Control.Exception
import System.FilePath
import Network.Curl.Easy
import Network.Curl.Opts
import Network.Curl.Types
import Network.Curl.Code
import Types
import Types.Remote
import qualified Git
import Messages
import Utility
import UUID
import Config
import PresenceLog
remote :: RemoteType Annex
remote = RemoteType {
typename = "web",
enumerate = list,
generate = gen,
setup = error "not supported"
}
-- There is only one web remote, and it always exists.
-- (If the web should cease to exist, remove this module and redistribute
-- a new release to the survivors by carrier pigeon.)
list :: Annex [Git.Repo]
list = return [Git.repoRemoteNameSet Git.repoFromUnknown "remote.web.dummy"]
-- Dummy uuid for the whole web. Do not alter.
webUUID :: UUID
webUUID = "00000000-0000-0000-0000-000000000001"
gen :: Git.Repo -> UUID -> Maybe RemoteConfig -> Annex (Remote Annex)
gen r _ _ =
return $ Remote {
uuid = webUUID,
cost = expensiveRemoteCost,
name = Git.repoDescribe r,
storeKey = upload,
retrieveKeyFile = download,
removeKey = remove,
hasKey = check,
hasKeyCheap = False,
config = Nothing
}
{- The urls for a key are stored in remote/web/key.log in the git-annex branch. -}
urlLog :: Key -> FilePath
urlLog key = "remote/web" </> show key ++ ".log"
urls :: Key -> Annex [URLString]
urls key = currentLog (urlLog key)
download :: Key -> FilePath -> Annex Bool
download key file = download' file =<< urls key
download' :: FilePath -> [URLString] -> Annex Bool
download' _ [] = return False
download' file (url:us) = do
showProgress -- make way for curl progress bar
ok <- liftIO $ boolSystem "curl" [Params "-# -o", File file, File url]
if ok then return ok else download' file us
upload :: Key -> Annex Bool
upload _ = do
warning "upload to web not supported"
return False
remove :: Key -> Annex Bool
remove _ = do
warning "removal from web not supported"
return False
check :: Key -> Annex (Either IOException Bool)
check key = do
us <- urls key
if null us
then return $ Right False
else return . Right =<< check' us
check' :: [URLString] -> Annex Bool
check' [] = return False
check' (u:us) = do
showNote ("checking " ++ u)
e <- liftIO $ urlexists u
if e then return e else check' us
urlexists :: URLString -> IO Bool
urlexists url = do
curl <- initialize
_ <- setopt curl (CurlURL url)
_ <- setopt curl (CurlNoBody True)
_ <- setopt curl (CurlFailOnError True)
res <- perform curl
return $ res == CurlOK

16
UUID.hs
View file

@ -17,7 +17,6 @@ module UUID (
getUncachedUUID,
prepUUID,
genUUID,
prettyPrintUUIDs,
describeUUID,
uuidMap,
uuidLog
@ -86,21 +85,6 @@ prepUUID = do
uuid <- liftIO $ genUUID
setConfig configkey uuid
{- Pretty-prints a list of UUIDs -}
prettyPrintUUIDs :: [UUID] -> Annex String
prettyPrintUUIDs uuids = do
here <- getUUID =<< Annex.gitRepo
m <- uuidMap
return $ unwords $ map (\u -> "\t" ++ prettify m u here ++ "\n") uuids
where
prettify m u here = base ++ ishere
where
base = if not $ null $ findlog m u
then u ++ " -- " ++ findlog m u
else u
ishere = if here == u then " <-- here" else ""
findlog m u = M.findWithDefault "" u m
{- Records a description for a uuid in the uuidLog. -}
describeUUID :: UUID -> String -> Annex ()
describeUUID uuid desc = do

2
debian/changelog vendored
View file

@ -1,5 +1,7 @@
git-annex (3.20110625) UNRELEASED; urgency=low
* Now the web can be used as a special remote. This feature
replaces the old URL backend.
* Sped back up fsck, copy --from, and other commands that often
have to read a lot of information from the git-annex branch. Such
commands are now faster than they were before introduction of the

File diff suppressed because one or more lines are too long