2010-10-09 23:22:40 +00:00
|
|
|
{- git-annex location log
|
|
|
|
-
|
2010-10-13 00:04:36 +00:00
|
|
|
- git-annex keeps track of on which repository it last saw a value.
|
2010-10-09 23:22:40 +00:00
|
|
|
- This can be useful when using it for archiving with offline storage.
|
|
|
|
- When you indicate you --want a file, git-annex will tell you which
|
2010-10-13 00:04:36 +00:00
|
|
|
- repositories have the value.
|
2010-10-09 23:22:40 +00:00
|
|
|
-
|
2010-10-13 00:04:36 +00:00
|
|
|
- Location tracking information is stored in `.git-annex/key.log`.
|
2010-10-12 22:06:34 +00:00
|
|
|
- Repositories record their UUID and the date when they --get or --drop
|
2010-10-13 00:04:36 +00:00
|
|
|
- a value.
|
2010-10-09 23:22:40 +00:00
|
|
|
-
|
2010-10-12 22:06:34 +00:00
|
|
|
- A line of the log will look like: "date N UUID"
|
2010-10-10 02:29:16 +00:00
|
|
|
- Where N=1 when the repo has the file, and 0 otherwise.
|
2010-10-10 16:31:14 +00:00
|
|
|
-
|
|
|
|
- Git is configured to use a union merge for this file,
|
|
|
|
- so the lines may be in arbitrary order, but it will never conflict.
|
2010-10-27 20:53:54 +00:00
|
|
|
-
|
|
|
|
- Copyright 2010 Joey Hess <joey@kitenet.net>
|
|
|
|
-
|
|
|
|
- Licensed under the GNU GPL version 3 or higher.
|
2010-10-09 23:22:40 +00:00
|
|
|
-}
|
|
|
|
|
2010-10-11 21:52:46 +00:00
|
|
|
module LocationLog (
|
2010-10-12 22:25:41 +00:00
|
|
|
LogStatus(..),
|
2010-10-13 19:55:18 +00:00
|
|
|
logChange,
|
2011-01-11 20:00:40 +00:00
|
|
|
logFile,
|
2010-11-15 17:00:43 +00:00
|
|
|
keyLocations
|
2010-10-11 21:52:46 +00:00
|
|
|
) where
|
2010-10-09 23:22:40 +00:00
|
|
|
|
2010-10-11 02:20:52 +00:00
|
|
|
import Data.Time.Clock.POSIX
|
|
|
|
import Data.Time
|
|
|
|
import System.Locale
|
2010-10-10 16:31:14 +00:00
|
|
|
import qualified Data.Map as Map
|
2010-10-10 03:35:05 +00:00
|
|
|
import System.Directory
|
2010-10-31 05:06:58 +00:00
|
|
|
import System.Posix.Process
|
2011-01-04 21:03:29 +00:00
|
|
|
import Control.Monad (when)
|
2010-10-16 20:20:49 +00:00
|
|
|
|
2010-10-14 06:36:41 +00:00
|
|
|
import qualified GitRepo as Git
|
2010-10-10 02:46:35 +00:00
|
|
|
import Utility
|
2010-10-12 22:06:34 +00:00
|
|
|
import UUID
|
2010-10-14 07:18:11 +00:00
|
|
|
import Types
|
2010-10-10 19:54:02 +00:00
|
|
|
import Locations
|
2010-10-09 23:22:40 +00:00
|
|
|
|
2010-10-11 02:20:52 +00:00
|
|
|
data LogLine = LogLine {
|
|
|
|
date :: POSIXTime,
|
|
|
|
status :: LogStatus,
|
2010-10-12 22:06:34 +00:00
|
|
|
uuid :: UUID
|
2010-10-11 02:20:52 +00:00
|
|
|
} deriving (Eq)
|
|
|
|
|
2010-10-13 00:04:36 +00:00
|
|
|
data LogStatus = ValuePresent | ValueMissing | Undefined
|
2010-10-10 02:29:16 +00:00
|
|
|
deriving (Eq)
|
|
|
|
|
|
|
|
instance Show LogStatus where
|
2010-10-13 00:04:36 +00:00
|
|
|
show ValuePresent = "1"
|
|
|
|
show ValueMissing = "0"
|
2010-10-10 02:29:16 +00:00
|
|
|
show Undefined = "undefined"
|
|
|
|
|
|
|
|
instance Read LogStatus where
|
2010-10-13 00:04:36 +00:00
|
|
|
readsPrec _ "1" = [(ValuePresent, "")]
|
|
|
|
readsPrec _ "0" = [(ValueMissing, "")]
|
2010-10-10 02:29:16 +00:00
|
|
|
readsPrec _ _ = [(Undefined, "")]
|
|
|
|
|
2010-10-09 23:22:40 +00:00
|
|
|
instance Show LogLine where
|
2010-10-31 19:50:07 +00:00
|
|
|
show (LogLine d s u) = unwords [show d, show s, u]
|
2010-10-09 23:22:40 +00:00
|
|
|
|
|
|
|
instance Read LogLine where
|
2010-10-10 02:29:16 +00:00
|
|
|
-- This parser is robust in that even unparsable log lines are
|
|
|
|
-- read without an exception being thrown.
|
|
|
|
-- Such lines have a status of Undefined.
|
2010-10-10 04:02:07 +00:00
|
|
|
readsPrec _ string =
|
2010-11-22 19:46:57 +00:00
|
|
|
if length w == 3
|
|
|
|
then case pdate of
|
2010-10-11 02:20:52 +00:00
|
|
|
Just v -> good v
|
2010-10-31 19:50:07 +00:00
|
|
|
Nothing -> bad
|
|
|
|
else bad
|
2010-10-09 23:22:40 +00:00
|
|
|
where
|
2010-10-10 04:02:07 +00:00
|
|
|
w = words string
|
2010-10-31 19:50:07 +00:00
|
|
|
s = read $ w !! 1
|
|
|
|
u = w !! 2
|
2010-11-22 19:46:57 +00:00
|
|
|
pdate :: Maybe UTCTime
|
|
|
|
pdate = parseTime defaultTimeLocale "%s%Qs" $ head w
|
2010-10-11 02:20:52 +00:00
|
|
|
|
2010-10-31 19:50:07 +00:00
|
|
|
good v = ret $ LogLine (utcTimeToPOSIXSeconds v) s u
|
2010-11-22 19:46:57 +00:00
|
|
|
bad = ret $ LogLine 0 Undefined ""
|
2010-10-11 02:20:52 +00:00
|
|
|
ret v = [(v, "")]
|
2010-10-09 23:22:40 +00:00
|
|
|
|
2010-10-26 20:15:29 +00:00
|
|
|
{- Log a change in the presence of a key's value in a repository,
|
|
|
|
- and returns the filename of the logfile. -}
|
2010-11-22 19:46:57 +00:00
|
|
|
logChange :: Git.Repo -> Key -> UUID -> LogStatus -> IO FilePath
|
2010-10-31 19:50:07 +00:00
|
|
|
logChange repo key u s = do
|
2011-01-04 21:03:29 +00:00
|
|
|
when (null u) $
|
|
|
|
error $ "bug detected: unknown UUID for " ++ Git.repoDescribe repo
|
2010-10-31 19:50:07 +00:00
|
|
|
line <- logNow s u
|
2010-10-13 00:04:36 +00:00
|
|
|
ls <- readLog logfile
|
2010-10-31 19:50:07 +00:00
|
|
|
writeLog logfile (compactLog $ line:ls)
|
2010-10-26 20:15:29 +00:00
|
|
|
return logfile
|
2010-10-12 22:25:41 +00:00
|
|
|
where
|
2010-10-13 00:04:36 +00:00
|
|
|
logfile = logFile repo key
|
2010-10-12 22:25:41 +00:00
|
|
|
|
2010-10-10 03:35:05 +00:00
|
|
|
{- Reads a log file.
|
|
|
|
- Note that the LogLines returned may be in any order. -}
|
2010-10-10 16:41:20 +00:00
|
|
|
readLog :: FilePath -> IO [LogLine]
|
2010-10-09 23:22:40 +00:00
|
|
|
readLog file = do
|
2010-10-10 03:35:05 +00:00
|
|
|
exists <- doesFileExist file
|
|
|
|
if exists
|
|
|
|
then do
|
more space saving by not locking location log for read
Actions that need to read all the location logs, like "git annex get .",
were still using a lot of memory, and profiling pointed at the location log
reading as the problem. Not locking them for read, and thus avoiding the
strict reading fixes the problem, although I don't quite understand why.
(Oddly, -sstderr profiling did not show the memory as used, though top
showed dozens of MB being used.)
Anyway, it's fine to not lock location logs for read, since the log format
and parser should be safe if a partial read of a file being written happens.
Note that that could easily happen anyway, if doing a git pull, etc,
especially if git needs to union merge in changes from elsewhere. The worst
that will happen is git-annex could get a bad or out of date idea about
locations and refuse to eg, --drop something.
2010-10-31 03:34:40 +00:00
|
|
|
s <- readFile file
|
2010-10-10 03:35:05 +00:00
|
|
|
-- filter out any unparsable lines
|
2010-11-22 19:46:57 +00:00
|
|
|
return $ filter (\l -> status l /= Undefined )
|
2010-10-10 03:35:05 +00:00
|
|
|
$ map read $ lines s
|
2010-11-22 19:46:57 +00:00
|
|
|
else return []
|
2010-10-09 23:22:40 +00:00
|
|
|
|
2010-10-10 16:31:14 +00:00
|
|
|
{- Writes a set of lines to a log file -}
|
2010-10-10 16:41:20 +00:00
|
|
|
writeLog :: FilePath -> [LogLine] -> IO ()
|
2010-10-31 19:50:07 +00:00
|
|
|
writeLog file ls = do
|
2010-10-31 05:06:58 +00:00
|
|
|
pid <- getProcessID
|
|
|
|
let tmpfile = file ++ ".tmp" ++ show pid
|
2010-10-10 16:31:14 +00:00
|
|
|
createDirectoryIfMissing True (parentDir file)
|
2010-10-31 19:50:07 +00:00
|
|
|
writeFile tmpfile $ unlines $ map show ls
|
2010-10-31 05:06:58 +00:00
|
|
|
renameFile tmpfile file
|
2010-10-10 16:31:14 +00:00
|
|
|
|
2010-10-10 02:46:35 +00:00
|
|
|
{- Generates a new LogLine with the current date. -}
|
2010-10-12 22:25:41 +00:00
|
|
|
logNow :: LogStatus -> UUID -> IO LogLine
|
2010-10-31 19:50:07 +00:00
|
|
|
logNow s u = do
|
2010-10-11 02:20:52 +00:00
|
|
|
now <- getPOSIXTime
|
2010-10-31 19:50:07 +00:00
|
|
|
return $ LogLine now s u
|
2010-10-10 02:14:13 +00:00
|
|
|
|
2010-10-13 00:04:36 +00:00
|
|
|
{- Returns the filename of the log file for a given key. -}
|
2010-10-14 06:36:41 +00:00
|
|
|
logFile :: Git.Repo -> Key -> String
|
2010-10-13 00:04:36 +00:00
|
|
|
logFile repo key =
|
2010-11-28 20:31:20 +00:00
|
|
|
gitStateDir repo ++ keyFile key ++ ".log"
|
2010-10-10 02:46:35 +00:00
|
|
|
|
2010-10-12 22:25:41 +00:00
|
|
|
{- Returns a list of repository UUIDs that, according to the log, have
|
2010-10-13 00:04:36 +00:00
|
|
|
- the value of a key. -}
|
2010-10-14 06:36:41 +00:00
|
|
|
keyLocations :: Git.Repo -> Key -> IO [UUID]
|
2010-10-13 00:04:36 +00:00
|
|
|
keyLocations thisrepo key = do
|
2010-10-31 19:50:07 +00:00
|
|
|
ls <- readLog $ logFile thisrepo key
|
|
|
|
return $ map uuid $ filterPresent ls
|
2010-10-10 02:46:35 +00:00
|
|
|
|
2010-10-13 00:04:36 +00:00
|
|
|
{- Filters the list of LogLines to find ones where the value
|
2010-10-10 02:46:35 +00:00
|
|
|
- is (or should still be) present. -}
|
|
|
|
filterPresent :: [LogLine] -> [LogLine]
|
2010-10-31 19:50:07 +00:00
|
|
|
filterPresent ls = filter (\l -> ValuePresent == status l) $ compactLog ls
|
|
|
|
|
|
|
|
type LogMap = Map.Map UUID LogLine
|
2010-10-10 16:31:14 +00:00
|
|
|
|
|
|
|
{- Compacts a set of logs, returning a subset that contains the current
|
|
|
|
- status. -}
|
|
|
|
compactLog :: [LogLine] -> [LogLine]
|
2010-10-31 19:50:07 +00:00
|
|
|
compactLog ls = compactLog' Map.empty ls
|
|
|
|
compactLog' :: LogMap -> [LogLine] -> [LogLine]
|
|
|
|
compactLog' m [] = Map.elems m
|
|
|
|
compactLog' m (l:ls) = compactLog' (mapLog m l) ls
|
2010-10-10 16:31:14 +00:00
|
|
|
|
|
|
|
{- Inserts a log into a map of logs, if the log has better (ie, newer)
|
|
|
|
- information about a repo than the other logs in the map -}
|
2010-10-31 19:50:07 +00:00
|
|
|
mapLog :: LogMap -> LogLine -> LogMap
|
|
|
|
mapLog m l =
|
2010-11-22 19:46:57 +00:00
|
|
|
if better
|
2010-10-31 19:50:07 +00:00
|
|
|
then Map.insert u l m
|
|
|
|
else m
|
2010-10-10 16:31:14 +00:00
|
|
|
where
|
2010-10-31 19:50:07 +00:00
|
|
|
better = case Map.lookup u m of
|
|
|
|
Just l' -> (date l' <= date l)
|
2010-10-10 16:31:14 +00:00
|
|
|
Nothing -> True
|
2010-10-31 19:50:07 +00:00
|
|
|
u = uuid l
|