Merge branch 'master' of /home/joey/src/git-annex

This commit is contained in:
Joey Hess 2010-10-19 19:38:24 -04:00
commit c397e5a0f3
56 changed files with 2685 additions and 0 deletions

5
.gitignore vendored Normal file
View file

@ -0,0 +1,5 @@
build/*
git-annex
git-annex.1
doc/.ikiwiki
html

77
Annex.hs Normal file
View file

@ -0,0 +1,77 @@
{- git-annex monad -}
module Annex (
new,
run,
gitRepo,
gitRepoChange,
backends,
backendsChange,
supportedBackends,
flagIsSet,
flagChange,
Flag(..)
) where
import Control.Monad.State
import qualified GitRepo as Git
import Types
import qualified TypeInternals as Internals
{- Create and returns an Annex state object for the specified git repo.
-}
new :: Git.Repo -> [Backend] -> IO AnnexState
new gitrepo allbackends = do
let s = Internals.AnnexState {
Internals.repo = gitrepo,
Internals.backends = [],
Internals.supportedBackends = allbackends,
Internals.flags = []
}
(_,s') <- Annex.run s (prep gitrepo)
return s'
where
prep gitrepo = do
-- read git config and update state
gitrepo' <- liftIO $ Git.configRead gitrepo
Annex.gitRepoChange gitrepo'
-- performs an action in the Annex monad
run state action = runStateT (action) state
-- Annex monad state accessors
gitRepo :: Annex Git.Repo
gitRepo = do
state <- get
return (Internals.repo state)
gitRepoChange :: Git.Repo -> Annex ()
gitRepoChange r = do
state <- get
put state { Internals.repo = r }
return ()
backends :: Annex [Backend]
backends = do
state <- get
return (Internals.backends state)
backendsChange :: [Backend] -> Annex ()
backendsChange b = do
state <- get
put state { Internals.backends = b }
return ()
supportedBackends :: Annex [Backend]
supportedBackends = do
state <- get
return (Internals.supportedBackends state)
flagIsSet :: Flag -> Annex Bool
flagIsSet flag = do
state <- get
return $ elem flag $ Internals.flags state
flagChange :: Flag -> Bool -> Annex ()
flagChange flag set = do
state <- get
let f = filter (/= flag) $ Internals.flags state
if (set)
then put state { Internals.flags = (flag:f) }
else put state { Internals.flags = f }
return ()

116
Backend.hs Normal file
View file

@ -0,0 +1,116 @@
{- git-annex key-value storage backends
-
- git-annex uses a key-value abstraction layer to allow files contents to be
- stored in different ways. In theory, any key-value storage system could be
- used to store the file contents, and git-annex would then retrieve them
- as needed and put them in `.git/annex/`.
-
- When a file is annexed, a key is generated from its content and/or metadata.
- This key can later be used to retrieve the file's content (its value). This
- key generation must be stable for a given file content, name, and size.
-
- Multiple pluggable backends are supported, and more than one can be used
- to store different files' contents in a given repository.
- -}
module Backend (
storeFileKey,
retrieveKeyFile,
removeKey,
hasKey,
lookupFile
) where
import Control.Monad.State
import Control.Exception
import System.Directory
import System.FilePath
import Data.String.Utils
import System.Posix.Files
import Locations
import qualified GitRepo as Git
import qualified Annex
import Utility
import Types
import qualified TypeInternals as Internals
{- List of backends in the order to try them when storing a new key. -}
backendList :: Annex [Backend]
backendList = do
l <- Annex.backends
if (0 < length l)
then return l
else do
all <- Annex.supportedBackends
g <- Annex.gitRepo
let l = parseBackendList all $ Git.configGet g "annex.backends" ""
Annex.backendsChange l
return l
where
parseBackendList all s =
if (length s == 0)
then all
else map (lookupBackendName all) $ words s
{- Looks up a backend in the list of supportedBackends -}
lookupBackendName :: [Backend] -> String -> Backend
lookupBackendName all s =
if ((length matches) /= 1)
then error $ "unknown backend " ++ s
else matches !! 0
where matches = filter (\b -> s == Internals.name b) all
{- Attempts to store a file in one of the backends. -}
storeFileKey :: FilePath -> Annex (Maybe (Key, Backend))
storeFileKey file = do
g <- Annex.gitRepo
let relfile = Git.relative g file
b <- backendList
storeFileKey' b file relfile
storeFileKey' [] _ _ = return Nothing
storeFileKey' (b:bs) file relfile = do
try <- (Internals.getKey b) relfile
case (try) of
Nothing -> nextbackend
Just key -> do
stored <- (Internals.storeFileKey b) file key
if (not stored)
then nextbackend
else do
return $ Just (key, b)
where
nextbackend = storeFileKey' bs file relfile
{- Attempts to retrieve an key from one of the backends, saving it to
- a specified location. -}
retrieveKeyFile :: Backend -> Key -> FilePath -> Annex Bool
retrieveKeyFile backend key dest = (Internals.retrieveKeyFile backend) key dest
{- Removes a key from a backend. -}
removeKey :: Backend -> Key -> Annex Bool
removeKey backend key = (Internals.removeKey backend) key
{- Checks if a backend has its key. -}
hasKey :: Key -> Annex Bool
hasKey key = do
all <- Annex.supportedBackends
(Internals.hasKey (lookupBackendName all $ backendName key)) key
{- Looks up the key and backend corresponding to an annexed file,
- by examining what the file symlinks to. -}
lookupFile :: FilePath -> Annex (Maybe (Key, Backend))
lookupFile file = do
all <- Annex.supportedBackends
result <- liftIO $ (try (lookup all)::IO (Either SomeException (Maybe (Key, Backend))))
case (result) of
Left err -> return Nothing
Right succ -> return succ
where
lookup all = do
l <- readSymbolicLink file
return $ Just $ pair all $ takeFileName l
pair all file = (k, b)
where
k = fileKey file
b = lookupBackendName all $ backendName k

150
Backend/File.hs Normal file
View file

@ -0,0 +1,150 @@
{- git-annex pseudo-backend
-
- This backend does not really do any independant data storage,
- it relies on the file contents in .git/annex/ in this repo,
- and other accessible repos.
-
- This is an abstract backend; getKey has to be implemented to complete
- it.
-}
module Backend.File (backend) where
import Control.Monad.State
import System.IO
import System.Cmd
import System.Cmd.Utils
import Control.Exception
import List
import Maybe
import TypeInternals
import LocationLog
import Locations
import qualified Remotes
import qualified GitRepo as Git
import Utility
import Core
import qualified Annex
import UUID
import qualified Backend
backend = Backend {
name = mustProvide,
getKey = mustProvide,
storeFileKey = dummyStore,
retrieveKeyFile = copyKeyFile,
removeKey = checkRemoveKey,
hasKey = checkKeyFile
}
mustProvide = error "must provide this field"
{- Storing a key is a no-op. -}
dummyStore :: FilePath -> Key -> Annex (Bool)
dummyStore file key = return True
{- Just check if the .git/annex/ file for the key exists. -}
checkKeyFile :: Key -> Annex Bool
checkKeyFile k = inAnnex k
{- Try to find a copy of the file in one of the remotes,
- and copy it over to this one. -}
copyKeyFile :: Key -> FilePath -> Annex (Bool)
copyKeyFile key file = do
remotes <- Remotes.withKey key
if (0 == length remotes)
then do
showNote "not available"
showLocations key
return False
else trycopy remotes remotes
where
trycopy full [] = do
showNote "not available"
showTriedRemotes full
showLocations key
return False
trycopy full (r:rs) = do
-- annexLocation needs the git config to have been
-- read for a remote, so do that now,
-- if it hasn't been already
result <- Remotes.tryGitConfigRead r
case (result) of
Left err -> trycopy full rs
Right r' -> do
showNote $ "copying from " ++ (Git.repoDescribe r ) ++ "..."
liftIO $ copyFromRemote r' key file
{- Tries to copy a file from a remote. -}
copyFromRemote :: Git.Repo -> Key -> FilePath -> IO Bool
copyFromRemote r key file = do
if (Git.repoIsLocal r)
then getlocal
else getremote
where
getlocal = boolSystem "cp" ["-a", location, file]
getremote = return False -- TODO implement get from remote
location = annexLocation r key
showLocations :: Key -> Annex ()
showLocations key = do
g <- Annex.gitRepo
u <- getUUID g
uuids <- liftIO $ keyLocations g key
let uuidsf = filter (\v -> v /= u) uuids
ppuuids <- prettyPrintUUIDs uuidsf
if (0 < length uuidsf)
then showLongNote $ "Try making some of these repositories available:\n" ++ ppuuids
else showLongNote $ "No other repository is known to contain the file."
showTriedRemotes remotes =
showLongNote $ "I was unable to access these remotes: " ++
(Remotes.list remotes)
{- Checks remotes to verify that enough copies of a key exist to allow
- for a key to be safely removed (with no data loss), and fails with an
- error if not. -}
checkRemoveKey :: Key -> Annex (Bool)
checkRemoveKey key = do
force <- Annex.flagIsSet Force
if (force)
then return True
else do
g <- Annex.gitRepo
remotes <- Remotes.withKey key
let numcopies = read $ Git.configGet g config "1"
if (numcopies > length remotes)
then notEnoughCopies numcopies (length remotes) []
else findcopies numcopies 0 remotes []
where
config = "annex.numcopies"
findcopies need have [] bad =
if (have >= need)
then return True
else notEnoughCopies need have bad
findcopies need have (r:rs) bad = do
all <- Annex.supportedBackends
result <- liftIO $ ((try $ remoteHasKey r all)::IO (Either SomeException Bool))
case (result) of
Right True -> findcopies need (have+1) rs bad
Right False -> findcopies need have rs bad
Left _ -> findcopies need have rs (r:bad)
remoteHasKey r all = do
-- To check if a remote has a key, construct a new
-- Annex monad and query its backend.
a <- Annex.new r all
(result, _) <- Annex.run a (Backend.hasKey key)
return result
notEnoughCopies need have bad = do
unsafe
showLongNote $
"Could only verify the existence of " ++
(show have) ++ " out of " ++ (show need) ++
" necessary copies"
if (0 /= length bad) then showTriedRemotes bad else return ()
showLocations key
hint
return False
unsafe = showNote "unsafe"
hint = showLongNote $ "(Use --force to override this check, or adjust annex.numcopies.)"

16
Backend/SHA1.hs Normal file
View file

@ -0,0 +1,16 @@
{- git-annex "SHA1" backend
- -}
module Backend.SHA1 (backend) where
import qualified Backend.File
import TypeInternals
backend = Backend.File.backend {
name = "SHA1",
getKey = keyValue
}
-- checksum the file to get its key
keyValue :: FilePath -> Annex (Maybe Key)
keyValue k = error "SHA1 keyValue unimplemented" -- TODO

47
Backend/URL.hs Normal file
View file

@ -0,0 +1,47 @@
{- git-annex "URL" backend
- -}
module Backend.URL (backend) where
import Control.Exception
import Control.Monad.State (liftIO)
import Data.String.Utils
import System.Cmd
import System.Cmd.Utils
import System.Exit
import TypeInternals
import Core
backend = Backend {
name = "URL",
getKey = keyValue,
storeFileKey = dummyStore,
retrieveKeyFile = downloadUrl,
removeKey = dummyOk,
hasKey = dummyOk
}
-- cannot generate url from filename
keyValue :: FilePath -> Annex (Maybe Key)
keyValue file = return Nothing
-- cannot change url contents
dummyStore :: FilePath -> Key -> Annex Bool
dummyStore file url = return False
-- allow keys to be removed; presumably they can always be downloaded again
dummyOk :: Key -> Annex Bool
dummyOk url = return True
downloadUrl :: Key -> FilePath -> Annex Bool
downloadUrl key file = do
showNote "downloading"
liftIO $ putStrLn "" -- make way for curl progress bar
result <- liftIO $ (try curl::IO (Either SomeException ()))
case result of
Left err -> return False
Right succ -> return True
where
curl = safeSystem "curl" ["-#", "-o", file, url]
url = join ":" $ drop 1 $ split ":" $ show key

35
Backend/WORM.hs Normal file
View file

@ -0,0 +1,35 @@
{- git-annex "WORM" backend -- Write Once, Read Many
- -}
module Backend.WORM (backend) where
import Control.Monad.State
import System.FilePath
import System.Posix.Files
import qualified Data.ByteString.Lazy.Char8 as B
import qualified Backend.File
import TypeInternals
import Utility
backend = Backend.File.backend {
name = "WORM",
getKey = keyValue
}
-- The key is formed from the file size, modification time, and the
-- basename of the filename.
--
-- That allows multiple files with the same names to have different keys,
-- while also allowing a file to be moved around while retaining the
-- same key.
keyValue :: FilePath -> Annex (Maybe Key)
keyValue file = do
stat <- liftIO $ getFileStatus file
return $ Just $ Key ((name backend), key stat)
where
key stat = uniqueid stat ++ sep ++ base
uniqueid stat = (show $ modificationTime stat) ++ sep ++
(show $ fileSize stat)
base = takeFileName file
sep = ":"

14
BackendList.hs Normal file
View file

@ -0,0 +1,14 @@
{- git-annex backend list
- -}
module BackendList (allBackends) where
-- When adding a new backend, import it here and add it to the list.
import qualified Backend.WORM
import qualified Backend.SHA1
import qualified Backend.URL
allBackends =
[ Backend.WORM.backend
, Backend.SHA1.backend
, Backend.URL.backend
]

235
Commands.hs Normal file
View file

@ -0,0 +1,235 @@
{- git-annex command line -}
module Commands (parseCmd) where
import System.Console.GetOpt
import Control.Monad.State (liftIO)
import System.Posix.Files
import System.Directory
import System.Path
import Data.String.Utils
import List
import IO
import qualified GitRepo as Git
import qualified Annex
import Utility
import Locations
import qualified Backend
import UUID
import LocationLog
import Types
import Core
import qualified Remotes
import qualified TypeInternals
data CmdWants = FilesInGit | FilesNotInGit | RepoName | SingleString
data Command = Command {
cmdname :: String,
cmdaction :: (String -> Annex ()),
cmdwants :: CmdWants,
cmddesc :: String
}
cmds :: [Command]
cmds = [
(Command "add" addCmd FilesNotInGit
"add files to annex")
, (Command "get" getCmd FilesInGit
"make content of annexed files available")
, (Command "drop" dropCmd FilesInGit
"indicate content of files not currently wanted")
, (Command "unannex" unannexCmd FilesInGit
"undo accidential add command")
, (Command "init" initCmd SingleString
"initialize git-annex with repository description")
, (Command "fix" fixCmd FilesInGit
"fix up files' symlinks to point to annexed content")
]
options = [
Option ['f'] ["force"] (NoArg Force) "allow actions that may lose annexed data"
]
header = "Usage: git-annex " ++ (join "|" $ map cmdname cmds) ++ " [path ...]"
usage = usageInfo header options ++ "\nSubcommands:\n" ++ cmddescs
where
cmddescs = unlines $ map (\c -> indent $ showcmd c) cmds
showcmd c =
(cmdname c) ++
(take (10 - (length (cmdname c))) $ repeat ' ') ++
(cmddesc c)
indent l = " " ++ l
{- Finds the type of parameters a command wants, from among the passed
- parameter list. -}
findWanted :: CmdWants -> [String] -> Git.Repo -> IO [String]
findWanted FilesNotInGit params repo = do
files <- mapM (Git.notInRepo repo) params
return $ foldl (++) [] files
findWanted FilesInGit params repo = do
files <- mapM (Git.inRepo repo) params
return $ foldl (++) [] files
findWanted SingleString params _ = do
return $ [unwords params]
findWanted RepoName params _ = do
return $ params
{- Parses command line and returns a list of flags and a list of
- actions to be run in the Annex monad. -}
parseCmd :: [String] -> AnnexState -> IO ([Flag], [Annex ()])
parseCmd argv state = do
(flags, params) <- getopt
case (length params) of
0 -> error usage
_ -> case (lookupCmd (params !! 0)) of
[] -> error usage
[Command _ action want _] -> do
f <- findWanted want (drop 1 params)
(TypeInternals.repo state)
return (flags, map action $ filter notstate f)
where
-- never include files from the state directory
notstate f = stateLoc /= take (length stateLoc) f
getopt = case getOpt Permute options argv of
(flags, params, []) -> return (flags, params)
(_, _, errs) -> ioError (userError (concat errs ++ usage))
lookupCmd cmd = filter (\c -> cmd == cmdname c) cmds
{- Annexes a file, storing it in a backend, and then moving it into
- the annex directory and setting up the symlink pointing to its content. -}
addCmd :: FilePath -> Annex ()
addCmd file = inBackend file $ do
s <- liftIO $ getSymbolicLinkStatus file
if ((isSymbolicLink s) || (not $ isRegularFile s))
then return ()
else do
showStart "add" file
g <- Annex.gitRepo
stored <- Backend.storeFileKey file
case (stored) of
Nothing -> showEndFail
Just (key, backend) -> do
logStatus key ValuePresent
setup g key
where
setup g key = do
let dest = annexLocation g key
liftIO $ createDirectoryIfMissing True (parentDir dest)
liftIO $ renameFile file dest
link <- calcGitLink file key
liftIO $ createSymbolicLink link file
liftIO $ Git.run g ["add", file]
showEndOk
{- Undo addCmd. -}
unannexCmd :: FilePath -> Annex ()
unannexCmd file = notinBackend file $ \(key, backend) -> do
showStart "unannex" file
Annex.flagChange Force True -- force backend to always remove
Backend.removeKey backend key
logStatus key ValueMissing
g <- Annex.gitRepo
let src = annexLocation g key
moveout g src
where
moveout g src = do
liftIO $ removeFile file
liftIO $ Git.run g ["rm", "--quiet", file]
-- git rm deletes empty directories;
-- put them back
liftIO $ createDirectoryIfMissing True (parentDir file)
liftIO $ renameFile src file
showEndOk
{- Gets an annexed file from one of the backends. -}
getCmd :: FilePath -> Annex ()
getCmd file = notinBackend file $ \(key, backend) -> do
inannex <- inAnnex key
if (inannex)
then return ()
else do
showStart "get" file
g <- Annex.gitRepo
let dest = annexLocation g key
let tmp = (annexTmpLocation g) ++ (keyFile key)
liftIO $ createDirectoryIfMissing True (parentDir tmp)
success <- Backend.retrieveKeyFile backend key tmp
if (success)
then do
liftIO $ renameFile tmp dest
logStatus key ValuePresent
showEndOk
else do
showEndFail
{- Indicates a file's content is not wanted anymore, and should be removed
- if it's safe to do so. -}
dropCmd :: FilePath -> Annex ()
dropCmd file = notinBackend file $ \(key, backend) -> do
inbackend <- Backend.hasKey key
if (not inbackend)
then return () -- no-op
else do
showStart "drop" file
success <- Backend.removeKey backend key
if (success)
then do
cleanup key
showEndOk
else showEndFail
where
cleanup key = do
logStatus key ValueMissing
inannex <- inAnnex key
if (inannex)
then do
g <- Annex.gitRepo
let loc = annexLocation g key
liftIO $ removeFile loc
return ()
else return ()
{- Fixes the symlink to an annexed file. -}
fixCmd :: String -> Annex ()
fixCmd file = notinBackend file $ \(key, backend) -> do
link <- calcGitLink file key
l <- liftIO $ readSymbolicLink file
if (link == l)
then return ()
else do
showStart "fix" file
liftIO $ createDirectoryIfMissing True (parentDir file)
liftIO $ removeFile file
liftIO $ createSymbolicLink link file
g <- Annex.gitRepo
liftIO $ Git.run g ["add", file]
showEndOk
{- Stores description for the repository. -}
initCmd :: String -> Annex ()
initCmd description = do
if (0 == length description)
then error $
"please specify a description of this repository\n" ++
usage
else do
g <- Annex.gitRepo
u <- getUUID g
describeUUID u description
log <- uuidLog
liftIO $ Git.run g ["add", log]
liftIO $ putStrLn "description set"
-- helpers
inBackend file a = do
r <- Backend.lookupFile file
case (r) of
Just v -> return ()
Nothing -> a
notinBackend file a = do
r <- Backend.lookupFile file
case (r) of
Just v -> a v
Nothing -> return ()

109
Core.hs Normal file
View file

@ -0,0 +1,109 @@
{- git-annex core functions -}
module Core where
import Maybe
import System.IO
import System.Directory
import Control.Monad.State (liftIO)
import System.Path
import Data.String.Utils
import Types
import Locations
import LocationLog
import UUID
import qualified GitRepo as Git
import qualified Annex
import Utility
{- Sets up a git repo for git-annex. -}
startup :: [Flag] -> Annex ()
startup flags = do
mapM (\f -> Annex.flagChange f True) flags
g <- Annex.gitRepo
liftIO $ gitAttributes g
prepUUID
{- When git-annex is done, it runs this. -}
shutdown :: Annex ()
shutdown = do
g <- Annex.gitRepo
liftIO $ Git.run g ["add", gitStateDir g]
-- clean up any files left in the temp directory
let tmp = annexTmpLocation g
exists <- liftIO $ doesDirectoryExist tmp
if (exists)
then liftIO $ removeDirectoryRecursive $ tmp
else return ()
{- configure git to use union merge driver on state files, if it is not
- already -}
gitAttributes :: Git.Repo -> IO ()
gitAttributes repo = do
exists <- doesFileExist attributes
if (not exists)
then do
writeFile attributes $ attrLine ++ "\n"
commit
else do
content <- readFile attributes
if (all (/= attrLine) (lines content))
then do
appendFile attributes $ attrLine ++ "\n"
commit
else return ()
where
attrLine = stateLoc ++ "*.log merge=union"
attributes = Git.attributes repo
commit = do
Git.run repo ["add", attributes]
Git.run repo ["commit", "-m", "git-annex setup",
attributes]
{- Checks if a given key is currently present in the annexLocation -}
inAnnex :: Key -> Annex Bool
inAnnex key = do
g <- Annex.gitRepo
liftIO $ doesFileExist $ annexLocation g key
{- Calculates the relative path to use to link a file to a key. -}
calcGitLink :: FilePath -> Key -> Annex FilePath
calcGitLink file key = do
g <- Annex.gitRepo
cwd <- liftIO $ getCurrentDirectory
let absfile = case (absNormPath cwd file) of
Just f -> f
Nothing -> error $ "unable to normalize " ++ file
return $ (relPathDirToDir (parentDir absfile) (Git.workTree g)) ++
annexLocationRelative g key
{- Updates the LocationLog when a key's presence changes. -}
logStatus :: Key -> LogStatus -> Annex ()
logStatus key status = do
g <- Annex.gitRepo
u <- getUUID g
liftIO $ logChange g key u status
{- Output logging -}
showStart :: String -> String -> Annex ()
showStart command file = do
liftIO $ putStr $ command ++ " " ++ file
liftIO $ hFlush stdout
showNote :: String -> Annex ()
showNote s = do
liftIO $ putStr $ " (" ++ s ++ ")"
liftIO $ hFlush stdout
showLongNote :: String -> Annex ()
showLongNote s = do
liftIO $ putStr $ "\n" ++ (indent s)
where
indent s = join "\n" $ map (\l -> " " ++ l) $ lines s
showEndOk :: Annex ()
showEndOk = do
liftIO $ putStrLn " ok"
showEndFail :: Annex ()
showEndFail = do
liftIO $ putStrLn "\nfailed"

267
GitRepo.hs Normal file
View file

@ -0,0 +1,267 @@
{- git repository handling
-
- This is written to be completely independant of git-annex and should be
- suitable for other uses.
-
-}
module GitRepo (
Repo,
repoFromCwd,
repoFromPath,
repoFromUrl,
repoIsLocal,
repoIsRemote,
repoDescribe,
workTree,
dir,
relative,
configGet,
configMap,
configRead,
run,
pipeRead,
attributes,
remotes,
remotesAdd,
repoRemoteName,
inRepo,
notInRepo
) where
import Directory
import System
import System.Directory
import System.Posix.Directory
import System.Path
import System.Cmd
import System.Cmd.Utils
import System.IO
import IO (bracket_)
import Data.String.Utils
import Data.Map as Map hiding (map, split)
import Network.URI
import Maybe
import Utility
{- A git repository can be on local disk or remote. Not to be confused
- with a git repo's configured remotes, some of which may be on local
- disk. -}
data Repo =
LocalRepo {
top :: FilePath,
config :: Map String String,
remotes :: [Repo],
-- remoteName holds the name used for this repo in remotes
remoteName :: Maybe String
} | RemoteRepo {
url :: String,
top :: FilePath,
config :: Map String String,
remotes :: [Repo],
remoteName :: Maybe String
} deriving (Show, Read, Eq)
{- Local Repo constructor. -}
repoFromPath :: FilePath -> Repo
repoFromPath dir =
LocalRepo {
top = dir,
config = Map.empty,
remotes = [],
remoteName = Nothing
}
{- Remote Repo constructor. Throws exception on invalid url. -}
repoFromUrl :: String -> Repo
repoFromUrl url =
RemoteRepo {
url = url,
top = path url,
config = Map.empty,
remotes = [],
remoteName = Nothing
}
where path url = uriPath $ fromJust $ parseURI url
{- User-visible description of a git repo. -}
repoDescribe repo =
if (isJust $ remoteName repo)
then fromJust $ remoteName repo
else if (repoIsLocal repo)
then top repo
else url repo
{- Constructs and returns an updated version of a repo with
- different remotes list. -}
remotesAdd :: Repo -> [Repo] -> Repo
remotesAdd repo rs = repo { remotes = rs }
{- Returns the name of the remote that corresponds to the repo, if
- it is a remote. Otherwise, "" -}
repoRemoteName r =
if (isJust $ remoteName r)
then fromJust $ remoteName r
else ""
{- Some code needs to vary between remote and local repos, or bare and
- non-bare, these functions help with that. -}
repoIsLocal repo = case (repo) of
LocalRepo {} -> True
RemoteRepo {} -> False
repoIsRemote repo = not $ repoIsLocal repo
assertlocal repo action =
if (repoIsLocal repo)
then action
else error $ "acting on remote git repo " ++ (repoDescribe repo) ++
" not supported"
bare :: Repo -> Bool
bare repo =
if (member b (config repo))
then ("true" == fromJust (Map.lookup b (config repo)))
else error $ "it is not known if git repo " ++ (repoDescribe repo) ++
" is a bare repository; config not read"
where
b = "core.bare"
{- Path to a repository's gitattributes file. -}
attributes :: Repo -> String
attributes repo = assertlocal repo $ do
if (bare repo)
then (top repo) ++ "/info/.gitattributes"
else (top repo) ++ "/.gitattributes"
{- Path to a repository's .git directory, relative to its topdir. -}
dir :: Repo -> String
dir repo = assertlocal repo $
if (bare repo)
then ""
else ".git"
{- Path to a repository's --work-tree. -}
workTree :: Repo -> FilePath
workTree repo = top repo
{- Given a relative or absolute filename in a repository, calculates the
- name to use to refer to the file relative to a git repository's top.
- This is the same form displayed and used by git. -}
relative :: Repo -> String -> String
relative repo file = drop (length absrepo) absfile
where
-- normalize both repo and file, so that repo
-- will be substring of file
absrepo = case (absNormPath "/" (top repo)) of
Just f -> f ++ "/"
Nothing -> error $ "bad repo" ++ (top repo)
absfile = case (secureAbsNormPath absrepo file) of
Just f -> f
Nothing -> error $ file ++ " is not located inside git repository " ++ absrepo
{- Constructs a git command line operating on the specified repo. -}
gitCommandLine :: Repo -> [String] -> [String]
gitCommandLine repo params = assertlocal repo $
-- force use of specified repo via --git-dir and --work-tree
["--git-dir="++(top repo)++"/"++(dir repo), "--work-tree="++(top repo)] ++ params
{- Runs git in the specified repo. -}
run :: Repo -> [String] -> IO ()
run repo params = assertlocal repo $ do
r <- safeSystem "git" (gitCommandLine repo params)
return ()
{- Runs a git subcommand and returns its output. -}
pipeRead :: Repo -> [String] -> IO String
pipeRead repo params = assertlocal repo $ do
pOpen ReadFromPipe "git" (gitCommandLine repo params) $ \h -> do
ret <- hGetContentsStrict h
return ret
{- Passed a location, recursively scans for all files that
- are checked into git at that location. -}
inRepo :: Repo -> FilePath -> IO [FilePath]
inRepo repo location = do
s <- pipeRead repo ["ls-files", "--cached", "--exclude-standard", location]
return $ lines s
{- Passed a location, recursively scans for all files that are not checked
- into git, and not gitignored. -}
notInRepo :: Repo -> FilePath -> IO [FilePath]
notInRepo repo location = do
s <- pipeRead repo ["ls-files", "--others", "--exclude-standard", location]
return $ lines s
{- Runs git config and populates a repo with its config. -}
configRead :: Repo -> IO Repo
configRead repo = assertlocal repo $ do
{- Cannot use pipeRead because it relies on the config having
been already read. Instead, chdir to the repo. -}
cwd <- getCurrentDirectory
bracket_ (changeWorkingDirectory (top repo))
(\_ -> changeWorkingDirectory cwd) $
pOpen ReadFromPipe "git" ["config", "--list"] $ \h -> do
val <- hGetContentsStrict h
let r = repo { config = configParse val }
return r { remotes = configRemotes r }
{- Calculates a list of a repo's configured remotes, by parsing its config. -}
configRemotes :: Repo -> [Repo]
configRemotes repo = map construct remotes
where
remotes = toList $ filter $ config repo
filter = filterWithKey (\k _ -> isremote k)
isremote k = (startswith "remote." k) && (endswith ".url" k)
remotename k = (split "." k) !! 1
construct (k,v) = (gen v) { remoteName = Just $ remotename k }
gen v = if (isURI v)
then repoFromUrl v
else repoFromPath v
{- Parses git config --list output into a config map. -}
configParse :: String -> Map.Map String String
configParse s = Map.fromList $ map pair $ lines s
where
pair l = (key l, val l)
key l = (keyval l) !! 0
val l = join sep $ drop 1 $ keyval l
keyval l = split sep l :: [String]
sep = "="
{- Returns a single git config setting, or a default value if not set. -}
configGet :: Repo -> String -> String -> String
configGet repo key defaultValue =
Map.findWithDefault defaultValue key (config repo)
{- Access to raw config Map -}
configMap :: Repo -> Map String String
configMap repo = config repo
{- Finds the current git repository, which may be in a parent directory. -}
repoFromCwd :: IO Repo
repoFromCwd = do
cwd <- getCurrentDirectory
top <- seekUp cwd isRepoTop
case top of
(Just dir) -> return $ repoFromPath dir
Nothing -> error "Not in a git repository."
seekUp :: String -> (String -> IO Bool) -> IO (Maybe String)
seekUp dir want = do
ok <- want dir
if ok
then return (Just dir)
else case (parentDir dir) of
"" -> return Nothing
d -> seekUp d want
isRepoTop dir = do
r <- isRepo dir
b <- isBareRepo dir
return (r || b)
where
isRepo dir = gitSignature dir ".git" ".git/config"
isBareRepo dir = gitSignature dir "objects" "config"
gitSignature dir subdir file = do
s <- (doesDirectoryExist (dir ++ "/" ++ subdir))
f <- (doesFileExist (dir ++ "/" ++ file))
return (s && f)

1
INSTALL Normal file
View file

@ -0,0 +1 @@
See doc/install.mdwn for installation instructions.

160
LocationLog.hs Normal file
View file

@ -0,0 +1,160 @@
{- git-annex location log
-
- git-annex keeps track of on which repository it last saw a value.
- This can be useful when using it for archiving with offline storage.
- When you indicate you --want a file, git-annex will tell you which
- repositories have the value.
-
- Location tracking information is stored in `.git-annex/key.log`.
- Repositories record their UUID and the date when they --get or --drop
- a value.
-
- A line of the log will look like: "date N UUID"
- Where N=1 when the repo has the file, and 0 otherwise.
-
- Git is configured to use a union merge for this file,
- so the lines may be in arbitrary order, but it will never conflict.
-}
module LocationLog (
LogStatus(..),
logChange,
keyLocations
) where
import Data.Time.Clock.POSIX
import Data.Time
import System.Locale
import qualified Data.Map as Map
import System.IO
import System.Directory
import Data.Char
import qualified GitRepo as Git
import Utility
import UUID
import Types
import Locations
data LogLine = LogLine {
date :: POSIXTime,
status :: LogStatus,
uuid :: UUID
} deriving (Eq)
data LogStatus = ValuePresent | ValueMissing | Undefined
deriving (Eq)
instance Show LogStatus where
show ValuePresent = "1"
show ValueMissing = "0"
show Undefined = "undefined"
instance Read LogStatus where
readsPrec _ "1" = [(ValuePresent, "")]
readsPrec _ "0" = [(ValueMissing, "")]
readsPrec _ _ = [(Undefined, "")]
instance Show LogLine where
show (LogLine date status uuid) = unwords
[(show date), (show status), uuid]
instance Read LogLine where
-- This parser is robust in that even unparsable log lines are
-- read without an exception being thrown.
-- Such lines have a status of Undefined.
readsPrec _ string =
if (length w == 3)
then case (pdate) of
Just v -> good v
Nothing -> undefined
else undefined
where
w = words string
date = w !! 0
status = read $ w !! 1
uuid = w !! 2
pdate = (parseTime defaultTimeLocale "%s%Qs" date) :: Maybe UTCTime
good v = ret $ LogLine (utcTimeToPOSIXSeconds v) status uuid
undefined = ret $ LogLine (0) Undefined ""
ret v = [(v, "")]
{- Log a change in the presence of a key's value in a repository. -}
logChange :: Git.Repo -> Key -> UUID -> LogStatus -> IO ()
logChange repo key uuid status = do
log <- logNow status uuid
ls <- readLog logfile
writeLog logfile (compactLog $ log:ls)
where
logfile = logFile repo key
{- Reads a log file.
- Note that the LogLines returned may be in any order. -}
readLog :: FilePath -> IO [LogLine]
readLog file = do
exists <- doesFileExist file
if exists
then do
s <- withFileLocked file ReadMode $ \h ->
hGetContentsStrict h
-- filter out any unparsable lines
return $ filter (\l -> (status l) /= Undefined )
$ map read $ lines s
else do
return []
{- Adds a LogLine to a log file -}
appendLog :: FilePath -> LogLine -> IO ()
appendLog file line = do
createDirectoryIfMissing True (parentDir file)
withFileLocked file AppendMode $ \h ->
hPutStrLn h $ show line
{- Writes a set of lines to a log file -}
writeLog :: FilePath -> [LogLine] -> IO ()
writeLog file lines = do
createDirectoryIfMissing True (parentDir file)
withFileLocked file WriteMode $ \h ->
hPutStr h $ unlines $ map show lines
{- Generates a new LogLine with the current date. -}
logNow :: LogStatus -> UUID -> IO LogLine
logNow status uuid = do
now <- getPOSIXTime
return $ LogLine now status uuid
{- Returns the filename of the log file for a given key. -}
logFile :: Git.Repo -> Key -> String
logFile repo key =
(gitStateDir repo) ++ (Git.relative repo (keyFile key)) ++ ".log"
{- Returns a list of repository UUIDs that, according to the log, have
- the value of a key. -}
keyLocations :: Git.Repo -> Key -> IO [UUID]
keyLocations thisrepo key = do
lines <- readLog $ logFile thisrepo key
return $ map uuid (filterPresent lines)
{- Filters the list of LogLines to find ones where the value
- is (or should still be) present. -}
filterPresent :: [LogLine] -> [LogLine]
filterPresent lines = filter (\l -> ValuePresent == status l) $ compactLog lines
{- Compacts a set of logs, returning a subset that contains the current
- status. -}
compactLog :: [LogLine] -> [LogLine]
compactLog lines = compactLog' Map.empty lines
compactLog' map [] = Map.elems map
compactLog' map (l:ls) = compactLog' (mapLog map l) ls
{- Inserts a log into a map of logs, if the log has better (ie, newer)
- information about a repo than the other logs in the map -}
mapLog map log =
if (better)
then Map.insert (uuid log) log map
else map
where
better = case (Map.lookup (uuid log) map) of
Just l -> (date l <= date log)
Nothing -> True

62
Locations.hs Normal file
View file

@ -0,0 +1,62 @@
{- git-annex file locations
-}
module Locations (
gitStateDir,
stateLoc,
keyFile,
fileKey,
annexLocation,
annexLocationRelative,
annexTmpLocation
) where
import Data.String.Utils
import Types
import qualified TypeInternals as Internals
import qualified GitRepo as Git
{- Long-term, cross-repo state is stored in files inside the .git-annex
- directory, in the git repository's working tree. -}
stateLoc = ".git-annex/"
gitStateDir :: Git.Repo -> FilePath
gitStateDir repo = (Git.workTree repo) ++ "/" ++ stateLoc
{- An annexed file's content is stored in
- /path/to/repo/.git/annex/<key>, where <key> is of the form
- <backend:fragment>
-
- That allows deriving the key and backend by looking at the symlink to it.
-}
annexLocation :: Git.Repo -> Key -> FilePath
annexLocation r key =
(Git.workTree r) ++ "/" ++ (annexLocationRelative r key)
{- Annexed file's location relative to git's working tree. -}
annexLocationRelative :: Git.Repo -> Key -> FilePath
annexLocationRelative r key = Git.dir r ++ "/annex/" ++ (keyFile key)
{- .git-annex/tmp is used for temp files
-}
annexTmpLocation :: Git.Repo -> FilePath
annexTmpLocation r = (Git.workTree r) ++ "/" ++ Git.dir r ++ "/annex/tmp/"
{- Converts a key into a filename fragment.
-
- Escape "/" in the key name, to keep a flat tree of files and avoid
- issues with keys containing "/../" or ending with "/" etc.
-
- "/" is escaped to "%" because it's short and rarely used, and resembles
- a slash
- "%" is escaped to "&s", and "&" to "&a"; this ensures that the mapping
- is one to one.
- -}
keyFile :: Key -> FilePath
keyFile key = replace "/" "%" $ replace "%" "&s" $ replace "&" "&a" $ show key
{- Reverses keyFile, converting a filename fragment (ie, the basename of
- the symlink target) into a key. -}
fileKey :: FilePath -> Key
fileKey file = read $
replace "&a" "&" $ replace "&s" "%" $ replace "%" "/" file

29
Makefile Normal file
View file

@ -0,0 +1,29 @@
all: git-annex docs
git-annex:
mkdir -p build
ghc -odir build -hidir build --make git-annex
install:
install -d $(DESTDIR)/usr/bin
install git-annex $(DESTDIR)/usr/bin
# If ikiwiki is available, build static html docs suitable for being
# shipped in the software package.
ifeq ($(shell which ikiwiki),)
IKIWIKI=echo "** ikiwiki not found, skipping building docs" >&2
else
IKIWIKI=ikiwiki
endif
docs:
./mdwn2man git-annex 1 doc/git-annex.mdwn > git-annex.1
$(IKIWIKI) doc html -v --wikiname git-annex --plugin=goodstuff \
--no-usedirs --disable-plugin=openid --plugin=sidebar \
--underlaydir=/dev/null
clean:
rm -rf build git-annex git-annex.1
rm -rf doc/.ikiwiki html
.PHONY: git-annex

112
Remotes.hs Normal file
View file

@ -0,0 +1,112 @@
{- git-annex remote repositories -}
module Remotes (
list,
withKey,
tryGitConfigRead
) where
import Control.Exception
import Control.Monad.State (liftIO)
import qualified Data.Map as Map
import Data.String.Utils
import Data.Either.Utils
import List
import Maybe
import Types
import qualified GitRepo as Git
import qualified Annex
import LocationLog
import Locations
import UUID
{- Human visible list of remotes. -}
list :: [Git.Repo] -> String
list remotes = join " " $ map Git.repoDescribe remotes
{- Cost ordered list of remotes that the LocationLog indicate may have a key. -}
withKey :: Key -> Annex [Git.Repo]
withKey key = do
g <- Annex.gitRepo
uuids <- liftIO $ keyLocations g key
allremotes <- remotesByCost
-- This only uses cached data, so may not include new remotes
-- or remotes whose uuid has changed (eg by a different drive being
-- mounted at their location). So unless it happens to find all
-- remotes, try harder, loading the remotes' configs.
remotes <- reposByUUID allremotes uuids
remotesread <- Annex.flagIsSet RemotesRead
if ((length allremotes /= length remotes) && not remotesread)
then tryharder allremotes uuids
else return remotes
where
tryharder allremotes uuids = do
-- more expensive; read each remote's config
eitherremotes <- mapM tryGitConfigRead allremotes
let allremotes' = map fromEither eitherremotes
remotes' <- reposByUUID allremotes' uuids
Annex.flagChange RemotesRead True
return remotes'
{- Cost Ordered list of remotes. -}
remotesByCost :: Annex [Git.Repo]
remotesByCost = do
g <- Annex.gitRepo
reposByCost $ Git.remotes g
{- Orders a list of git repos by cost. -}
reposByCost :: [Git.Repo] -> Annex [Git.Repo]
reposByCost l = do
costpairs <- mapM costpair l
return $ fst $ unzip $ sortBy bycost $ costpairs
where
costpair r = do
cost <- repoCost r
return (r, cost)
bycost (_, c1) (_, c2) = compare c1 c2
{- Calculates cost for a repo.
-
- The default cost is 100 for local repositories, and 200 for remote
- repositories; it can also be configured by remote.<name>.annex-cost
-}
repoCost :: Git.Repo -> Annex Int
repoCost r = do
g <- Annex.gitRepo
if ((length $ config g r) > 0)
then return $ read $ config g r
else if (Git.repoIsLocal r)
then return 100
else return 200
where
config g r = Git.configGet g (configkey r) ""
configkey r = "remote." ++ (Git.repoRemoteName r) ++ ".annex-cost"
{- The git configs for the git repo's remotes is not read on startup
- because reading it may be expensive. This function tries to read the
- config for a specified remote, and updates state. If successful, it
- returns the updated git repo. -}
tryGitConfigRead :: Git.Repo -> Annex (Either Git.Repo Git.Repo)
tryGitConfigRead r = do
if (Map.null $ Git.configMap r)
then do
-- configRead can fail due to IO error or
-- for other reasons; catch all possible exceptions
result <- liftIO $ (try (Git.configRead r)::IO (Either SomeException (Git.Repo)))
case (result) of
Left err -> return $ Left r
Right r' -> do
g <- Annex.gitRepo
let l = Git.remotes g
let g' = Git.remotesAdd g $
exchange l r'
Annex.gitRepoChange g'
return $ Right r'
else return $ Right r -- config already read
where
exchange [] new = []
exchange (old:ls) new =
if ((Git.repoRemoteName old) == (Git.repoRemoteName new))
then new:(exchange ls new)
else old:(exchange ls new)

72
TypeInternals.hs Normal file
View file

@ -0,0 +1,72 @@
{- git-annex internal data types
-
- Most things should not need this, using Types and/or Annex instead.
-}
module TypeInternals where
import Control.Monad.State (StateT)
import Data.String.Utils
import qualified GitRepo as Git
data Flag =
Force | -- command-line flags
RemotesRead -- indicates that remote repo configs have been read
deriving (Eq, Read, Show)
-- git-annex's runtime state type doesn't really belong here,
-- but it uses Backend, so has to be here to avoid a depends loop.
data AnnexState = AnnexState {
repo :: Git.Repo,
backends :: [Backend],
supportedBackends :: [Backend],
flags :: [Flag]
} deriving (Show)
-- git-annex's monad
type Annex = StateT AnnexState IO
-- annexed filenames are mapped through a backend into keys
type KeyFrag = String
type BackendName = String
data Key = Key (BackendName, KeyFrag) deriving (Eq)
-- show a key to convert it to a string; the string includes the
-- name of the backend to avoid collisions between key strings
instance Show Key where
show (Key (b, k)) = b ++ ":" ++ k
instance Read Key where
readsPrec _ s = [((Key (b,k)) ,"")]
where
l = split ":" s
b = l !! 0
k = join ":" $ drop 1 l
-- pulls the backend name out
backendName :: Key -> BackendName
backendName (Key (b,k)) = b
-- pulls the key fragment out
keyFrag :: Key -> KeyFrag
keyFrag (Key (b,k)) = k
-- this structure represents a key-value backend
data Backend = Backend {
-- name of this backend
name :: String,
-- converts a filename to a key
getKey :: FilePath -> Annex (Maybe Key),
-- stores a file's contents to a key
storeFileKey :: FilePath -> Key -> Annex Bool,
-- retrieves a key's contents to a file
retrieveKeyFile :: Key -> FilePath -> Annex Bool,
-- removes a key
removeKey :: Key -> Annex Bool,
-- checks if a backend is storing the content of a key
hasKey :: Key -> Annex Bool
}
instance Show Backend where
show backend = "Backend { name =\"" ++ (name backend) ++ "\" }"

13
Types.hs Normal file
View file

@ -0,0 +1,13 @@
{- git-annex abstract data types -}
module Types (
Annex,
AnnexState,
Backend,
Key,
backendName,
keyFrag,
Flag(..),
) where
import TypeInternals

140
UUID.hs Normal file
View file

@ -0,0 +1,140 @@
{- git-annex uuids
-
- Each git repository used by git-annex has an annex.uuid setting that
- uniquely identifies that repository.
-
-}
module UUID (
UUID,
getUUID,
prepUUID,
genUUID,
reposByUUID,
prettyPrintUUIDs,
describeUUID,
uuidLog
) where
import Control.Monad.State
import Maybe
import List
import System.Cmd.Utils
import System.IO
import System.Directory
import qualified Data.Map as M
import qualified GitRepo as Git
import Types
import Locations
import qualified Annex
import Utility
type UUID = String
configkey="annex.uuid"
{- Generates a UUID. There is a library for this, but it's not packaged,
- so use the command line tool. -}
genUUID :: IO UUID
genUUID = liftIO $ pOpen ReadFromPipe "uuid" ["-m"] $ \h -> hGetLine h
{- Looks up a repo's UUID. May return "" if none is known.
-
- UUIDs of remotes are cached in git config, using keys named
- remote.<name>.annex-uuid
-
- -}
getUUID :: Git.Repo -> Annex UUID
getUUID r = do
g <- Annex.gitRepo
let c = cached r g
let u = uncached r
if (c /= u && u /= "")
then do
updatecache g r u
return u
else return c
where
uncached r = Git.configGet r "annex.uuid" ""
cached r g = Git.configGet g (cachekey r) ""
updatecache g r u = do
if (g /= r)
then setConfig (cachekey r) u
else return ()
cachekey r = "remote." ++ (Git.repoRemoteName r) ++ ".annex-uuid"
{- Make sure that the repo has an annex.uuid setting. -}
prepUUID :: Annex ()
prepUUID = do
g <- Annex.gitRepo
u <- getUUID g
if ("" == u)
then do
uuid <- liftIO $ genUUID
setConfig configkey uuid
else return ()
{- Changes a git config setting in both internal state and .git/config -}
setConfig :: String -> String -> Annex ()
setConfig key value = do
g <- Annex.gitRepo
liftIO $ Git.run g ["config", key, value]
-- re-read git config and update the repo's state
g' <- liftIO $ Git.configRead g
Annex.gitRepoChange g'
return ()
{- Filters a list of repos to ones that have listed UUIDs. -}
reposByUUID :: [Git.Repo] -> [UUID] -> Annex [Git.Repo]
reposByUUID repos uuids = do
filterM match repos
where
match r = do
u <- getUUID r
return $ isJust $ elemIndex u uuids
{- Pretty-prints a list of UUIDs -}
prettyPrintUUIDs :: [UUID] -> Annex String
prettyPrintUUIDs uuids = do
m <- uuidMap
return $ unwords $ map (\u -> "\t"++(prettify m u)++"\n") uuids
where
prettify m u =
if (0 < (length $ findlog m u))
then u ++ " -- " ++ (findlog m u)
else u
findlog m u = M.findWithDefault "" u m
{- Records a description for a uuid in the uuidLog. -}
describeUUID :: UUID -> String -> Annex ()
describeUUID uuid desc = do
m <- uuidMap
let m' = M.insert uuid desc m
log <- uuidLog
liftIO $ createDirectoryIfMissing True (parentDir log)
liftIO $ withFileLocked log WriteMode (\h -> hPutStr h $ serialize m')
where
serialize m = unlines $ map (\(u, d) -> u++" "++d) $ M.toList m
{- Read and parse the uuidLog into a Map -}
uuidMap :: Annex (M.Map UUID String)
uuidMap = do
log <- uuidLog
s <- liftIO $ catch
(withFileLocked log ReadMode $ \h -> hGetContentsStrict h)
(\error -> return "")
return $ M.fromList $ map (\l -> pair l) $ lines s
where
pair l =
if (1 < (length $ words l))
then ((words l) !! 0, unwords $ drop 1 $ words l)
else ("", "")
{- Filename of uuid.log. -}
uuidLog :: Annex String
uuidLog = do
g <- Annex.gitRepo
return $ (gitStateDir g) ++ "uuid.log"

110
Utility.hs Normal file
View file

@ -0,0 +1,110 @@
{- git-annex utility functions
-}
module Utility (
withFileLocked,
hGetContentsStrict,
parentDir,
relPathCwdToDir,
relPathDirToDir,
boolSystem
) where
import System.IO
import System.Cmd
import System.Exit
import System.Posix.Signals
import Data.Typeable
import System.Posix.IO
import Data.String.Utils
import System.Path
import System.IO.HVFS
import System.FilePath
import System.Directory
{- Let's just say that Haskell makes reading/writing a file with
- file locking excessively difficult. -}
withFileLocked file mode action = do
-- TODO: find a way to use bracket here
handle <- openFile file mode
lockfd <- handleToFd handle -- closes handle
waitToSetLock lockfd (lockType mode, AbsoluteSeek, 0, 0)
handle' <- fdToHandle lockfd
ret <- action handle'
hClose handle'
return ret
where
lockType ReadMode = ReadLock
lockType _ = WriteLock
{- A version of hgetContents that is not lazy. Ensures file is
- all read before it gets closed. -}
hGetContentsStrict h = hGetContents h >>= \s -> length s `seq` return s
{- Returns the parent directory of a path. Parent of / is "" -}
parentDir :: String -> String
parentDir dir =
if length dirs > 0
then slash ++ (join s $ take ((length dirs) - 1) dirs)
else ""
where
dirs = filter (\x -> length x > 0) $
split s dir
slash = if (not $ isAbsolute dir) then "" else s
s = [pathSeparator]
{- Constructs a relative path from the CWD to a directory.
-
- For example, assuming CWD is /tmp/foo/bar:
- relPathCwdToDir "/tmp/foo" == "../"
- relPathCwdToDir "/tmp/foo/bar" == ""
- relPathCwdToDir "/tmp/foo/bar" == ""
-}
relPathCwdToDir :: FilePath -> IO FilePath
relPathCwdToDir dir = do
cwd <- getCurrentDirectory
let absdir = abs cwd dir
return $ relPathDirToDir cwd absdir
where
-- absolute, normalized form of the directory
abs cwd dir =
case (absNormPath cwd dir) of
Just d -> d
Nothing -> error $ "unable to normalize " ++ dir
{- Constructs a relative path from one directory to another.
-
- Both directories must be absolute, and normalized (eg with absNormpath).
-
- The path will end with "/", unless it is empty.
-}
relPathDirToDir :: FilePath -> FilePath -> FilePath
relPathDirToDir from to =
if (0 < length path)
then addTrailingPathSeparator path
else ""
where
s = [pathSeparator]
pfrom = split s from
pto = split s to
common = map fst $ filter same $ zip pfrom pto
same (c,d) = c == d
uncommon = drop numcommon pto
dotdots = take ((length pfrom) - numcommon) $ repeat ".."
numcommon = length $ common
path = join s $ dotdots ++ uncommon
{- Run a system command, and returns True or False
- if it succeeded or failed.
-
- An error is thrown if the command exits due to SIGINT,
- to propigate ctrl-c.
-}
boolSystem :: FilePath -> [String] -> IO Bool
boolSystem command params = do
r <- rawSystem command params
case r of
ExitSuccess -> return True
ExitFailure e -> if Just e == cast sigINT
then error $ command ++ "interrupted"
else return False

5
debian/changelog vendored Normal file
View file

@ -0,0 +1,5 @@
git-annex (0.01) UNRELEASED; urgency=low
* First release
-- Joey Hess <joeyh@debian.org> Thu, 09 Sep 2010 08:24:58 -0400

1
debian/compat vendored Normal file
View file

@ -0,0 +1 @@
7

26
debian/control vendored Normal file
View file

@ -0,0 +1,26 @@
Source: git-annex
Section: utils
Priority: optional
Build-Depends: debhelper (>= 7.0.50), ghc6, libghc6-missingh-dev, ikiwiki
Maintainer: Joey Hess <joeyh@debian.org>
Standards-Version: 3.9.1
Vcs-Git: git://git.kitenet.net/git-annex
Homepage: http://git-annex.branchable.com/
Package: git-annex
Architecture: any
Section: utils
Depends: ${misc:Depends}, ${shlibs:Depends}, git | git-core, uuid
Description: manage files with git, without checking their contents into git
git-annex allows managing files with git, without checking the file
contents into git. While that may seem paradoxical, it is useful when
dealing with files larger than git can currently easily handle, whether due
to limitations in memory, checksumming time, or disk space.
.
Even without file content tracking, being able to manage files with git,
move files around and delete files with versioned directory trees, and use
branches and distributed clones, are all very handy reasons to use git. And
annexed files can co-exist in the same git repository with regularly
versioned files, which is convenient for maintaining documents, Makefiles,
etc that are associated with annexed files but that benefit from full
revision control.

5
debian/copyright vendored Normal file
View file

@ -0,0 +1,5 @@
Files: *
Copyright: © 2010 Joey Hess <joey@kitenet.net>
License: GPL-2+
The full text of the GPL is distributed as doc/GPL in this package's
source, or in /usr/share/common-licenses/GPL on Debian systems.

1
debian/docs vendored Normal file
View file

@ -0,0 +1 @@
html

1
debian/manpages vendored Normal file
View file

@ -0,0 +1 @@
git-annex.1

7
debian/rules vendored Executable file
View file

@ -0,0 +1,7 @@
#!/usr/bin/make -f
%:
dh $@
# Not intended for use by anyone except the author.
announcedir:
@echo ${HOME}/src/joeywiki/code/git-annex/news

339
doc/GPL Normal file
View file

@ -0,0 +1,339 @@
GNU GENERAL PUBLIC LICENSE
Version 2, June 1991
Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
Preamble
The licenses for most software are designed to take away your
freedom to share and change it. By contrast, the GNU General Public
License is intended to guarantee your freedom to share and change free
software--to make sure the software is free for all its users. This
General Public License applies to most of the Free Software
Foundation's software and to any other program whose authors commit to
using it. (Some other Free Software Foundation software is covered by
the GNU Lesser General Public License instead.) You can apply it to
your programs, too.
When we speak of free software, we are referring to freedom, not
price. Our General Public Licenses are designed to make sure that you
have the freedom to distribute copies of free software (and charge for
this service if you wish), that you receive source code or can get it
if you want it, that you can change the software or use pieces of it
in new free programs; and that you know you can do these things.
To protect your rights, we need to make restrictions that forbid
anyone to deny you these rights or to ask you to surrender the rights.
These restrictions translate to certain responsibilities for you if you
distribute copies of the software, or if you modify it.
For example, if you distribute copies of such a program, whether
gratis or for a fee, you must give the recipients all the rights that
you have. You must make sure that they, too, receive or can get the
source code. And you must show them these terms so they know their
rights.
We protect your rights with two steps: (1) copyright the software, and
(2) offer you this license which gives you legal permission to copy,
distribute and/or modify the software.
Also, for each author's protection and ours, we want to make certain
that everyone understands that there is no warranty for this free
software. If the software is modified by someone else and passed on, we
want its recipients to know that what they have is not the original, so
that any problems introduced by others will not reflect on the original
authors' reputations.
Finally, any free program is threatened constantly by software
patents. We wish to avoid the danger that redistributors of a free
program will individually obtain patent licenses, in effect making the
program proprietary. To prevent this, we have made it clear that any
patent must be licensed for everyone's free use or not licensed at all.
The precise terms and conditions for copying, distribution and
modification follow.
GNU GENERAL PUBLIC LICENSE
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
0. This License applies to any program or other work which contains
a notice placed by the copyright holder saying it may be distributed
under the terms of this General Public License. The "Program", below,
refers to any such program or work, and a "work based on the Program"
means either the Program or any derivative work under copyright law:
that is to say, a work containing the Program or a portion of it,
either verbatim or with modifications and/or translated into another
language. (Hereinafter, translation is included without limitation in
the term "modification".) Each licensee is addressed as "you".
Activities other than copying, distribution and modification are not
covered by this License; they are outside its scope. The act of
running the Program is not restricted, and the output from the Program
is covered only if its contents constitute a work based on the
Program (independent of having been made by running the Program).
Whether that is true depends on what the Program does.
1. You may copy and distribute verbatim copies of the Program's
source code as you receive it, in any medium, provided that you
conspicuously and appropriately publish on each copy an appropriate
copyright notice and disclaimer of warranty; keep intact all the
notices that refer to this License and to the absence of any warranty;
and give any other recipients of the Program a copy of this License
along with the Program.
You may charge a fee for the physical act of transferring a copy, and
you may at your option offer warranty protection in exchange for a fee.
2. You may modify your copy or copies of the Program or any portion
of it, thus forming a work based on the Program, and copy and
distribute such modifications or work under the terms of Section 1
above, provided that you also meet all of these conditions:
a) You must cause the modified files to carry prominent notices
stating that you changed the files and the date of any change.
b) You must cause any work that you distribute or publish, that in
whole or in part contains or is derived from the Program or any
part thereof, to be licensed as a whole at no charge to all third
parties under the terms of this License.
c) If the modified program normally reads commands interactively
when run, you must cause it, when started running for such
interactive use in the most ordinary way, to print or display an
announcement including an appropriate copyright notice and a
notice that there is no warranty (or else, saying that you provide
a warranty) and that users may redistribute the program under
these conditions, and telling the user how to view a copy of this
License. (Exception: if the Program itself is interactive but
does not normally print such an announcement, your work based on
the Program is not required to print an announcement.)
These requirements apply to the modified work as a whole. If
identifiable sections of that work are not derived from the Program,
and can be reasonably considered independent and separate works in
themselves, then this License, and its terms, do not apply to those
sections when you distribute them as separate works. But when you
distribute the same sections as part of a whole which is a work based
on the Program, the distribution of the whole must be on the terms of
this License, whose permissions for other licensees extend to the
entire whole, and thus to each and every part regardless of who wrote it.
Thus, it is not the intent of this section to claim rights or contest
your rights to work written entirely by you; rather, the intent is to
exercise the right to control the distribution of derivative or
collective works based on the Program.
In addition, mere aggregation of another work not based on the Program
with the Program (or with a work based on the Program) on a volume of
a storage or distribution medium does not bring the other work under
the scope of this License.
3. You may copy and distribute the Program (or a work based on it,
under Section 2) in object code or executable form under the terms of
Sections 1 and 2 above provided that you also do one of the following:
a) Accompany it with the complete corresponding machine-readable
source code, which must be distributed under the terms of Sections
1 and 2 above on a medium customarily used for software interchange; or,
b) Accompany it with a written offer, valid for at least three
years, to give any third party, for a charge no more than your
cost of physically performing source distribution, a complete
machine-readable copy of the corresponding source code, to be
distributed under the terms of Sections 1 and 2 above on a medium
customarily used for software interchange; or,
c) Accompany it with the information you received as to the offer
to distribute corresponding source code. (This alternative is
allowed only for noncommercial distribution and only if you
received the program in object code or executable form with such
an offer, in accord with Subsection b above.)
The source code for a work means the preferred form of the work for
making modifications to it. For an executable work, complete source
code means all the source code for all modules it contains, plus any
associated interface definition files, plus the scripts used to
control compilation and installation of the executable. However, as a
special exception, the source code distributed need not include
anything that is normally distributed (in either source or binary
form) with the major components (compiler, kernel, and so on) of the
operating system on which the executable runs, unless that component
itself accompanies the executable.
If distribution of executable or object code is made by offering
access to copy from a designated place, then offering equivalent
access to copy the source code from the same place counts as
distribution of the source code, even though third parties are not
compelled to copy the source along with the object code.
4. You may not copy, modify, sublicense, or distribute the Program
except as expressly provided under this License. Any attempt
otherwise to copy, modify, sublicense or distribute the Program is
void, and will automatically terminate your rights under this License.
However, parties who have received copies, or rights, from you under
this License will not have their licenses terminated so long as such
parties remain in full compliance.
5. You are not required to accept this License, since you have not
signed it. However, nothing else grants you permission to modify or
distribute the Program or its derivative works. These actions are
prohibited by law if you do not accept this License. Therefore, by
modifying or distributing the Program (or any work based on the
Program), you indicate your acceptance of this License to do so, and
all its terms and conditions for copying, distributing or modifying
the Program or works based on it.
6. Each time you redistribute the Program (or any work based on the
Program), the recipient automatically receives a license from the
original licensor to copy, distribute or modify the Program subject to
these terms and conditions. You may not impose any further
restrictions on the recipients' exercise of the rights granted herein.
You are not responsible for enforcing compliance by third parties to
this License.
7. If, as a consequence of a court judgment or allegation of patent
infringement or for any other reason (not limited to patent issues),
conditions are imposed on you (whether by court order, agreement or
otherwise) that contradict the conditions of this License, they do not
excuse you from the conditions of this License. If you cannot
distribute so as to satisfy simultaneously your obligations under this
License and any other pertinent obligations, then as a consequence you
may not distribute the Program at all. For example, if a patent
license would not permit royalty-free redistribution of the Program by
all those who receive copies directly or indirectly through you, then
the only way you could satisfy both it and this License would be to
refrain entirely from distribution of the Program.
If any portion of this section is held invalid or unenforceable under
any particular circumstance, the balance of the section is intended to
apply and the section as a whole is intended to apply in other
circumstances.
It is not the purpose of this section to induce you to infringe any
patents or other property right claims or to contest validity of any
such claims; this section has the sole purpose of protecting the
integrity of the free software distribution system, which is
implemented by public license practices. Many people have made
generous contributions to the wide range of software distributed
through that system in reliance on consistent application of that
system; it is up to the author/donor to decide if he or she is willing
to distribute software through any other system and a licensee cannot
impose that choice.
This section is intended to make thoroughly clear what is believed to
be a consequence of the rest of this License.
8. If the distribution and/or use of the Program is restricted in
certain countries either by patents or by copyrighted interfaces, the
original copyright holder who places the Program under this License
may add an explicit geographical distribution limitation excluding
those countries, so that distribution is permitted only in or among
countries not thus excluded. In such case, this License incorporates
the limitation as if written in the body of this License.
9. The Free Software Foundation may publish revised and/or new versions
of the General Public License from time to time. Such new versions will
be similar in spirit to the present version, but may differ in detail to
address new problems or concerns.
Each version is given a distinguishing version number. If the Program
specifies a version number of this License which applies to it and "any
later version", you have the option of following the terms and conditions
either of that version or of any later version published by the Free
Software Foundation. If the Program does not specify a version number of
this License, you may choose any version ever published by the Free Software
Foundation.
10. If you wish to incorporate parts of the Program into other free
programs whose distribution conditions are different, write to the author
to ask for permission. For software which is copyrighted by the Free
Software Foundation, write to the Free Software Foundation; we sometimes
make exceptions for this. Our decision will be guided by the two goals
of preserving the free status of all derivatives of our free software and
of promoting the sharing and reuse of software generally.
NO WARRANTY
11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
REPAIR OR CORRECTION.
12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
POSSIBILITY OF SUCH DAMAGES.
END OF TERMS AND CONDITIONS
How to Apply These Terms to Your New Programs
If you develop a new program, and you want it to be of the greatest
possible use to the public, the best way to achieve this is to make it
free software which everyone can redistribute and change under these terms.
To do so, attach the following notices to the program. It is safest
to attach them to the start of each source file to most effectively
convey the exclusion of warranty; and each file should have at least
the "copyright" line and a pointer to where the full notice is found.
<one line to give the program's name and a brief idea of what it does.>
Copyright (C) <year> <name of author>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
Also add information on how to contact you by electronic and paper mail.
If the program is interactive, make it output a short notice like this
when it starts in an interactive mode:
Gnomovision version 69, Copyright (C) year name of author
Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
This is free software, and you are welcome to redistribute it
under certain conditions; type `show c' for details.
The hypothetical commands `show w' and `show c' should show the appropriate
parts of the General Public License. Of course, the commands you use may
be called something other than `show w' and `show c'; they could even be
mouse-clicks or menu items--whatever suits your program.
You should also get your employer (if you work as a programmer) or your
school, if any, to sign a "copyright disclaimer" for the program, if
necessary. Here is a sample; alter the names:
Yoyodyne, Inc., hereby disclaims all copyright interest in the program
`Gnomovision' (which makes passes at compilers) written by James Hacker.
<signature of Ty Coon>, 1 April 1989
Ty Coon, President of Vice
This General Public License does not permit incorporating your program into
proprietary programs. If your program is a subroutine library, you may
consider it more useful to permit linking proprietary applications with the
library. If this is what you want to do, use the GNU Lesser General
Public License instead of this License.

21
doc/backends.mdwn Normal file
View file

@ -0,0 +1,21 @@
git-annex uses a key-value abstraction layer to allow file contents to be
stored in different ways. In theory, any key-value storage system could be
used to store file contents.
When a file is annexed, a key is generated from its content and/or metadata.
The file checked into git symlinks to the key. This key can later be used
to retrieve the file's content (its value).
Multiple pluggable backends are supported, and more than one can be used
to store different files' contents in a given repository.
* `WORM` ("Write Once, Read Many") This backend stores the file's content
only in `.git/annex/`, and assumes that any file with the same basename,
size, and modification time has the same content. So with this backend,
files can be moved around, but should never be added to or changed.
This is the default, and the least expensive backend.
* `SHA1` -- This backend stores the file's content in
`.git/annex/`, with a name based on its sha1 checksum. This backend allows
modifications of files to be tracked. Its need to generate checksums
can make it slower for large files.
* `URL` -- This backend downloads the file's content from an external URL.

4
doc/bugs.mdwn Normal file
View file

@ -0,0 +1,4 @@
This is git-annex's bug list. Link bugs to [[bugs/done]] when done.
[[!inline pages="./bugs/* and !./bugs/done and !link(done)
and !*/Discussion" actions=yes postform=yes show=0 archive=yes]]

View file

@ -0,0 +1 @@
This backend is not finished.

36
doc/bugs/branching.mdwn Normal file
View file

@ -0,0 +1,36 @@
The use of `.git-annex` to store logs means that if a repo has branches
and the user switched between them, git-annex will see different logs in
the different branches, and so may miss info about what remotes have which
files (though it can re-learn).
An alternative would be to store the log data directly in the git repo
as `pristine-tar` does. Problem with that approach is that git won't merge
conflicting changes to log files if they are not in the currently checked
out branch.
It would be possible to use a branch with a tree like this, to avoid
conflicts:
key/uuid/time/status
As long as new files are only added, and old timestamped files deleted,
there would be no conflicts.
A related problem though is the size of the tree objects git needs to
commit. Having the logs in a separate branch doesn't help with that.
As more keys are added, the tree object size will increase, and git will
take longer and longer to commit, and use more space. One way to deal with
this is simply by splitting the logs amoung subdirectories. Git then can
reuse trees for most directories. (Check: Does it still have to build
dup trees in memory?)
Another approach would be to have git-annex *delete* old logs. Keep logs
for the currently available files, or something like that. If other log
info is needed, look back through history to find the first occurance of a
log. Maybe even look at other branches -- so if the logs were on master,
a new empty branch could be made and git-annex would still know where to
get keys in that branch.
Would have to be careful about conflicts when deleting and bringing back
files with the same name. And would need to avoid expensive searching thru
all history to try to find an old log file.

4
doc/bugs/done.mdwn Normal file
View file

@ -0,0 +1,4 @@
recently fixed [[bugs]]
[[!inline pages="./* and link(./done) and !*/Discussion" sort=mtime show=10
archive=yes]]

View file

@ -0,0 +1,2 @@
cannot "git annex ../foo" (GitRepo.relative is buggy and
git-ls-files also refuses w/o --full-name, which would need other changes)

View file

@ -0,0 +1,3 @@
Find a way to copy a file with a progress bar, while still preserving
stat. Easiest way might be to use pv and fix up the permissions etc
after?

View file

@ -0,0 +1,8 @@
Should check that there is enough free space before trying to copy a
file around.
* Need a way to tell how much free space is available on the disk containing
a given repository.
* And, need a way to tell the size of a file before copying it from
a remote, to check local disk space.

1
doc/bugs/fsck.mdwn Normal file
View file

@ -0,0 +1 @@
add a git annex fsck that finds keys that have no referring file

2
doc/bugs/gitrm.mdwn Normal file
View file

@ -0,0 +1,2 @@
how to handle git rm file? (should try to drop keys that have no
referring file, if it seems safe..)

View file

@ -0,0 +1,3 @@
Support for remote git repositories (ssh:// specifically can be made to
work, although the other end probably needs to have git-annex
installed..)

2
doc/bugs/pushpull.mdwn Normal file
View file

@ -0,0 +1,2 @@
--push/--pull should take a reponame and files, and push those files
to that repo; dropping them from the current repo

View file

@ -0,0 +1,12 @@
TODO: implement below
git-annex does use a lot of symlinks. Specicially, relative symlinks,
that are checked into git. To allow you to move those around without
annoyance, git-annex can run as a post-commit hook. This way, you can `git mv`
a symlink to an annexed file, and as soon as you commit, it will be fixed
up.
`git annex init` tries to set up a post-commit hook that is itself a symlink
back to git-annex. If you want to have your own shell script in the post-commit
hook, just make it call `git annex` with no parameters. git-annex will detect
when it's run from a git hook and do the necessary fixups.

View file

@ -0,0 +1,9 @@
There is no way to `git annex add` a file using the URL [[backend|backends]].
For now, we have to manually make the symlink. Something like this:
ln -s .git/annex/URL:http:%%www.example.com%foo.tar.gz
Note the escaping of slashes.
A `git annex register <url>` command could do this..

4
doc/contact.mdwn Normal file
View file

@ -0,0 +1,4 @@
Joey Hess <joey@kitenet.net> is the author of git-annex.
The [VCS-home mailing list](http://lists.madduck.net/listinfo/vcs-home)
is a good place to discuss it.

30
doc/copies.mdwn Normal file
View file

@ -0,0 +1,30 @@
The WORM and SHA1 key-value [[backends|backend]] store data inside
your git repository's `.git` directory, not in some external data store.
It's important that data not get lost by an ill-considered `git annex drop`
command. So, then using those backends, git-annex can be configured to try
to keep N copies of a file's content available across all repositories. By
default, N is 1; it is configured by annex.numcopies.
`git annex drop` attempts to check with other git remotes, to check that N
copies of the file exist. If enough repositories cannot be verified to have
it, it will retain the file content to avoid data loss.
For example, consider three repositories: Server, Laptop, and USB. Both Server
and USB have a copy of a file, and N=1. If on Laptop, you `git annex get
$file`, this will transfer it from either Server or USB (depending on which
is available), and there are now 3 copies of the file.
Suppose you want to free up space on Laptop again, and you `git annex drop` the file
there. If USB is connected, or Server can be contacted, git-annex can check
that it still has a copy of the file, and the content is removed from
Laptop. But if USB is currently disconnected, and Server also cannot be
contacted, it can't verify that it is safe to drop the file, and will
refuse to do so.
With N=2, in order to drop the file content from Laptop, it would need access
to both USB and Server.
Note that different repositories can be configured with different values of
N. So just because Laptop has N=2, this does not prevent the number of
copies falling to 1, when USB and Server have N=1.

7
doc/download.mdwn Normal file
View file

@ -0,0 +1,7 @@
The main git repository for git-annex is `git://git.kitenet.net/git-annex`
[[gitweb](http://git.kitenet.net/?p=git-annex;a=summary)]
There are no binary packages yet, but you can build Debian packages from
the source tree with `dpkg-buildpackage`.
Next: [[install]]

148
doc/git-annex.mdwn Normal file
View file

@ -0,0 +1,148 @@
# NAME
git-annex - manage files with git, without checking their contents in
# SYNOPSIS
git annex subcommand [path ...]
# DESCRIPTION
git-annex allows managing files with git, without checking the file
contents into git. While that may seem paradoxical, it is useful when
dealing with files larger than git can currently easily handle, whether due
to limitations in memory, checksumming time, or disk space.
Even without file content tracking, being able to manage files with git,
move files around and delete files with versioned directory trees, and use
branches and distributed clones, are all very handy reasons to use git. And
annexed files can co-exist in the same git repository with regularly
versioned files, which is convenient for maintaining documents, Makefiles,
etc that are associated with annexed files but that benefit from full
revision control.
When a file is annexed, its content is moved into a key-value store, and
a symlink is made that points to the content. These symlinks are checked into
git and versioned like regular files. You can move them around, delete
them, and so on. Pushing to another git repository will make git-annex
there aware of the annexed file, and it can be used to retrieve its
content from the key-value store.
# EXAMPLES
# git annex get video/hackity_hack_and_kaxxt.mov
get video/_why_hackity_hack_and_kaxxt.mov (not available)
I was unable to access these remotes: server
Try making some of these repositories available:
5863d8c0-d9a9-11df-adb2-af51e6559a49 -- my home file server
58d84e8a-d9ae-11df-a1aa-ab9aa8c00826 -- portable USB drive
ca20064c-dbb5-11df-b2fe-002170d25c55 -- backup SATA drive
failed
# sudo mount /media/usb
# git remote add usbdrive /media/usb
# git annex get video/hackity_hack_and_kaxxt.mov
get video/hackity_hack_and_kaxxt.mov (copying from usbdrive...) ok
# git commit -a -m "got a video I want to rewatch on the plane"
# git annex add iso
add iso/Debian_5.0.iso ok
# git commit -a -m "saving Debian CD for later"
# git annex push usbdrive iso
error: push not yet implemented!
# git annex drop iso
drop iso/Debian_5.0.iso ok
# git commit -a -m "freed up space"
# SUBCOMMANDS
Like many git commands, git-annex can be passed a path that
is either a file or a directory. In the latter case it acts on all relevant
files in the directory.
Many git-annex subcommands will stage changes for later `git commit` by you.
* add [path ...]
Adds files in the path to the annex. Files that are already checked into
git, or that git has been configured to ignore will be silently skipped.
* get [path ...]
Makes the content of annexed files available in this repository. Depending
on the backend used, this will involve copying them from another repository,
or downloading them, or transferring them from some kind of key-value store.
* drop [path ...]
Drops the content of annexed files from this repository.
git-annex may refuse to drop a content if the backend does not think
it is safe to do so.
* unannex [path ...]
Use this to undo an accidental add command. This is not the command you
should use if you intentionally annexed a file and don't want its contents
any more. In that case you should use `git annex drop` instead, and you
can also `git rm` the file.
* init description
Initializes git-annex with a descripotion of the git repository.
This is an optional, but recommended step.
* fix [path ...]
Fixes up symlinks that have become broken to again point to annexed content.
This is useful to run if you have been moving the symlinks around.
# OPTIONS
* --force
Force unsafe actions, such as dropping a file's content when no other
source of it can be verified to still exist. Use with care.
## CONFIGURATION
Like other git commands, git-annex is configured via `.git/config`.
* `annex.uuid` -- a unique UUID for this repository (automatically set)
* `annex.numcopies` -- number of copies of files to keep across all
repositories (default: 1)
* `annex.backends` -- space-separated list of names of
the key-value backends to use. The first listed is used to store
new files. (default: "WORM SHA1 URL")
* `remote.<name>.annex-cost` -- When determining which repository to
transfer annexed files from or to, ones with lower costs are preferred.
The default cost is 100 for local repositories, and 200 for remote
repositories. Note that other factors may be configured when pushing
files to repositories, in particular, whether the repository is on
a filesystem with sufficient free space.
* `remote.<name>.annex-uuid` -- git-annex caches UUIDs of repositories
here.
# FILES
These files are used, in your git repository:
`.git/annex/` contains the annexed file contents that are currently
available. Annexed files in your git repository symlink to that content.
`.git-annex/uuid.log` is used to map between repository UUID and
decscriptions. You may edit it.
`.git-annex/*.log` is where git-annex records its content tracking
information. These files should be committed to git.
`.git-annex/.gitattributes` is configured to use git's union merge driver
to avoid conflicts when merging files in the `.git-annex` directory.
# AUTHOR
Joey Hess <joey@ikiwiki.info>
<http://git-annex.branchable.com/>
Warning: this page is automatically made into a man page via [mdwn2man](http://git.ikiwiki.info/?p=ikiwiki;a=blob;f=mdwn2man;hb=HEAD). Edit with care

50
doc/index.mdwn Normal file
View file

@ -0,0 +1,50 @@
git-annex allows managing files with git, without checking the file
contents into git. While that may seem paradoxical, it is useful when
dealing with files larger than git can currently easily handle, whether due
to limitations in memory, checksumming time, or disk space.
Even without file content tracking, being able to manage files with git,
move files around and delete files with versioned directory trees, and use
branches and distributed clones, are all very handy reasons to use git. And
annexed files can co-exist in the same git repository with regularly
versioned files, which is convenient for maintaining documents, Makefiles,
etc that are associated with annexed files but that benefit from full
revision control.
[[!sidebar content="""
* **[[download]]**
* [[install]]
* [[news]]
* [[bugs]]
* [[contact]]
"""]]
## sample use cases
<table>
<tr>
<td>[[!inline feeds=no template=bare pages=use_case/bob]]</td>
<td>[[!inline feeds=no template=bare pages=use_case/alice]]</td>
</tr>
</table>
If that describes you, or if you're some from column A and some from column
B, then git-annex may be the tool you've been looking for to expand from
keeping all your small important files in git, to managing your large
files with git.
## documentation
* [[git-annex man page|git-annex]]
* [[key-value backends|backends]] for data storage
* [[location_tracking]] reminds you where git-annex has seen files
* git-annex prevents accidential data loss by [[tracking copies|copies]]
of your files
* [[what git annex is not|not]]
* git-annex is Free Software, licensed under the [[GPL]].
----
git-annex's wiki is powered by [Ikiwiki](http://ikiwiki.info/) and
hosted by [Branchable](http://branchable.com/).

7
doc/install.mdwn Normal file
View file

@ -0,0 +1,7 @@
To build and use git-annex, you will need:
* The Haskell Platform: <http://haskell.org/platform/>
* MissingH: <http://github.com/jgoerzen/missingh/wiki>
* uuid: <http://www.ossp.org/pkg/lib/uuid/>
Then just [[download]] git-annex and run: `make; make install`

View file

@ -0,0 +1,28 @@
git-annex keeps track of in which repositories it last saw a file's content.
This location tracking information is stored in `.git-annex/$key.log`.
Repositories record their UUID and the date when they get or drop
a file's content. (Git is configured to use a union merge for this file,
so the lines may be in arbitrary order, but it will never conflict.)
This location tracking information is useful if you have multiple
repositories, and not all are always accessible. For example, perhaps one
is on a home file server, and you are away from home. Then git-annex can
tell you what git remote it needs access to in order to get a file:
# git annex get myfile
get myfile(not available)
I was unable to access these remotes: home
Another way the location tracking comes in handy is if you put repositories
on removable USB drives, that might be archived away offline in a safe
place. In this sort of case, you probably don't have a git remotes
configured for every USB drive. So git-annex may have to resort to talking
about repository UUIDs. If you have previously used "git annex init"
to attach descriptions to those repositories, it will include their
descriptions to help you with finding them:
# git annex get myfile
get myfile (not available)
Try making some of these repositories available:
c0a28e06-d7ef-11df-885c-775af44f8882 -- USB archive drive 1
e1938fee-d95b-11df-96cc-002170d25c55

5
doc/news.mdwn Normal file
View file

@ -0,0 +1,5 @@
This is where announcements of new releases, features, and other news is
posted. git-annex users are recommended to subscribe to this page's RSS
feed.
[[!inline pages="./news/* and !*/Discussion" rootpage="news" show="30"]]

16
doc/not.mdwn Normal file
View file

@ -0,0 +1,16 @@
[[!meta title="what git-annex is not"]]
* git-annex is not a backup system. It may be a useful component of an
[[archival|use_case/bob]] system, or a way to deliver files to a backup
system.
For a backup system that uses git, take a look at
[bup](http://github.com/apenwarr/bup).
* git-annex is not unison, but if you're finding unison's checksumming
too slow, or its strict mirroring of everything to both places too
limiting, then git-annex could be a useful alternative.
* git-annex is not some flaky script that was quickly thrown together.
I wrote it in Haskell because I wanted it to be solid and to compile
down to a binary.

1
doc/templates/bare.tmpl vendored Normal file
View file

@ -0,0 +1 @@
<TMPL_VAR CONTENT>

18
doc/use_case/Alice.mdwn Normal file
View file

@ -0,0 +1,18 @@
### The Nomad
Alice is always on the move, often with her trusty netbook and a small
handheld terabyte USB drive, or a smaller USB keydrive. She has a server
out there on the net. All these things can have different files on them,
but Alice no longer has to deal with the tedious process of keeping them
manually in sync.
When she has 1 bar on her cell, Alice queues up interesting files on her
server for later. At a coffee shop, she has git-annex download them to her
USB drive. High in the sky or in a remote cabin, she catches up on
podcasts, videos, and games, first letting git-annex copy them from
her USB drive to the netbook (this saves battery power).
When she's done, she tells git-annex which to keep and which to remove.
They're all removed from her netbook to save space, and Alice knowns
that next time she syncs up to the net, her changes will be synced back
to her server.

18
doc/use_case/Bob.mdwn Normal file
View file

@ -0,0 +1,18 @@
### The Archivist
Bob has many drives to archive his data, most of them kept offline, in a
safe place.
With git-annex, Bob has a single directory tree that includes all
his files, even if their content is being stored offline. He can
reorganize his files using that tree, committing new versions to git,
without worry about accidentially deleting anything.
When Bob needs access to some files, git-annex can tell him which drive(s)
they're on, and easily make them available. Indeed, every drive knows what
is on every other drive.
Run in a cron job, git-annex adds new files to achival drives at night. It
also helps Bob keep track of intentional, and unintentional copies of
files, and logs information he can use to decide when it's time to duplicate
the content of old drives.

47
git-annex.hs Normal file
View file

@ -0,0 +1,47 @@
{- git-annex main program -}
import IO (try)
import System.IO
import System.Environment
import qualified Annex
import Types
import Core
import Commands
import qualified GitRepo as Git
import BackendList
main = do
args <- getArgs
gitrepo <- Git.repoFromCwd
state <- Annex.new gitrepo allBackends
(flags, actions) <- parseCmd args state
tryRun state $ [startup flags] ++ actions ++ [shutdown]
{- Runs a list of Annex actions. Catches IO errors and continues
- (but explicitly thrown errors terminate the whole command).
- Propigates an overall error status at the end.
-
- This runs in the IO monad, not in the Annex monad. It seems that
- exceptions can only be caught in the IO monad, not in a stacked monad;
- or more likely I missed an easy way to do it. So, I have to laboriously
- thread AnnexState through this function.
-}
tryRun :: AnnexState -> [Annex ()] -> IO ()
tryRun state actions = tryRun' state 0 actions
tryRun' state errnum (a:as) = do
result <- try $ Annex.run state a
case (result) of
Left err -> do
showErr err
tryRun' state (errnum + 1) as
Right (_,state') -> tryRun' state' errnum as
tryRun' state errnum [] = do
if (errnum > 0)
then error $ (show errnum) ++ " failed"
else return ()
{- Exception pretty-printing. -}
showErr e = do
hPutStrLn stderr $ "git-annex: " ++ (show e)
return ()

43
mdwn2man Executable file
View file

@ -0,0 +1,43 @@
#!/usr/bin/perl
# Warning: hack
my $prog=shift;
my $section=shift;
print ".TH $prog $section\n";
while (<>) {
s{(\\?)\[\[([^\s\|\]]+)(\|[^\s\]]+)?\]\]}{$1 ? "[[$2]]" : $2}eg;
s/\`//g;
s/^\s*\./\\&./g;
if (/^#\s/) {
s/^#\s/.SH /;
<>; # blank;
}
s/^ +//;
s/^\t/ /;
s/-/\\-/g;
s/^Warning:.*//g;
s/^$/.PP\n/;
s/^\*\s+(.*)/.IP "$1"/;
next if $_ eq ".PP\n" && $skippara;
if (/^.IP /) {
$inlist=1;
$spippara=0;
}
elsif (/.SH/) {
$skippara=0;
$inlist=0;
}
elsif (/^\./) {
$skippara=1;
}
else {
$skippara=0;
}
if ($inlist && $_ eq ".PP\n") {
$_=".IP\n";
}
print $_;
}