This commit is contained in:
Joey Hess 2011-10-04 00:40:47 -04:00
parent ff21fd4a65
commit cfe21e85e7
73 changed files with 173 additions and 173 deletions

336
Annex/Branch.hs Normal file
View file

@ -0,0 +1,336 @@
{- management of the git-annex branch
-
- Copyright 2011 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Annex.Branch (
create,
update,
get,
change,
commit,
files,
refExists,
hasOrigin,
hasSomeBranch,
name
) where
import System.IO.Binary
import System.Exit
import qualified Data.ByteString.Lazy.Char8 as L
import Annex.Common
import Annex.Exception
import Types.BranchState
import qualified Git
import qualified Git.UnionMerge
import qualified Annex
import Annex.CatFile
type GitRef = String
{- Name of the branch that is used to store git-annex's information. -}
name :: GitRef
name = "git-annex"
{- Fully qualified name of the branch. -}
fullname :: GitRef
fullname = "refs/heads/" ++ name
{- Branch's name in origin. -}
originname :: GitRef
originname = "origin/" ++ name
{- A separate index file for the branch. -}
index :: Git.Repo -> FilePath
index g = gitAnnexDir g </> "index"
{- Populates the branch's index file with the current branch contents.
-
- Usually, this is only done when the index doesn't yet exist, and
- the index is used to build up changes to be commited to the branch,
- and merge in changes from other branches.
-}
genIndex :: Git.Repo -> IO ()
genIndex g = Git.UnionMerge.ls_tree g fullname >>= Git.UnionMerge.update_index g
{- Runs an action using the branch's index file. -}
withIndex :: Annex a -> Annex a
withIndex = withIndex' False
withIndex' :: Bool -> Annex a -> Annex a
withIndex' bootstrapping a = do
g <- gitRepo
let f = index g
bracketIO (Git.useIndex f) id $ do
unlessM (liftIO $ doesFileExist f) $ do
unless bootstrapping create
liftIO $ createDirectoryIfMissing True $ takeDirectory f
unless bootstrapping $ liftIO $ genIndex g
a
withIndexUpdate :: Annex a -> Annex a
withIndexUpdate a = update >> withIndex a
getState :: Annex BranchState
getState = Annex.getState Annex.branchstate
setState :: BranchState -> Annex ()
setState state = Annex.changeState $ \s -> s { Annex.branchstate = state }
setCache :: FilePath -> String -> Annex ()
setCache file content = do
state <- getState
setState state { cachedFile = Just file, cachedContent = content }
invalidateCache :: Annex ()
invalidateCache = do
state <- getState
setState state { cachedFile = Nothing, cachedContent = "" }
getCache :: FilePath -> Annex (Maybe String)
getCache file = getState >>= go
where
go state
| cachedFile state == Just file =
return $ Just $ cachedContent state
| otherwise = return Nothing
{- Creates the branch, if it does not already exist. -}
create :: Annex ()
create = unlessM hasBranch $ do
g <- gitRepo
e <- hasOrigin
if e
then liftIO $ Git.run g "branch" [Param name, Param originname]
else withIndex' True $
liftIO $ Git.commit g "branch created" fullname []
{- Stages the journal, and commits staged changes to the branch. -}
commit :: String -> Annex ()
commit message = do
fs <- getJournalFiles
when (not $ null fs) $ lockJournal $ do
stageJournalFiles fs
g <- gitRepo
withIndex $ liftIO $ Git.commit g message fullname [fullname]
{- Ensures that the branch is up-to-date; should be called before
- data is read from it. Runs only once per git-annex run. -}
update :: Annex ()
update = do
state <- getState
unless (branchUpdated state) $ do
-- check what needs updating before taking the lock
fs <- getJournalFiles
refs <- filterM checkref =<< siblingBranches
unless (null fs && null refs) $ withIndex $ lockJournal $ do
{- Before refs are merged into the index, it's
- important to first stage the journal into the
- index. Otherwise, any changes in the journal
- would later get staged, and might overwrite
- changes made during the merge.
-
- It would be cleaner to handle the merge by
- updating the journal, not the index, with changes
- from the branches.
-}
unless (null fs) $ stageJournalFiles fs
mapM_ mergeref refs
g <- gitRepo
liftIO $ Git.commit g "update" fullname (fullname:refs)
Annex.changeState $ \s -> s { Annex.branchstate = state { branchUpdated = True } }
invalidateCache
where
checkref ref = do
g <- gitRepo
-- checking with log to see if there have been changes
-- is less expensive than always merging
diffs <- liftIO $ Git.pipeRead g [
Param "log",
Param (name++".."++ref),
Params "--oneline -n1"
]
return $ not $ L.null diffs
mergeref ref = do
showSideAction $ "merging " ++
Git.refDescribe ref ++ " into " ++ name
{- By passing only one ref, it is actually
- merged into the index, preserving any
- changes that may already be staged.
-
- However, any changes in the git-annex
- branch that are *not* reflected in the
- index will be removed. So, documentation
- advises users not to directly modify the
- branch.
-}
g <- gitRepo
liftIO $ Git.UnionMerge.merge g [ref]
return $ Just ref
{- Checks if a git ref exists. -}
refExists :: GitRef -> Annex Bool
refExists ref = do
g <- gitRepo
liftIO $ Git.runBool g "show-ref"
[Param "--verify", Param "-q", Param ref]
{- Does the main git-annex branch exist? -}
hasBranch :: Annex Bool
hasBranch = refExists fullname
{- Does origin/git-annex exist? -}
hasOrigin :: Annex Bool
hasOrigin = refExists originname
{- Does the git-annex branch or a foo/git-annex branch exist? -}
hasSomeBranch :: Annex Bool
hasSomeBranch = not . null <$> siblingBranches
{- List of all git-annex branches, including the main one and any
- from remotes. -}
siblingBranches :: Annex [String]
siblingBranches = do
g <- gitRepo
r <- liftIO $ Git.pipeRead g [Param "show-ref", Param name]
return $ map (last . words . L.unpack) (L.lines r)
{- Applies a function to modifiy the content of a file. -}
change :: FilePath -> (String -> String) -> Annex ()
change file a = lockJournal $ get file >>= return . a >>= set file
{- Records new content of a file into the journal. -}
set :: FilePath -> String -> Annex ()
set file content = do
setJournalFile file content
setCache file content
{- Gets the content of a file on the branch, or content from the journal, or
- staged in the index.
-
- Returns an empty string if the file doesn't exist yet. -}
get :: FilePath -> Annex String
get file = do
cached <- getCache file
case cached of
Just content -> return content
Nothing -> do
j <- getJournalFile file
case j of
Just content -> do
setCache file content
return content
Nothing -> withIndexUpdate $ do
content <- catFile fullname file
setCache file content
return content
{- Lists all files on the branch. There may be duplicates in the list. -}
files :: Annex [FilePath]
files = withIndexUpdate $ do
g <- gitRepo
bfiles <- liftIO $ Git.pipeNullSplit g
[Params "ls-tree --name-only -r -z", Param fullname]
jfiles <- getJournalledFiles
return $ jfiles ++ bfiles
{- Records content for a file in the branch to the journal.
-
- Using the journal, rather than immediatly staging content to the index
- avoids git needing to rewrite the index after every change. -}
setJournalFile :: FilePath -> String -> Annex ()
setJournalFile file content = do
g <- gitRepo
liftIO $ catch (write g) $ const $ do
createDirectoryIfMissing True $ gitAnnexJournalDir g
createDirectoryIfMissing True $ gitAnnexTmpDir g
write g
where
-- journal file is written atomically
write g = do
let jfile = journalFile g file
let tmpfile = gitAnnexTmpDir g </> takeFileName jfile
writeBinaryFile tmpfile content
renameFile tmpfile jfile
{- Gets any journalled content for a file in the branch. -}
getJournalFile :: FilePath -> Annex (Maybe String)
getJournalFile file = do
g <- gitRepo
liftIO $ catch (liftM Just . readFileStrict $ journalFile g file)
(const $ return Nothing)
{- List of files that have updated content in the journal. -}
getJournalledFiles :: Annex [FilePath]
getJournalledFiles = map fileJournal <$> getJournalFiles
{- List of existing journal files. -}
getJournalFiles :: Annex [FilePath]
getJournalFiles = do
g <- gitRepo
fs <- liftIO $ catch (getDirectoryContents $ gitAnnexJournalDir g)
(const $ return [])
return $ filter (`notElem` [".", ".."]) fs
{- Stages the specified journalfiles. -}
stageJournalFiles :: [FilePath] -> Annex ()
stageJournalFiles fs = do
g <- gitRepo
withIndex $ liftIO $ do
let dir = gitAnnexJournalDir g
let paths = map (dir </>) fs
-- inject all the journal files directly into git
-- in one quick command
(pid, fromh, toh) <- hPipeBoth "git" $ toCommand $
Git.gitCommandLine g [Param "hash-object", Param "-w", Param "--stdin-paths"]
_ <- forkProcess $ do
hPutStr toh $ unlines paths
hClose toh
exitSuccess
hClose toh
s <- hGetContents fromh
-- update the index, also in just one command
Git.UnionMerge.update_index g $
index_lines (lines s) $ map fileJournal fs
hClose fromh
forceSuccess pid
mapM_ removeFile paths
where
index_lines shas = map genline . zip shas
genline (sha, file) = Git.UnionMerge.update_index_line sha file
{- Produces a filename to use in the journal for a file on the branch.
-
- The journal typically won't have a lot of files in it, so the hashing
- used in the branch is not necessary, and all the files are put directly
- in the journal directory.
-}
journalFile :: Git.Repo -> FilePath -> FilePath
journalFile repo file = gitAnnexJournalDir repo </> concatMap mangle file
where
mangle '/' = "_"
mangle '_' = "__"
mangle c = [c]
{- Converts a journal file (relative to the journal dir) back to the
- filename on the branch. -}
fileJournal :: FilePath -> FilePath
fileJournal = replace "//" "_" . replace "_" "/"
{- Runs an action that modifies the journal, using locking to avoid
- contention with other git-annex processes. -}
lockJournal :: Annex a -> Annex a
lockJournal a = do
g <- gitRepo
let file = gitAnnexJournalLock g
bracketIO (lock file) unlock a
where
lock file = do
l <- createFile file stdFileMode
waitToSetLock l (WriteLock, AbsoluteSeek, 0, 0)
return l
unlock = closeFd

24
Annex/CatFile.hs Normal file
View file

@ -0,0 +1,24 @@
{- git cat-file interface, with handle automatically stored in the Annex monad
-
- Copyright 2011 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Annex.CatFile (
catFile
) where
import Annex.Common
import qualified Git.CatFile
import qualified Annex
catFile :: String -> FilePath -> Annex String
catFile branch file = maybe startup go =<< Annex.getState Annex.catfilehandle
where
startup = do
g <- gitRepo
h <- liftIO $ Git.CatFile.catFileStart g
Annex.changeState $ \s -> s { Annex.catfilehandle = Just h }
go h
go h = liftIO $ Git.CatFile.catFile h branch file

13
Annex/Common.hs Normal file
View file

@ -0,0 +1,13 @@
module Annex.Common (
module Common,
module Types,
module Annex,
module Locations,
module Messages,
) where
import Common
import Types
import Annex (gitRepo)
import Locations
import Messages

237
Annex/Content.hs Normal file
View file

@ -0,0 +1,237 @@
{- git-annex file content managing
-
- Copyright 2010 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Annex.Content (
inAnnex,
calcGitLink,
logStatus,
getViaTmp,
getViaTmpUnchecked,
withTmp,
checkDiskSpace,
moveAnnex,
removeAnnex,
fromAnnex,
moveBad,
getKeysPresent,
saveState
) where
import Annex.Common
import LocationLog
import UUID
import qualified Git
import qualified Annex
import qualified Annex.Queue
import qualified Annex.Branch
import Utility.StatFS
import Utility.FileMode
import Types.Key
import Utility.DataUnits
import Config
{- Checks if a given key is currently present in the gitAnnexLocation. -}
inAnnex :: Key -> Annex Bool
inAnnex key = do
g <- gitRepo
when (Git.repoIsUrl g) $ error "inAnnex cannot check remote repo"
liftIO $ doesFileExist $ gitAnnexLocation g key
{- Calculates the relative path to use to link a file to a key. -}
calcGitLink :: FilePath -> Key -> Annex FilePath
calcGitLink file key = do
g <- gitRepo
cwd <- liftIO getCurrentDirectory
let absfile = fromMaybe whoops $ absNormPath cwd file
return $ relPathDirToFile (parentDir absfile)
(Git.workTree g) </> ".git" </> annexLocation key
where
whoops = error $ "unable to normalize " ++ file
{- Updates the LocationLog when a key's presence changes in the current
- repository. -}
logStatus :: Key -> LogStatus -> Annex ()
logStatus key status = do
g <- gitRepo
u <- getUUID g
logChange g key u status
{- Runs an action, passing it a temporary filename to download,
- and if the action succeeds, moves the temp file into
- the annex as a key's content. -}
getViaTmp :: Key -> (FilePath -> Annex Bool) -> Annex Bool
getViaTmp key action = do
g <- gitRepo
let tmp = gitAnnexTmpLocation g key
-- Check that there is enough free disk space.
-- When the temp file already exists, count the space
-- it is using as free.
e <- liftIO $ doesFileExist tmp
if e
then do
stat <- liftIO $ getFileStatus tmp
checkDiskSpace' (fromIntegral $ fileSize stat) key
else checkDiskSpace key
when e $ liftIO $ allowWrite tmp
getViaTmpUnchecked key action
prepTmp :: Key -> Annex FilePath
prepTmp key = do
g <- gitRepo
let tmp = gitAnnexTmpLocation g key
liftIO $ createDirectoryIfMissing True (parentDir tmp)
return tmp
{- Like getViaTmp, but does not check that there is enough disk space
- for the incoming key. For use when the key content is already on disk
- and not being copied into place. -}
getViaTmpUnchecked :: Key -> (FilePath -> Annex Bool) -> Annex Bool
getViaTmpUnchecked key action = do
tmp <- prepTmp key
success <- action tmp
if success
then do
moveAnnex key tmp
logStatus key InfoPresent
return True
else do
-- the tmp file is left behind, in case caller wants
-- to resume its transfer
return False
{- Creates a temp file, runs an action on it, and cleans up the temp file. -}
withTmp :: Key -> (FilePath -> Annex a) -> Annex a
withTmp key action = do
tmp <- prepTmp key
res <- action tmp
liftIO $ whenM (doesFileExist tmp) $ liftIO $ removeFile tmp
return res
{- Checks that there is disk space available to store a given key,
- throwing an error if not. -}
checkDiskSpace :: Key -> Annex ()
checkDiskSpace = checkDiskSpace' 0
checkDiskSpace' :: Integer -> Key -> Annex ()
checkDiskSpace' adjustment key = do
g <- gitRepo
r <- getConfig g "diskreserve" ""
let reserve = fromMaybe megabyte $ readSize dataUnits r
stats <- liftIO $ getFileSystemStats (gitAnnexDir g)
case (stats, keySize key) of
(Nothing, _) -> return ()
(_, Nothing) -> return ()
(Just (FileSystemStats { fsStatBytesAvailable = have }), Just need) ->
when (need + reserve > have + adjustment) $
needmorespace (need + reserve - have - adjustment)
where
megabyte :: Integer
megabyte = 1000000
needmorespace n = unlessM (Annex.getState Annex.force) $
error $ "not enough free space, need " ++
roughSize storageUnits True n ++
" more (use --force to override this check or adjust annex.diskreserve)"
{- Moves a file into .git/annex/objects/
-
- What if the key there already has content? This could happen for
- various reasons; perhaps the same content is being annexed again.
- Perhaps there has been a hash collision generating the keys.
-
- The current strategy is to assume that in this case it's safe to delete
- one of the two copies of the content; and the one already in the annex
- is left there, assuming it's the original, canonical copy.
-
- I considered being more paranoid, and checking that both files had
- the same content. Decided against it because A) users explicitly choose
- a backend based on its hashing properties and so if they're dealing
- with colliding files it's their own fault and B) adding such a check
- would not catch all cases of colliding keys. For example, perhaps
- a remote has a key; if it's then added again with different content then
- the overall system now has two different peices of content for that
- key, and one of them will probably get deleted later. So, adding the
- check here would only raise expectations that git-annex cannot truely
- meet.
-}
moveAnnex :: Key -> FilePath -> Annex ()
moveAnnex key src = do
g <- gitRepo
let dest = gitAnnexLocation g key
let dir = parentDir dest
e <- liftIO $ doesFileExist dest
if e
then liftIO $ removeFile src
else liftIO $ do
createDirectoryIfMissing True dir
allowWrite dir -- in case the directory already exists
renameFile src dest
preventWrite dest
preventWrite dir
withObjectLoc :: Key -> ((FilePath, FilePath) -> Annex a) -> Annex a
withObjectLoc key a = do
g <- gitRepo
let file = gitAnnexLocation g key
let dir = parentDir file
a (dir, file)
{- Removes a key's file from .git/annex/objects/ -}
removeAnnex :: Key -> Annex ()
removeAnnex key = withObjectLoc key $ \(dir, file) -> liftIO $ do
allowWrite dir
removeFile file
removeDirectory dir
{- Moves a key's file out of .git/annex/objects/ -}
fromAnnex :: Key -> FilePath -> Annex ()
fromAnnex key dest = withObjectLoc key $ \(dir, file) -> liftIO $ do
allowWrite dir
allowWrite file
renameFile file dest
removeDirectory dir
{- Moves a key out of .git/annex/objects/ into .git/annex/bad, and
- returns the file it was moved to. -}
moveBad :: Key -> Annex FilePath
moveBad key = do
g <- gitRepo
let src = gitAnnexLocation g key
let dest = gitAnnexBadDir g </> takeFileName src
liftIO $ do
createDirectoryIfMissing True (parentDir dest)
allowWrite (parentDir src)
renameFile src dest
removeDirectory (parentDir src)
logStatus key InfoMissing
return dest
{- List of keys whose content exists in .git/annex/objects/ -}
getKeysPresent :: Annex [Key]
getKeysPresent = do
g <- gitRepo
getKeysPresent' $ gitAnnexObjectDir g
getKeysPresent' :: FilePath -> Annex [Key]
getKeysPresent' dir = do
exists <- liftIO $ doesDirectoryExist dir
if not exists
then return []
else liftIO $ do
-- 2 levels of hashing
levela <- dirContents dir
levelb <- mapM dirContents levela
contents <- mapM dirContents (concat levelb)
let files = concat contents
return $ mapMaybe (fileKey . takeFileName) files
{- Things to do to record changes to content. -}
saveState :: Annex ()
saveState = do
Annex.Queue.flush False
Annex.Branch.commit "update"

View file

@ -15,7 +15,7 @@ import Control.Exception.Control (handle)
import Control.Monad.IO.Control (liftIOOp)
import Control.Exception hiding (handle, throw)
import AnnexCommon
import Annex.Common
{- Runs an Annex action, with setup and cleanup both in the IO monad. -}
bracketIO :: IO c -> (c -> IO b) -> Annex a -> Annex a

42
Annex/Queue.hs Normal file
View file

@ -0,0 +1,42 @@
{- git-annex command queue
-
- Copyright 2011 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Annex.Queue (
add,
flush,
flushWhenFull
) where
import Annex.Common
import Annex
import qualified Git.Queue
{- Adds a git command to the queue. -}
add :: String -> [CommandParam] -> [FilePath] -> Annex ()
add command params files = do
q <- getState repoqueue
store $ Git.Queue.add q command params files
{- Runs the queue if it is full. Should be called periodically. -}
flushWhenFull :: Annex ()
flushWhenFull = do
q <- getState repoqueue
when (Git.Queue.full q) $ flush False
{- Runs (and empties) the queue. -}
flush :: Bool -> Annex ()
flush silent = do
q <- getState repoqueue
unless (0 == Git.Queue.size q) $ do
unless silent $
showSideAction "Recording state in git"
g <- gitRepo
q' <- liftIO $ Git.Queue.flush g q
store q'
store :: Git.Queue.Queue -> Annex ()
store q = changeState $ \s -> s { repoqueue = q }

46
Annex/Version.hs Normal file
View file

@ -0,0 +1,46 @@
{- git-annex repository versioning
-
- Copyright 2010 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Annex.Version where
import Annex.Common
import qualified Git
import Config
type Version = String
defaultVersion :: Version
defaultVersion = "3"
supportedVersions :: [Version]
supportedVersions = [defaultVersion]
upgradableVersions :: [Version]
upgradableVersions = ["0", "1", "2"]
versionField :: String
versionField = "annex.version"
getVersion :: Annex (Maybe Version)
getVersion = do
g <- gitRepo
let v = Git.configGet g versionField ""
if not $ null v
then return $ Just v
else return Nothing
setVersion :: Annex ()
setVersion = setConfig versionField defaultVersion
checkVersion :: Version -> Annex ()
checkVersion v
| v `elem` supportedVersions = return ()
| v `elem` upgradableVersions = err "Upgrade this repository: git-annex upgrade"
| otherwise = err "Upgrade git-annex."
where
err msg = error $ "Repository version " ++ v ++
" is not supported. " ++ msg