support for checking presence of objects in direct mode

Also for dropping objects in direct mode.

Checking presence reliably needs a cache of mtime, size, and inode.
This way, if a file is modified, keys that point to it are no longer
present.

Also, the code for restoring the symlink when removing objects is
unnecessarily messy. calcGitLink was generating links starting with
"../../remote/.git/", when running "git annex move --from remote".
I put in a workaround, but calcGitLink should probably be fixed.

There is not yet support for getting objects from repositories in direct
mode; it still looks for content in .git/annex/objects, and there's no
once place I can change to fix that.

Also, getting objects from direct mode repositories is problematic since
the can be changed while the object is being transferred. It probably needs
to quarantine it first.
This commit is contained in:
Joey Hess 2012-12-07 17:28:23 -04:00
parent 3898d8c091
commit ef24751922
4 changed files with 181 additions and 58 deletions

View file

@ -1,6 +1,6 @@
{- git-annex file content managing
-
- Copyright 2010 Joey Hess <joey@kitenet.net>
- Copyright 2010,2012 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
@ -48,21 +48,57 @@ import Config
import Annex.Exception
import Git.SharedRepository
import Annex.Perms
import Annex.Content.Direct
{- Performs an action, passing it the location to use for a key's content.
-
- In direct mode, the associated files will be passed. But, if there are
- no associated files for a key, the indirect mode action will be
- performed instead. -}
withObjectLoc :: Key -> (FilePath -> Annex a) -> ([FilePath] -> Annex a) -> Annex a
withObjectLoc key indirect direct = ifM isDirect
( do
fs <- associatedFiles key
if null fs
then goindirect
else direct fs
, goindirect
)
where
goindirect = indirect =<< inRepo (gitAnnexLocation key)
{- Checks if a given key's content is currently present. -}
inAnnex :: Key -> Annex Bool
inAnnex = inAnnex' doesFileExist
inAnnex' :: (FilePath -> IO a) -> Key -> Annex a
inAnnex' a key = do
whenM (fromRepo Git.repoIsUrl) $
error "inAnnex cannot check remote repo"
inRepo $ \g -> gitAnnexLocation key g >>= a
inAnnex = inAnnex' id False $ liftIO . doesFileExist
{- Generic inAnnex, handling both indirect and direct mode.
-
- In direct mode, at least one of the associated files must pass the
- check. Additionally, the file must be unmodified.
-}
inAnnex' :: (a -> Bool) -> a -> (FilePath -> Annex a) -> Key -> Annex a
inAnnex' isgood bad check key = withObjectLoc key checkindirect checkdirect
where
checkindirect loc = do
whenM (fromRepo Git.repoIsUrl) $
error "inAnnex cannot check remote repo"
check loc
checkdirect [] = return bad
checkdirect (loc:locs) = do
r <- check loc
if isgood r
then ifM (unmodifed key loc)
( return r
, checkdirect locs
)
else checkdirect locs
{- A safer check; the key's content must not only be present, but
- is not in the process of being removed. -}
inAnnexSafe :: Key -> Annex (Maybe Bool)
inAnnexSafe = inAnnex' $ \f -> openforlock f >>= check
inAnnexSafe = inAnnex' (maybe False id) (Just False) go
where
go f = liftIO $ openforlock f >>= check
openforlock f = catchMaybeIO $
openFd f ReadOnly Nothing defaultFileFlags
check Nothing = return is_missing
@ -195,6 +231,7 @@ checkDiskSpace destination key alreadythere = do
forcemsg = " (use --force to override this check or adjust annex.diskreserve)"
{- Moves a key's content into .git/annex/objects/
-
- In direct mode, moves it to the associated file, or files.
-
- What if the key there already has content? This could happen for
@ -217,13 +254,9 @@ checkDiskSpace destination key alreadythere = do
- meet.
-}
moveAnnex :: Key -> FilePath -> Annex ()
moveAnnex key src = ifM isDirect
( storefiles =<< associatedFiles key
, storeobject
)
moveAnnex key src = withObjectLoc key storeobject storedirect
where
storeobject = do
dest <- inRepo $ gitAnnexLocation key
storeobject dest = do
ifM (liftIO $ doesFileExist dest)
( liftIO $ removeFile src
, do
@ -232,41 +265,22 @@ moveAnnex key src = ifM isDirect
freezeContent dest
freezeContentDir dest
)
storefiles [] = storeobject
storefiles (dest:fs) = do
storedirect [] = storeobject =<< inRepo (gitAnnexLocation key)
storedirect (dest:fs) = do
thawContent src
liftIO $ replacefile dest $ moveFile src
liftIO $ forM_ fs $ \f -> replacefile f $ createLink dest
replacefile file a = do
{- Remove any symlink or existing file. -}
r <- tryIO $ removeFile file
{- Only need to create parent directory if file did not exist. -}
case r of
Left _ -> createDirectoryIfMissing True (parentDir file)
_ -> noop
a file
liftIO $ replaceFile dest $ moveFile src
liftIO $ forM_ fs $ \f -> replaceFile f $ createLink dest
{- Files in the tree that are associated with a key.
- For use in direct mode.
-
- When no known associated files exist, returns the gitAnnexLocation. -}
associatedFiles :: Key -> Annex [FilePath]
associatedFiles key = do
mapping <- inRepo $ gitAnnexMapping key
files <- liftIO $ catchDefaultIO [] $ lines <$> readFile mapping
if null files
then do
l <- inRepo $ gitAnnexLocation key
return [l]
else do
top <- fromRepo Git.repoPath
return $ map (top </>) files
withObjectLoc :: Key -> ((FilePath, FilePath) -> Annex a) -> Annex a
withObjectLoc key a = do
file <- inRepo $ gitAnnexLocation key
let dir = parentDir file
a (dir, file)
{- Replaces any existing file with a new version, by running an action.
- First, makes sure the file is deleted. Or, if it didn't already exist,
- makes sure the parent directory exists. -}
replaceFile :: FilePath -> (FilePath -> IO ()) -> IO ()
replaceFile file a = do
r <- tryIO $ removeFile file
case r of
Left _ -> createDirectoryIfMissing True (parentDir file)
_ -> noop
a file
cleanObjectLoc :: Key -> Annex ()
cleanObjectLoc key = do
@ -279,18 +293,33 @@ cleanObjectLoc key = do
maybe noop (const $ removeparents dir (n-1))
<=< catchMaybeIO $ removeDirectory dir
{- Removes a key's file from .git/annex/objects/ -}
{- Removes a key's file from .git/annex/objects/
-
- In direct mode, deletes the associated files or files, and replaces
- them with symlinks. -}
removeAnnex :: Key -> Annex ()
removeAnnex key = withObjectLoc key $ \(dir, file) -> do
liftIO $ do
allowWrite dir
removeFile file
cleanObjectLoc key
removeAnnex key = withObjectLoc key remove removedirect
where
remove file = do
liftIO $ do
allowWrite $ parentDir file
removeFile file
cleanObjectLoc key
removedirect fs = mapM_ resetfile fs
resetfile f = do
l <- calcGitLink f key
top <- fromRepo Git.repoPath
cwd <- liftIO getCurrentDirectory
let top' = fromMaybe top $ absNormPath cwd top
let l' = relPathDirToFile top' (fromMaybe l $ absNormPath top' l)
liftIO $ replaceFile f $ const $
createSymbolicLink l' f
{- Moves a key's file out of .git/annex/objects/ -}
fromAnnex :: Key -> FilePath -> Annex ()
fromAnnex key dest = withObjectLoc key $ \(dir, file) -> do
liftIO $ allowWrite dir
fromAnnex key dest = do
file <- inRepo $ gitAnnexLocation key
liftIO $ allowWrite $ parentDir file
thawContent file
liftIO $ moveFile file dest
cleanObjectLoc key

78
Annex/Content/Direct.hs Normal file
View file

@ -0,0 +1,78 @@
{- git-annex file content managing for direct mode
-
- Copyright 2010,2012 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Annex.Content.Direct (
associatedFiles,
unmodifed,
getCache,
showCache,
) where
import Common.Annex
import qualified Git
import System.Posix.Types
{- Files in the tree that are associated with a key.
-
- When no known associated files exist, returns the gitAnnexLocation. -}
associatedFiles :: Key -> Annex [FilePath]
associatedFiles key = do
mapping <- inRepo $ gitAnnexMapping key
files <- liftIO $ catchDefaultIO [] $ lines <$> readFile mapping
if null files
then do
l <- inRepo $ gitAnnexLocation key
return [l]
else do
top <- fromRepo Git.repoPath
return $ map (top </>) files
{- Checks if a file in the tree, associated with a key, has not been modified.
-
- To avoid needing to fsck the file's content, which can involve an
- expensive checksum, this relies on a cache that contains the file's
- expected mtime and inode.
-}
unmodifed :: Key -> FilePath -> Annex Bool
unmodifed key file = do
cachefile <- inRepo $ gitAnnexCache key
liftIO $ do
curr <- getCache file
old <- catchDefaultIO Nothing $ readCache <$> readFile cachefile
return $ isJust curr && curr == old
{- Cache a file's inode, size, and modification time to determine if it's
- been changed. -}
data Cache = Cache FileID FileOffset EpochTime
deriving (Eq)
showCache :: Cache -> String
showCache (Cache inode size mtime) = unwords
[ show inode
, show size
, show mtime
]
readCache :: String -> Maybe Cache
readCache s = case words s of
(inode:size:mtime:_) -> Cache
<$> readish inode
<*> readish size
<*> readish mtime
_ -> Nothing
getCache :: FilePath -> IO (Maybe Cache)
getCache f = catchDefaultIO Nothing $ toCache <$> getFileStatus f
toCache :: FileStatus -> Maybe Cache
toCache s
| isRegularFile s = Just $ Cache
(fileID s)
(fileSize s)
(modificationTime s)
| otherwise = Nothing

View file

@ -12,6 +12,7 @@ module Locations (
keyPath,
gitAnnexLocation,
gitAnnexMapping,
gitAnnexCache,
annexLocations,
annexLocation,
gitAnnexDir,
@ -115,6 +116,14 @@ gitAnnexMapping key r = do
loc <- gitAnnexLocation key r
return $ loc ++ ".map"
{- File that caches information about a key's content, used to determine
- if a file has changed.
- Used in direct mode. -}
gitAnnexCache :: Key -> Git.Repo -> IO FilePath
gitAnnexCache key r = do
loc <- gitAnnexLocation key r
return $ loc ++ ".cache"
{- The annex directory of a repository. -}
gitAnnexDir :: Git.Repo -> FilePath
gitAnnexDir r = addTrailingPathSeparator $ Git.localGitDir r </> annexDir

View file

@ -43,11 +43,12 @@ is converted to a real file when it becomes present.
## concrete design
* Enable with annex.nosymlink or such config option.
* Use .git/ for the git repo, but `.git/annex/objects` won't be used.
* Enable with annex.direct
* Use .git/ for the git repo, but `.git/annex/objects` won't be used
for object storage.
* `git status` and similar will show all files as type changed, and
`git commit` would be a very bad idea. Just don't support users running
git commands that affect the repository in this mode.
git commands that affect the repository in this mode. Probably.
* However, `git status` and similar also will show deleted and new files,
which will be helpful for the assistant to use when starting up.
* Cache the mtime, size etc of files, and use this to detect when they've been
@ -61,6 +62,8 @@ is converted to a real file when it becomes present.
can map to multiple files. And that when a file is deleted or moved, the
mapping needs to be updated.
* May need a reverse mapping, from files in the tree to keys? TBD
(Needed to make things like `git annex drop` that want to map from the
file back to the key work.)
* The existing watch code detects when a file gets closed, and in this
mode, it could be a new file, or a modified file, or an unchanged file.
For a modified file, can compare mtime, size, etc, to see if it needs
@ -73,3 +76,7 @@ is converted to a real file when it becomes present.
to files in this remote would not be noticed and committed, unless
a git-annex command were added to do so.
Getting it basically working as a remote would be a good 1st step.
* It could also be used without the assistant as a repository that
the user uses directly. Would need some git-annex commands
to merge changes into the repo, update caches, and commit changes.
This could all be done by "git annex sync".