From 20ca89dfa3e3ea9e811e1682ef155510d91983a4 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Fri, 4 Dec 2015 13:02:56 -0400 Subject: [PATCH 01/96] skeleton smudge/clean filters --- CmdLine/GitAnnex.hs | 4 ++++ Command/Clean.hs | 29 +++++++++++++++++++++++++++++ Command/Smudge.hs | 29 +++++++++++++++++++++++++++++ doc/git-annex-clean.mdwn | 36 ++++++++++++++++++++++++++++++++++++ doc/git-annex-smudge.mdwn | 31 +++++++++++++++++++++++++++++++ doc/git-annex.mdwn | 6 ++++++ doc/todo/smudge.mdwn | 4 ++-- 7 files changed, 137 insertions(+), 2 deletions(-) create mode 100644 Command/Clean.hs create mode 100644 Command/Smudge.hs create mode 100644 doc/git-annex-clean.mdwn create mode 100644 doc/git-annex-smudge.mdwn diff --git a/CmdLine/GitAnnex.hs b/CmdLine/GitAnnex.hs index f585bff3ed..ee31082064 100644 --- a/CmdLine/GitAnnex.hs +++ b/CmdLine/GitAnnex.hs @@ -96,6 +96,8 @@ import qualified Command.Upgrade import qualified Command.Forget import qualified Command.Proxy import qualified Command.DiffDriver +import qualified Command.Smudge +import qualified Command.Clean import qualified Command.Undo import qualified Command.Version #ifdef WITH_ASSISTANT @@ -201,6 +203,8 @@ cmds testoptparser testrunner = , Command.Forget.cmd , Command.Proxy.cmd , Command.DiffDriver.cmd + , Command.Smudge.cmd + , Command.Clean.cmd , Command.Undo.cmd , Command.Version.cmd #ifdef WITH_ASSISTANT diff --git a/Command/Clean.hs b/Command/Clean.hs new file mode 100644 index 0000000000..9af862fb07 --- /dev/null +++ b/Command/Clean.hs @@ -0,0 +1,29 @@ +{- git-annex command + - + - Copyright 2015 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Command.Clean where + +import Common.Annex +import Command +import Annex.Content +import Annex.Link +import Git.Types + +cmd :: Command +cmd = dontCheck repoExists $ + command "clean" SectionPlumbing + "git clean filter" + paramFile (withParams seek) + +seek :: CmdParams -> CommandSeek +seek = withWords start + +start :: [String] -> CommandStart +start [file] = do + error ("clean " ++ file) +start [] = error "clean filter run without filename; upgrade git" +start _ = error "clean filter passed multiple filenames" diff --git a/Command/Smudge.hs b/Command/Smudge.hs new file mode 100644 index 0000000000..22f9efd696 --- /dev/null +++ b/Command/Smudge.hs @@ -0,0 +1,29 @@ +{- git-annex command + - + - Copyright 2015 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Command.Smudge where + +import Common.Annex +import Command +import Annex.Content +import Annex.Link +import Git.Types + +cmd :: Command +cmd = dontCheck repoExists $ + command "smudge" SectionPlumbing + "git smudge filter" + paramFile (withParams seek) + +seek :: CmdParams -> CommandSeek +seek = withWords start + +start :: [String] -> CommandStart +start [file] = do + error ("smudge " ++ file) +start [] = error "smudge filter run without filename; upgrade git" +start _ = error "smudge filter passed multiple filenames" diff --git a/doc/git-annex-clean.mdwn b/doc/git-annex-clean.mdwn new file mode 100644 index 0000000000..13099a5447 --- /dev/null +++ b/doc/git-annex-clean.mdwn @@ -0,0 +1,36 @@ +# NAME + +git-annex clean - git filter driver for git-annex + +# SYNOPSIS + +git annex clean + +# DESCRIPTION + +When git-annex is used as a git filter driver, this command is run +by git commands such as `git add`. It generates a file that +is added to the git repository and points to the git-annex object +containing the content of a large file. + +To configure git to use git-annex as a git filter driver, place the +following in the .gitattributes file: + + * filter=annex + .* !filter + +The annex.largefiles config is consulted to decide if a given file should +be added to git as-is, or if its content are large enough to need to use +git-annex. + +# SEE ALSO + +[[git-annex]](1) + +[[git-annex-smudge]](1) + +# AUTHOR + +Joey Hess + +Warning: Automatically converted into a man page by mdwn2man. Edit with care. diff --git a/doc/git-annex-smudge.mdwn b/doc/git-annex-smudge.mdwn new file mode 100644 index 0000000000..ae28be2c7d --- /dev/null +++ b/doc/git-annex-smudge.mdwn @@ -0,0 +1,31 @@ +# NAME + +git-annex smudge - git filter driver for git-annex + +# SYNOPSIS + +git annex smudge + +# DESCRIPTION + +When git-annex is used as a git filter driver, this command is run +by git commands such as `git checkout` and outputs the content of annexed +objects that pointer files point to. + +To configure git to use git-annex as a git filter driver, place the +following in the .gitattributes file: + + * filter=annex + .* !filter + +# SEE ALSO + +[[git-annex]](1) + +[[git-annex-clean]](1) + +# AUTHOR + +Joey Hess + +Warning: Automatically converted into a man page by mdwn2man. Edit with care. diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn index 2020ccf3fd..a8cb73b1b1 100644 --- a/doc/git-annex.mdwn +++ b/doc/git-annex.mdwn @@ -626,6 +626,12 @@ subdirectories). See [[git-annex-diffdriver]](1) for details. +* `smudge`, `clean` + + These let git-annex be used as a git filter driver. + + See [[git-annex-smudge]](1) and [[git-annex-clean]](1) for details. + * `remotedaemon` Detects when network remotes have received git pushes and fetches from them. diff --git a/doc/todo/smudge.mdwn b/doc/todo/smudge.mdwn index aea0c9b984..094b5f880f 100644 --- a/doc/todo/smudge.mdwn +++ b/doc/todo/smudge.mdwn @@ -177,8 +177,8 @@ Configuration: the annex. Other files are passed through the smudge/clean as-is and have their contents stored in git. -* annex.direct is repurposed to configure how the assistant adds files. - When set to true, they're added unlocked. +* annex.direct is repurposed to configure how git-annex adds files. + When set to false, it adds symlinks and when true it adds pointer files. git-annex clean: From 2c6454a2e2ce0879123a7b75a56b0b720682db36 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Fri, 4 Dec 2015 13:39:14 -0400 Subject: [PATCH 02/96] basic clean filter working --- Annex/Content.hs | 18 ++++++++++++++++++ Command/Clean.hs | 39 ++++++++++++++++++++++++++++++++++++--- 2 files changed, 54 insertions(+), 3 deletions(-) diff --git a/Annex/Content.hs b/Annex/Content.hs index 289a4f1b37..74fae381b7 100644 --- a/Annex/Content.hs +++ b/Annex/Content.hs @@ -24,6 +24,7 @@ module Annex.Content ( withTmp, checkDiskSpace, moveAnnex, + linkAnnex, sendAnnex, prepSendAnnex, removeAnnex, @@ -470,6 +471,23 @@ moveAnnex key src = withObjectLoc key storeobject storedirect alreadyhave = liftIO $ removeFile src +{- Hard links a file into .git/annex/objects/, falling back to a copy + - if necessary. + - + - Does not lock down the hard linked object, so that the user can modify + - the source file. So, adding an object to the annex this way can + - prevent losing the content if the source file is deleted, but does not + - guard against modifications. + -} +linkAnnex :: Key -> FilePath -> Annex Bool +linkAnnex key src = do + dest <- calcRepo (gitAnnexLocation key) + ifM (liftIO $ doesFileExist dest) + ( return True + , modifyContent dest $ + liftIO $ createLinkOrCopy src dest + ) + {- Runs an action to transfer an object's content. - - In direct mode, it's possible for the file to change as it's being sent. diff --git a/Command/Clean.hs b/Command/Clean.hs index 9af862fb07..0a8e438d1d 100644 --- a/Command/Clean.hs +++ b/Command/Clean.hs @@ -10,8 +10,12 @@ module Command.Clean where import Common.Annex import Command import Annex.Content -import Annex.Link -import Git.Types +import Annex.MetaData +import Types.KeySource +import Types.Key +import Backend + +import qualified Data.ByteString.Lazy as B cmd :: Command cmd = dontCheck repoExists $ @@ -24,6 +28,35 @@ seek = withWords start start :: [String] -> CommandStart start [file] = do - error ("clean " ++ file) + ifM (shouldAnnex file) + ( do + k <- ingest file + liftIO $ putStrLn (key2file k) + , liftIO $ B.hGetContents stdin >>= B.hPut stdout -- cat file + ) + stop start [] = error "clean filter run without filename; upgrade git" start _ = error "clean filter passed multiple filenames" + +shouldAnnex :: FilePath -> Annex Bool +shouldAnnex _ = return True +-- TODO check annex.largefiles + +ingest :: FilePath -> Annex Key +ingest file = do + backend <- chooseBackend file + let source = KeySource + { keyFilename = file + , contentLocation = file + , inodeCache = Nothing + } + k <- fst . fromMaybe (error "failed to generate a key") + <$> genKey source backend + -- Hard link (or copy) file content to annex + -- to prevent it from being lost when git checks out + -- a branch not contaning this file. + unlessM (linkAnnex k file) $ + error "Problem adding file to the annex" + genMetaData k file + =<< liftIO (getFileStatus file) + return k From d3496932698f54e0553c3ef9cfdb11c12fe6a3af Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Fri, 4 Dec 2015 14:03:10 -0400 Subject: [PATCH 03/96] smudge filter working --- Command/Smudge.hs | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/Command/Smudge.hs b/Command/Smudge.hs index 22f9efd696..dc618d36e0 100644 --- a/Command/Smudge.hs +++ b/Command/Smudge.hs @@ -9,9 +9,9 @@ module Command.Smudge where import Common.Annex import Command -import Annex.Content -import Annex.Link -import Git.Types +import Types.Key + +import qualified Data.ByteString.Lazy as B cmd :: Command cmd = dontCheck repoExists $ @@ -23,7 +23,25 @@ seek :: CmdParams -> CommandSeek seek = withWords start start :: [String] -> CommandStart -start [file] = do - error ("smudge " ++ file) +start [_file] = do + liftIO $ fileEncoding stdin + s <- liftIO $ hGetContents stdin + case parsePointer s of + Nothing -> liftIO $ putStr s + Just k -> do + content <- calcRepo (gitAnnexLocation k) + liftIO $ maybe + (putStr s) + (B.hPut stdout) + =<< catchMaybeIO (B.readFile content) + stop start [] = error "smudge filter run without filename; upgrade git" start _ = error "smudge filter passed multiple filenames" + +parsePointer :: String -> Maybe Key +parsePointer s + | length s' >= maxsz = Nothing -- too long to be a key pointer + | otherwise = headMaybe (lines s') >>= file2key + where + s' = take maxsz s + maxsz = 81920 From fdfda7b7bbc8c93baa821ae260f63485ee67960b Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Fri, 4 Dec 2015 14:10:18 -0400 Subject: [PATCH 04/96] annex.largefiles support for clean filter --- Command/Clean.hs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/Command/Clean.hs b/Command/Clean.hs index 0a8e438d1d..1793ddedb7 100644 --- a/Command/Clean.hs +++ b/Command/Clean.hs @@ -11,6 +11,7 @@ import Common.Annex import Command import Annex.Content import Annex.MetaData +import Annex.FileMatcher import Types.KeySource import Types.Key import Backend @@ -39,8 +40,9 @@ start [] = error "clean filter run without filename; upgrade git" start _ = error "clean filter passed multiple filenames" shouldAnnex :: FilePath -> Annex Bool -shouldAnnex _ = return True --- TODO check annex.largefiles +shouldAnnex file = do + matcher <- largeFilesMatcher + checkFileMatcher matcher file ingest :: FilePath -> Annex Key ingest file = do @@ -54,7 +56,7 @@ ingest file = do <$> genKey source backend -- Hard link (or copy) file content to annex -- to prevent it from being lost when git checks out - -- a branch not contaning this file. + -- a branch not containing this file. unlessM (linkAnnex k file) $ error "Problem adding file to the annex" genMetaData k file From ad06f8ceed081d9eaa7204151af5521145cfaa3b Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Fri, 4 Dec 2015 14:20:22 -0400 Subject: [PATCH 05/96] avoid commit and messages for smudge filter --- Command/Smudge.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Command/Smudge.hs b/Command/Smudge.hs index dc618d36e0..6c4b9604a3 100644 --- a/Command/Smudge.hs +++ b/Command/Smudge.hs @@ -14,7 +14,7 @@ import Types.Key import qualified Data.ByteString.Lazy as B cmd :: Command -cmd = dontCheck repoExists $ +cmd = noCommit $ noMessages $ dontCheck repoExists $ command "smudge" SectionPlumbing "git smudge filter" paramFile (withParams seek) From 99b2a524a063ec9ce374c8e7d864d2c0119c73bc Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Fri, 4 Dec 2015 14:20:32 -0400 Subject: [PATCH 06/96] clean filter should update location log when adding new content to annex --- Annex/Content.hs | 12 +++++++++--- Command/Clean.hs | 10 +++++++--- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/Annex/Content.hs b/Annex/Content.hs index 74fae381b7..73cb6ab012 100644 --- a/Annex/Content.hs +++ b/Annex/Content.hs @@ -25,6 +25,7 @@ module Annex.Content ( checkDiskSpace, moveAnnex, linkAnnex, + LinkAnnexResult(..), sendAnnex, prepSendAnnex, removeAnnex, @@ -479,15 +480,20 @@ moveAnnex key src = withObjectLoc key storeobject storedirect - prevent losing the content if the source file is deleted, but does not - guard against modifications. -} -linkAnnex :: Key -> FilePath -> Annex Bool +linkAnnex :: Key -> FilePath -> Annex LinkAnnexResult linkAnnex key src = do dest <- calcRepo (gitAnnexLocation key) ifM (liftIO $ doesFileExist dest) - ( return True + ( return LinkAnnexNoop , modifyContent dest $ - liftIO $ createLinkOrCopy src dest + ifM (liftIO $ createLinkOrCopy src dest) + ( return LinkAnnexOk + , return LinkAnnexFailed + ) ) +data LinkAnnexResult = LinkAnnexOk | LinkAnnexFailed | LinkAnnexNoop + {- Runs an action to transfer an object's content. - - In direct mode, it's possible for the file to change as it's being sent. diff --git a/Command/Clean.hs b/Command/Clean.hs index 1793ddedb7..15dcdfacbc 100644 --- a/Command/Clean.hs +++ b/Command/Clean.hs @@ -15,11 +15,12 @@ import Annex.FileMatcher import Types.KeySource import Types.Key import Backend +import Logs.Location import qualified Data.ByteString.Lazy as B cmd :: Command -cmd = dontCheck repoExists $ +cmd = noMessages $ dontCheck repoExists $ command "clean" SectionPlumbing "git clean filter" paramFile (withParams seek) @@ -57,8 +58,11 @@ ingest file = do -- Hard link (or copy) file content to annex -- to prevent it from being lost when git checks out -- a branch not containing this file. - unlessM (linkAnnex k file) $ - error "Problem adding file to the annex" + r <- linkAnnex k file + case r of + LinkAnnexFailed -> error "Problem adding file to the annex" + LinkAnnexOk -> logStatus k InfoPresent + LinkAnnexNoop -> noop genMetaData k file =<< liftIO (getFileStatus file) return k From 983c1894eb4dbf3d443f9953c69426974ac959fe Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Fri, 4 Dec 2015 14:57:28 -0400 Subject: [PATCH 07/96] avoid unnecessary reading of git-annex branch data when matching on annex.largefiles This makes git annex clean not look at the git-annex branch at all, and so speeds it up by 50% or more. --- Annex/FileMatcher.hs | 22 ++++++++++++---------- Limit.hs | 28 ++++++++++++++-------------- Logs/PreferredContent.hs | 4 ++-- 3 files changed, 28 insertions(+), 26 deletions(-) diff --git a/Annex/FileMatcher.hs b/Annex/FileMatcher.hs index 8b0db60ad0..a008198f31 100644 --- a/Annex/FileMatcher.hs +++ b/Annex/FileMatcher.hs @@ -14,7 +14,6 @@ import Limit import Utility.Matcher import Types.Group import Logs.Group -import Logs.Remote import Annex.UUID import qualified Annex import Types.FileMatcher @@ -53,8 +52,8 @@ parsedToMatcher parsed = case partitionEithers parsed of ([], vs) -> Right $ generate vs (es, _) -> Left $ unwords $ map ("Parse failure: " ++) es -exprParser :: FileMatcher Annex -> FileMatcher Annex -> GroupMap -> M.Map UUID RemoteConfig -> Maybe UUID -> String -> [Either String (Token (MatchFiles Annex))] -exprParser matchstandard matchgroupwanted groupmap configmap mu expr = +exprParser :: FileMatcher Annex -> FileMatcher Annex -> Annex GroupMap -> M.Map UUID RemoteConfig -> Maybe UUID -> String -> [Either String (Token (MatchFiles Annex))] +exprParser matchstandard matchgroupwanted getgroupmap configmap mu expr = map parse $ tokenizeMatcher expr where parse = parseToken @@ -62,12 +61,12 @@ exprParser matchstandard matchgroupwanted groupmap configmap mu expr = matchgroupwanted (limitPresent mu) (limitInDir preferreddir) - groupmap + getgroupmap preferreddir = fromMaybe "public" $ M.lookup "preferreddir" =<< (`M.lookup` configmap) =<< mu -parseToken :: FileMatcher Annex -> FileMatcher Annex -> MkLimit Annex -> MkLimit Annex -> GroupMap -> String -> Either String (Token (MatchFiles Annex)) -parseToken matchstandard matchgroupwanted checkpresent checkpreferreddir groupmap t +parseToken :: FileMatcher Annex -> FileMatcher Annex -> MkLimit Annex -> MkLimit Annex -> Annex GroupMap -> String -> Either String (Token (MatchFiles Annex)) +parseToken matchstandard matchgroupwanted checkpresent checkpreferreddir getgroupmap t | t `elem` tokens = Right $ token t | t == "standard" = call matchstandard | t == "groupwanted" = call matchgroupwanted @@ -86,7 +85,7 @@ parseToken matchstandard matchgroupwanted checkpresent checkpreferreddir groupma , ("largerthan", limitSize (>)) , ("smallerthan", limitSize (<)) , ("metadata", limitMetaData) - , ("inallgroup", limitInAllGroup groupmap) + , ("inallgroup", limitInAllGroup getgroupmap) ] where (k, v) = separate (== '=') t @@ -109,9 +108,12 @@ largeFilesMatcher = go =<< annexLargeFiles <$> Annex.getGitConfig where go Nothing = return matchAll go (Just expr) = do - gm <- groupMap - rc <- readRemoteLog u <- getUUID + -- No need to read remote configs, that's only needed for + -- inpreferreddir, which is used in preferred content + -- expressions but does not make sense in the + -- annex.largefiles expression. + let emptyconfig = M.empty either badexpr return $ - parsedToMatcher $ exprParser matchAll matchAll gm rc (Just u) expr + parsedToMatcher $ exprParser matchAll matchAll groupMap emptyconfig (Just u) expr badexpr e = error $ "bad annex.largefiles configuration: " ++ e diff --git a/Limit.hs b/Limit.hs index 6930ab06d9..321c1122b3 100644 --- a/Limit.hs +++ b/Limit.hs @@ -201,22 +201,22 @@ limitAnything _ _ = return True {- Adds a limit to skip files not believed to be present in all - repositories in the specified group. -} addInAllGroup :: String -> Annex () -addInAllGroup groupname = do - m <- groupMap - addLimit $ limitInAllGroup m groupname +addInAllGroup groupname = addLimit $ limitInAllGroup groupMap groupname -limitInAllGroup :: GroupMap -> MkLimit Annex -limitInAllGroup m groupname - | S.null want = Right $ const $ const $ return True - | otherwise = Right $ \notpresent -> checkKey $ check notpresent - where - want = fromMaybe S.empty $ M.lookup groupname $ uuidsByGroup m - check notpresent key +limitInAllGroup :: Annex GroupMap -> MkLimit Annex +limitInAllGroup getgroupmap groupname = Right $ \notpresent mi -> do + m <- getgroupmap + let want = fromMaybe S.empty $ M.lookup groupname $ uuidsByGroup m + if S.null want + then return True -- optimisation: Check if a wanted uuid is notpresent. - | not (S.null (S.intersection want notpresent)) = return False - | otherwise = do - present <- S.fromList <$> Remote.keyLocations key - return $ S.null $ want `S.difference` present + else if not (S.null (S.intersection want notpresent)) + then return False + else checkKey (check want) mi + where + check want key = do + present <- S.fromList <$> Remote.keyLocations key + return $ S.null $ want `S.difference` present {- Adds a limit to skip files not using a specified key-value backend. -} addInBackend :: String -> Annex () diff --git a/Logs/PreferredContent.hs b/Logs/PreferredContent.hs index c21d67010f..035c098f62 100644 --- a/Logs/PreferredContent.hs +++ b/Logs/PreferredContent.hs @@ -102,7 +102,7 @@ makeMatcher groupmap configmap groupwantedmap u = go True True | null (lefts tokens) = generate $ rights tokens | otherwise = unknownMatcher u where - tokens = exprParser matchstandard matchgroupwanted groupmap configmap (Just u) expr + tokens = exprParser matchstandard matchgroupwanted (pure groupmap) configmap (Just u) expr matchstandard | expandstandard = maybe (unknownMatcher u) (go False False) (standardPreferredContent <$> getStandardGroup mygroups) @@ -133,7 +133,7 @@ checkPreferredContentExpression expr = case parsedToMatcher tokens of Left e -> Just e Right _ -> Nothing where - tokens = exprParser matchAll matchAll emptyGroupMap M.empty Nothing expr + tokens = exprParser matchAll matchAll (pure emptyGroupMap) M.empty Nothing expr {- Puts a UUID in a standard group, and sets its preferred content to use - the standard expression for that group (unless preferred content is From 723e4e31a1704aab69e87e29dc10db0b9e85c674 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Fri, 4 Dec 2015 15:30:06 -0400 Subject: [PATCH 08/96] merge clean into smudge command The git filter config can be used to map the single git-annex command to the 2 actions, and this avoids "git annex clean" being used for this thing, it might have a better use for that name later. --- CmdLine/GitAnnex.hs | 2 -- Command/Clean.hs | 68 ------------------------------------- Command/Smudge.hs | 71 ++++++++++++++++++++++++++++++++++----- doc/git-annex-clean.mdwn | 36 -------------------- doc/git-annex-smudge.mdwn | 26 +++++++++----- doc/git-annex.mdwn | 8 +++-- 6 files changed, 86 insertions(+), 125 deletions(-) delete mode 100644 Command/Clean.hs delete mode 100644 doc/git-annex-clean.mdwn diff --git a/CmdLine/GitAnnex.hs b/CmdLine/GitAnnex.hs index ee31082064..ba7689f702 100644 --- a/CmdLine/GitAnnex.hs +++ b/CmdLine/GitAnnex.hs @@ -97,7 +97,6 @@ import qualified Command.Forget import qualified Command.Proxy import qualified Command.DiffDriver import qualified Command.Smudge -import qualified Command.Clean import qualified Command.Undo import qualified Command.Version #ifdef WITH_ASSISTANT @@ -204,7 +203,6 @@ cmds testoptparser testrunner = , Command.Proxy.cmd , Command.DiffDriver.cmd , Command.Smudge.cmd - , Command.Clean.cmd , Command.Undo.cmd , Command.Version.cmd #ifdef WITH_ASSISTANT diff --git a/Command/Clean.hs b/Command/Clean.hs deleted file mode 100644 index 15dcdfacbc..0000000000 --- a/Command/Clean.hs +++ /dev/null @@ -1,68 +0,0 @@ -{- git-annex command - - - - Copyright 2015 Joey Hess - - - - Licensed under the GNU GPL version 3 or higher. - -} - -module Command.Clean where - -import Common.Annex -import Command -import Annex.Content -import Annex.MetaData -import Annex.FileMatcher -import Types.KeySource -import Types.Key -import Backend -import Logs.Location - -import qualified Data.ByteString.Lazy as B - -cmd :: Command -cmd = noMessages $ dontCheck repoExists $ - command "clean" SectionPlumbing - "git clean filter" - paramFile (withParams seek) - -seek :: CmdParams -> CommandSeek -seek = withWords start - -start :: [String] -> CommandStart -start [file] = do - ifM (shouldAnnex file) - ( do - k <- ingest file - liftIO $ putStrLn (key2file k) - , liftIO $ B.hGetContents stdin >>= B.hPut stdout -- cat file - ) - stop -start [] = error "clean filter run without filename; upgrade git" -start _ = error "clean filter passed multiple filenames" - -shouldAnnex :: FilePath -> Annex Bool -shouldAnnex file = do - matcher <- largeFilesMatcher - checkFileMatcher matcher file - -ingest :: FilePath -> Annex Key -ingest file = do - backend <- chooseBackend file - let source = KeySource - { keyFilename = file - , contentLocation = file - , inodeCache = Nothing - } - k <- fst . fromMaybe (error "failed to generate a key") - <$> genKey source backend - -- Hard link (or copy) file content to annex - -- to prevent it from being lost when git checks out - -- a branch not containing this file. - r <- linkAnnex k file - case r of - LinkAnnexFailed -> error "Problem adding file to the annex" - LinkAnnexOk -> logStatus k InfoPresent - LinkAnnexNoop -> noop - genMetaData k file - =<< liftIO (getFileStatus file) - return k diff --git a/Command/Smudge.hs b/Command/Smudge.hs index 6c4b9604a3..07a3e1805f 100644 --- a/Command/Smudge.hs +++ b/Command/Smudge.hs @@ -10,20 +10,37 @@ module Command.Smudge where import Common.Annex import Command import Types.Key +import Annex.Content +import Annex.MetaData +import Annex.FileMatcher +import Types.KeySource +import Backend +import Logs.Location import qualified Data.ByteString.Lazy as B cmd :: Command -cmd = noCommit $ noMessages $ dontCheck repoExists $ +cmd = noCommit $ noMessages $ command "smudge" SectionPlumbing "git smudge filter" - paramFile (withParams seek) + paramFile (seek <$$> optParser) -seek :: CmdParams -> CommandSeek -seek = withWords start +data SmudgeOptions = SmudgeOptions + { smudgeFile :: FilePath + , cleanOption :: Bool + } -start :: [String] -> CommandStart -start [_file] = do +optParser :: CmdParamsDesc -> Parser SmudgeOptions +optParser desc = SmudgeOptions + <$> argument str ( metavar desc ) + <*> switch ( long "clean" <> help "clean filter" ) + +seek :: SmudgeOptions -> CommandSeek +seek o = commandAction $ + (if cleanOption o then clean else smudge) (smudgeFile o) + +smudge :: FilePath -> CommandStart +smudge _file = do liftIO $ fileEncoding stdin s <- liftIO $ hGetContents stdin case parsePointer s of @@ -35,8 +52,46 @@ start [_file] = do (B.hPut stdout) =<< catchMaybeIO (B.readFile content) stop -start [] = error "smudge filter run without filename; upgrade git" -start _ = error "smudge filter passed multiple filenames" + +clean :: FilePath -> CommandStart +clean file = do + ifM (shouldAnnex file) + ( do + k <- ingest file + liftIO $ emitPointer k + , liftIO $ B.hGetContents stdin >>= B.hPut stdout -- cat file + ) + stop + +shouldAnnex :: FilePath -> Annex Bool +shouldAnnex file = do + matcher <- largeFilesMatcher + checkFileMatcher matcher file + +ingest :: FilePath -> Annex Key +ingest file = do + backend <- chooseBackend file + let source = KeySource + { keyFilename = file + , contentLocation = file + , inodeCache = Nothing + } + k <- fst . fromMaybe (error "failed to generate a key") + <$> genKey source backend + -- Hard link (or copy) file content to annex + -- to prevent it from being lost when git checks out + -- a branch not containing this file. + r <- linkAnnex k file + case r of + LinkAnnexFailed -> error "Problem adding file to the annex" + LinkAnnexOk -> logStatus k InfoPresent + LinkAnnexNoop -> noop + genMetaData k file + =<< liftIO (getFileStatus file) + return k + +emitPointer :: Key -> IO () +emitPointer = putStrLn . key2file parsePointer :: String -> Maybe Key parsePointer s diff --git a/doc/git-annex-clean.mdwn b/doc/git-annex-clean.mdwn deleted file mode 100644 index 13099a5447..0000000000 --- a/doc/git-annex-clean.mdwn +++ /dev/null @@ -1,36 +0,0 @@ -# NAME - -git-annex clean - git filter driver for git-annex - -# SYNOPSIS - -git annex clean - -# DESCRIPTION - -When git-annex is used as a git filter driver, this command is run -by git commands such as `git add`. It generates a file that -is added to the git repository and points to the git-annex object -containing the content of a large file. - -To configure git to use git-annex as a git filter driver, place the -following in the .gitattributes file: - - * filter=annex - .* !filter - -The annex.largefiles config is consulted to decide if a given file should -be added to git as-is, or if its content are large enough to need to use -git-annex. - -# SEE ALSO - -[[git-annex]](1) - -[[git-annex-smudge]](1) - -# AUTHOR - -Joey Hess - -Warning: Automatically converted into a man page by mdwn2man. Edit with care. diff --git a/doc/git-annex-smudge.mdwn b/doc/git-annex-smudge.mdwn index ae28be2c7d..a4f458ee51 100644 --- a/doc/git-annex-smudge.mdwn +++ b/doc/git-annex-smudge.mdwn @@ -4,26 +4,36 @@ git-annex smudge - git filter driver for git-annex # SYNOPSIS -git annex smudge +git annex smudge [--clean] file # DESCRIPTION -When git-annex is used as a git filter driver, this command is run -by git commands such as `git checkout` and outputs the content of annexed -objects that pointer files point to. +This command lets git-annex be used as a git filter driver which lets +annexed files in the git repository to be unlocked at all times, instead +of being symlinks. -To configure git to use git-annex as a git filter driver, place the -following in the .gitattributes file: +The git configuration to use this command as a filter driver is as follows, +but this is normally set up for you by git-annex init, so you should +not need to configure it manually: + + [filter "annex"] + clean = git-annex smudge --clean %f + smudge = git-annex smudge %f + +To make git use this filter on all files except for dotfiles, put something +like the following in the .gitattributes file: * filter=annex .* !filter +When adding a file with `git add`, the annex.largefiles config is +consulted to decide if a given file should be added to git as-is, +or if its content are large enough to need to use git-annex. + # SEE ALSO [[git-annex]](1) -[[git-annex-clean]](1) - # AUTHOR Joey Hess diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn index a8cb73b1b1..1a2fd6e679 100644 --- a/doc/git-annex.mdwn +++ b/doc/git-annex.mdwn @@ -626,11 +626,13 @@ subdirectories). See [[git-annex-diffdriver]](1) for details. -* `smudge`, `clean` +* `smudge` - These let git-annex be used as a git filter driver. + This command lets git-annex be used as a git filter driver, allowing + annexed files in the git repository to be unlocked at all times, instead + of being symlinks. - See [[git-annex-smudge]](1) and [[git-annex-clean]](1) for details. + See [[git-annex-smudge]](1) for details. * `remotedaemon` From 34ead644d93085ab65cdeb21405539d4699424bc Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Fri, 4 Dec 2015 16:14:11 -0400 Subject: [PATCH 09/96] auto-configure filter.annex.smudge and clean on init --- Annex/Init.hs | 3 ++- Config.hs | 5 +++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/Annex/Init.hs b/Annex/Init.hs index 65e9aa474a..b00e412180 100644 --- a/Annex/Init.hs +++ b/Annex/Init.hs @@ -85,7 +85,8 @@ initialize' = do unlessM isBare $ hookWrite preCommitHook setDifferences - setVersion supportedVersion + setVersion currentVersion + configureSmudgeFilter ifM (crippledFileSystem <&&> not <$> isBare) ( do enableDirectMode diff --git a/Config.hs b/Config.hs index 4af4f12842..bddb8abe3d 100644 --- a/Config.hs +++ b/Config.hs @@ -90,3 +90,8 @@ setCrippledFileSystem :: Bool -> Annex () setCrippledFileSystem b = do setConfig (annexConfig "crippledfilesystem") (Git.Config.boolConfig b) Annex.changeGitConfig $ \c -> c { annexCrippledFileSystem = b } + +configureSmudgeFilter :: Annex () +configureSmudgeFilter = do + setConfig (ConfigKey "filter.annex.smudge") "git-annex smudge %f" + setConfig (ConfigKey "filter.annex.clean") "git-annex smudge --clean %f" From ccc49861ca35d98b2cbbc0c29c730adc4f2e4d73 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Fri, 4 Dec 2015 16:14:48 -0400 Subject: [PATCH 10/96] add v6; keep v5 working for now and manual upgrade Since all places where a repo is used in direct mode need to have git-annex upgraded before the repo can safely be converted to v6, the upgrade needs to be manual for now. I suppose that at some point I'll want to drop all the direct mode support code. At that point, will stop supporting v5, and will need to auto-upgrade any remaining v5 repos. If possible, I'd like to carry the direct mode support for say, a year or so, to give people plenty of time to upgrade and avoid disruption. --- Annex/Version.hs | 11 +++++++---- Command/Version.hs | 3 ++- Upgrade.hs | 6 ++++-- Upgrade/V1.hs | 4 ++-- Upgrade/V5.hs | 18 ++++++++++++++++++ debian/changelog | 8 +++++++- doc/git-annex-smudge.mdwn | 2 +- doc/todo/smudge.mdwn | 8 +++++--- doc/upgrades.mdwn | 28 ++++++++++++++++++++++++++++ 9 files changed, 74 insertions(+), 14 deletions(-) create mode 100644 Upgrade/V5.hs diff --git a/Annex/Version.hs b/Annex/Version.hs index d08f994e95..e1983fc67a 100644 --- a/Annex/Version.hs +++ b/Annex/Version.hs @@ -15,14 +15,17 @@ import qualified Annex type Version = String -supportedVersion :: Version -supportedVersion = "5" +currentVersion :: Version +currentVersion = "6" + +supportedVersions :: [Version] +supportedVersions = ["5", currentVersion] upgradableVersions :: [Version] #ifndef mingw32_HOST_OS -upgradableVersions = ["0", "1", "2", "4"] +upgradableVersions = ["0", "1", "2", "4", "5"] #else -upgradableVersions = ["2", "3", "4"] +upgradableVersions = ["2", "3", "4", "5"] #endif autoUpgradeableVersions :: [Version] diff --git a/Command/Version.hs b/Command/Version.hs index 72bbe40648..c5a9fcef2a 100644 --- a/Command/Version.hs +++ b/Command/Version.hs @@ -50,7 +50,8 @@ showVersion = do liftIO $ do showPackageVersion vinfo "local repository version" $ fromMaybe "unknown" v - vinfo "supported repository version" supportedVersion + vinfo "supported repository versions" $ + unwords supportedVersions vinfo "upgrade supported from repository versions" $ unwords upgradableVersions diff --git a/Upgrade.hs b/Upgrade.hs index 8d205a874a..1f4a8d8dec 100644 --- a/Upgrade.hs +++ b/Upgrade.hs @@ -18,13 +18,14 @@ import qualified Upgrade.V1 import qualified Upgrade.V2 import qualified Upgrade.V3 import qualified Upgrade.V4 +import qualified Upgrade.V5 checkUpgrade :: Version -> Annex () checkUpgrade = maybe noop error <=< needsUpgrade needsUpgrade :: Version -> Annex (Maybe String) needsUpgrade v - | v == supportedVersion = ok + | v `elem` supportedVersions = ok | v `elem` autoUpgradeableVersions = ifM (upgrade True) ( ok , err "Automatic upgrade failed!" @@ -40,7 +41,7 @@ upgrade :: Bool -> Annex Bool upgrade automatic = do upgraded <- go =<< getVersion when upgraded $ - setVersion supportedVersion + setVersion currentVersion return upgraded where #ifndef mingw32_HOST_OS @@ -53,4 +54,5 @@ upgrade automatic = do go (Just "2") = Upgrade.V2.upgrade go (Just "3") = Upgrade.V3.upgrade automatic go (Just "4") = Upgrade.V4.upgrade automatic + go (Just "5") = Upgrade.V5.upgrade automatic go _ = return True diff --git a/Upgrade/V1.hs b/Upgrade/V1.hs index 801cdafa0d..bcf7e0b6de 100644 --- a/Upgrade/V1.hs +++ b/Upgrade/V1.hs @@ -54,14 +54,14 @@ upgrade = do ifM (fromRepo Git.repoIsLocalBare) ( do moveContent - setVersion supportedVersion + setVersion currentVersion , do moveContent updateSymlinks moveLocationLogs Annex.Queue.flush - setVersion supportedVersion + setVersion currentVersion ) Upgrade.V2.upgrade diff --git a/Upgrade/V5.hs b/Upgrade/V5.hs new file mode 100644 index 0000000000..cf273bb16a --- /dev/null +++ b/Upgrade/V5.hs @@ -0,0 +1,18 @@ +{- git-annex v5 -> v6 uppgrade support + - + - Copyright 2015 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Upgrade.V5 where + +import Common.Annex +import Config + +upgrade :: Bool -> Annex Bool +upgrade automatic = do + unless automatic $ + showAction "v5 to v6" + configureSmudgeFilter + return True diff --git a/debian/changelog b/debian/changelog index fae4b598b4..f19a22b3eb 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,5 +1,11 @@ -git-annex (5.20151117) UNRELEASED; urgency=medium +git-annex (6.20151117) UNRELEASED; urgency=medium + * annex.version increased to 6, but version 5 is also still supported. + * The upgrade to version 6 is not done fully automatically, because + upgrading a direct mode repository to version 6 will prevent old + versions of git-annex from working in that repository. + * smudge: New command, used for git smudge filter. + This will replace direct mode. * Build with -j1 again to get reproducible build. * Display progress meter in -J mode when copying from a local git repo, to a local git repo, and from a remote git repo. diff --git a/doc/git-annex-smudge.mdwn b/doc/git-annex-smudge.mdwn index a4f458ee51..5186d2a178 100644 --- a/doc/git-annex-smudge.mdwn +++ b/doc/git-annex-smudge.mdwn @@ -17,8 +17,8 @@ but this is normally set up for you by git-annex init, so you should not need to configure it manually: [filter "annex"] - clean = git-annex smudge --clean %f smudge = git-annex smudge %f + clean = git-annex smudge --clean %f To make git use this filter on all files except for dotfiles, put something like the following in the .gitattributes file: diff --git a/doc/todo/smudge.mdwn b/doc/todo/smudge.mdwn index 094b5f880f..6e6af6f65b 100644 --- a/doc/todo/smudge.mdwn +++ b/doc/todo/smudge.mdwn @@ -308,15 +308,17 @@ annex.version changes to 6 Upgrade should be handled automatically. -On upgrade, update .gitattributes with a stock configuration, unless -it already mentions "filter=annex". +On upgrade, update $GIT_DIR/info/attributes with a stock configuration, +unless it already mentions "filter=annex". Upgrading a direct mode repo needs to switch it out of bare mode, and needs to run `git annex unlock` on all files (or reach the same result). So will need to stage changes to all annexed files. When a repo has some clones indirect and some direct, the upgraded repo -will have all files unlocked, necessarily in all clones. +will have all files unlocked, necessarily in all clones. This happens +automatically, because when the direct repos are upgraded that causes the +files to be unlocked, while the indirect upgrades don't touch the files. ---- diff --git a/doc/upgrades.mdwn b/doc/upgrades.mdwn index f5e9cbc3a6..7600d8e144 100644 --- a/doc/upgrades.mdwn +++ b/doc/upgrades.mdwn @@ -43,6 +43,34 @@ conflicts first before upgrading git-annex. The upgrade events, so far: +## v5 -> v6 (git-annex version 6.x) + +The upgrade from v5 to v6 is handled manually. Run `git-annex upgrade` +perform the upgrade. + +All places that a direct mode repository is cloned to should be +running git-annex version 6.x before you upgrade the repository. +This is necessary because the contents of the repository are changed +in the upgrade, and the old version of git-annex won't be able to +access files after the repo is upgraded. + +If a repository is only used in indirect mode, this upgrade will not +affect it significantly. You can use git-annex v5 and v6 in different +clones of the same indirect mode repository without problems. + +This upgrade does away with the direct mode/indirect mode distinction. +A v6 git-annex repository can have some files locked and other files +unlocked, and all git and git-annex commands can be used on both locked and +unlocked files. (Although for locked files to work, the filesystem +must support symbolic links..) + +On upgrade, all files in a direct mode repository will be converted to +unlocked files. The upgrade will need to stage changes to all files in +the git repository. + +If a repository has some clones using direct mode and some using indirect +mode, all the files will end up unlocked in all clones after the upgrade. + ## v4 -> v5 (git-annex version 5.x) The upgrade from v4 to v5 is handled From e7f75b079db8ca406a71be25c2bdec08678413a9 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Fri, 4 Dec 2015 16:29:27 -0400 Subject: [PATCH 11/96] don't let git-annex direct be run in a v6 repo --- Annex/Version.hs | 6 ++++++ Command/Direct.hs | 6 +++++- doc/direct_mode.mdwn | 7 +++++++ doc/git-annex-direct.mdwn | 6 ++++++ doc/git-annex-indirect.mdwn | 5 ++--- doc/todo/smudge.mdwn | 2 -- 6 files changed, 26 insertions(+), 6 deletions(-) diff --git a/Annex/Version.hs b/Annex/Version.hs index e1983fc67a..f9b24d9c49 100644 --- a/Annex/Version.hs +++ b/Annex/Version.hs @@ -37,6 +37,12 @@ versionField = annexConfig "version" getVersion :: Annex (Maybe Version) getVersion = annexVersion <$> Annex.getGitConfig +versionSupportsDirectMode :: Annex Bool +versionSupportsDirectMode = go <$> getVersion + where + go (Just "6") = False + go _ = True + setVersion :: Version -> Annex () setVersion = setConfig versionField diff --git a/Command/Direct.hs b/Command/Direct.hs index 162780dd5c..9cfd258ebd 100644 --- a/Command/Direct.hs +++ b/Command/Direct.hs @@ -14,6 +14,7 @@ import qualified Git.LsFiles import qualified Git.Branch import Config import Annex.Direct +import Annex.Version cmd :: Command cmd = notBareRepo $ noDaemonRunning $ @@ -24,7 +25,10 @@ seek :: CmdParams -> CommandSeek seek = withNothing start start :: CommandStart -start = ifM isDirect ( stop , next perform ) +start = ifM versionSupportsDirectMode + ( ifM isDirect ( stop , next perform ) + , error "Direct mode is not suppported by this repository version. Use git-annex unlock instead." + ) perform :: CommandPerform perform = do diff --git a/doc/direct_mode.mdwn b/doc/direct_mode.mdwn index 4c2cb2dd7c..d3e1067f9a 100644 --- a/doc/direct_mode.mdwn +++ b/doc/direct_mode.mdwn @@ -9,6 +9,13 @@ understand how to update its working tree. [[!toc]] +## deprecated + +Direct mode is deprecated! Intead, git-annex v6 repositories can simply +have files that are unlocked and thus can be directly accessed and +modified. See [[upgrades]] for details about the transition to v6 +repositories. + ## enabling (and disabling) direct mode Normally, git-annex repositories start off in indirect mode. With some diff --git a/doc/git-annex-direct.mdwn b/doc/git-annex-direct.mdwn index 457ae31162..3cade1a8c9 100644 --- a/doc/git-annex-direct.mdwn +++ b/doc/git-annex-direct.mdwn @@ -17,12 +17,18 @@ Note that git commands that operate on the work tree will refuse to run in direct mode repositories. Use `git annex proxy` to safely run such commands. +Note that the direct mode/indirect mode distinction is removed in v6 +git-annex repositories. In such a repository, you can +use [[git-annex-unlock]](1) to make a file's content be directly present. + # SEE ALSO [[git-annex]](1) [[git-annex-indirect]](1) +[[git-annex-unlock]](1) + # AUTHOR Joey Hess diff --git a/doc/git-annex-indirect.mdwn b/doc/git-annex-indirect.mdwn index 99def61448..321e0fb369 100644 --- a/doc/git-annex-indirect.mdwn +++ b/doc/git-annex-indirect.mdwn @@ -11,9 +11,8 @@ git annex indirect Switches a repository back from direct mode to the default, indirect mode. -Some systems cannot support git-annex in indirect mode, because they -do not support symbolic links. Repositories on such systems instead -default to using direct mode. +Note that the direct mode/indirect mode distinction is removed in v6 +git-annex repositories. # SEE ALSO diff --git a/doc/todo/smudge.mdwn b/doc/todo/smudge.mdwn index 6e6af6f65b..7a232123a6 100644 --- a/doc/todo/smudge.mdwn +++ b/doc/todo/smudge.mdwn @@ -306,8 +306,6 @@ just look at the repo content in the first place.. annex.version changes to 6 -Upgrade should be handled automatically. - On upgrade, update $GIT_DIR/info/attributes with a stock configuration, unless it already mentions "filter=annex". From e8ca01cbc0383c724f63a8fe7ba468278285335a Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Fri, 4 Dec 2015 16:46:00 -0400 Subject: [PATCH 12/96] comments --- Command/Smudge.hs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Command/Smudge.hs b/Command/Smudge.hs index 07a3e1805f..ddf81af437 100644 --- a/Command/Smudge.hs +++ b/Command/Smudge.hs @@ -39,6 +39,8 @@ seek :: SmudgeOptions -> CommandSeek seek o = commandAction $ (if cleanOption o then clean else smudge) (smudgeFile o) +-- Smudge filter is fed git file content, and if it's a pointer, should +-- emit the annex object content. smudge :: FilePath -> CommandStart smudge _file = do liftIO $ fileEncoding stdin @@ -53,6 +55,8 @@ smudge _file = do =<< catchMaybeIO (B.readFile content) stop +-- Clean filter decides if a file should be stored in the annex, and +-- outputs a pointer to its injested content. clean :: FilePath -> CommandStart clean file = do ifM (shouldAnnex file) From fb6ebdaae7a3c57cc27e2dfba2736497e0d61892 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Fri, 4 Dec 2015 17:18:26 -0400 Subject: [PATCH 13/96] refactor --- Command/Smudge.hs | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/Command/Smudge.hs b/Command/Smudge.hs index ddf81af437..70a318c2d5 100644 --- a/Command/Smudge.hs +++ b/Command/Smudge.hs @@ -39,8 +39,8 @@ seek :: SmudgeOptions -> CommandSeek seek o = commandAction $ (if cleanOption o then clean else smudge) (smudgeFile o) --- Smudge filter is fed git file content, and if it's a pointer, should --- emit the annex object content. +-- Smudge filter is fed git file content, and if it's a pointer to an +-- available annex object, should output its content. smudge :: FilePath -> CommandStart smudge _file = do liftIO $ fileEncoding stdin @@ -63,10 +63,13 @@ clean file = do ( do k <- ingest file liftIO $ emitPointer k - , liftIO $ B.hGetContents stdin >>= B.hPut stdout -- cat file + , liftIO cat ) stop +cat :: IO () +cat = B.hGetContents stdin >>= B.hPut stdout + shouldAnnex :: FilePath -> Annex Bool shouldAnnex file = do matcher <- largeFilesMatcher From 2fe21d47c54bca4a7bb339d991105cfea460d876 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Fri, 4 Dec 2015 17:57:15 -0400 Subject: [PATCH 14/96] init: Configure .git/info/attributes to use git-annex as a smudge filter. Note that this changes the default behavior of git add in a newly initialized repository; it will add files to the annex. Don't like that this could break workflows, but it's necessary in order for any pointer files in the repo to be handled by git-annex. --- Config.hs | 13 +++++++++++++ Git.hs | 8 ++++++-- debian/changelog | 3 +++ doc/git-annex-smudge.mdwn | 24 +++++++++++++++--------- doc/todo/smudge.mdwn | 4 +++- doc/upgrades.mdwn | 29 ++++++++++++++++++++--------- 6 files changed, 60 insertions(+), 21 deletions(-) diff --git a/Config.hs b/Config.hs index bddb8abe3d..f3833b17e4 100644 --- a/Config.hs +++ b/Config.hs @@ -95,3 +95,16 @@ configureSmudgeFilter :: Annex () configureSmudgeFilter = do setConfig (ConfigKey "filter.annex.smudge") "git-annex smudge %f" setConfig (ConfigKey "filter.annex.clean") "git-annex smudge --clean %f" + lf <- Annex.fromRepo Git.attributesLocal + gf <- Annex.fromRepo Git.attributes + lfs <- readattr lf + gfs <- readattr gf + liftIO $ unless ("filter=annex" `isInfixOf` (lfs ++ gfs)) $ do + createDirectoryIfMissing True (takeDirectory lf) + writeFile lf (lfs ++ "\n" ++ stdattr) + where + readattr = liftIO . catchDefaultIO "" . readFileStrictAnyEncoding + stdattr = unlines + [ "* filter=annex" + , ".* !filter" + ] diff --git a/Git.hs b/Git.hs index 1bc789f85d..6f7769c879 100644 --- a/Git.hs +++ b/Git.hs @@ -28,6 +28,7 @@ module Git ( repoPath, localGitDir, attributes, + attributesLocal, hookPath, assertLocal, adjustPath, @@ -125,8 +126,11 @@ assertLocal repo action {- Path to a repository's gitattributes file. -} attributes :: Repo -> FilePath attributes repo - | repoIsLocalBare repo = repoPath repo ++ "/info/.gitattributes" - | otherwise = repoPath repo ++ "/.gitattributes" + | repoIsLocalBare repo = attributesLocal repo + | otherwise = repoPath repo ".gitattributes" + +attributesLocal :: Repo -> FilePath +attributesLocal repo = localGitDir repo "info" "attributes" {- Path to a given hook script in a repository, only if the hook exists - and is executable. -} diff --git a/debian/changelog b/debian/changelog index f19a22b3eb..5fde37c88e 100644 --- a/debian/changelog +++ b/debian/changelog @@ -6,6 +6,9 @@ git-annex (6.20151117) UNRELEASED; urgency=medium versions of git-annex from working in that repository. * smudge: New command, used for git smudge filter. This will replace direct mode. + * init: Configure .git/info/attributes to use git-annex as a smudge + filter. Note that this changes the default behavior of git add in a + newly initialized repository; it will add files to the annex. * Build with -j1 again to get reproducible build. * Display progress meter in -J mode when copying from a local git repo, to a local git repo, and from a remote git repo. diff --git a/doc/git-annex-smudge.mdwn b/doc/git-annex-smudge.mdwn index 5186d2a178..c8e5453671 100644 --- a/doc/git-annex-smudge.mdwn +++ b/doc/git-annex-smudge.mdwn @@ -12,24 +12,30 @@ This command lets git-annex be used as a git filter driver which lets annexed files in the git repository to be unlocked at all times, instead of being symlinks. -The git configuration to use this command as a filter driver is as follows, -but this is normally set up for you by git-annex init, so you should -not need to configure it manually: +When adding a file with `git add`, the annex.largefiles config is +consulted to decide if a given file should be added to git as-is, +or if its content are large enough to need to use git-annex. To force a +file that would normally be added to the annex to be added to git as-is, +this can be temporarily overridden. For example: + + git -c annex.largefiles='exclude=*' add myfile + +The git configuration to use this command as a filter driver is as follows. +This is normally set up for you by git-annex init, so you should +not need to configure it manually. [filter "annex"] smudge = git-annex smudge %f clean = git-annex smudge --clean %f -To make git use this filter on all files except for dotfiles, put something -like the following in the .gitattributes file: +To make git use that filter driver, it needs to be configured in +the .gitattributes file or in `.git/config/attributes`. The latter +is normally configured when a repository is initialized, with the following +contents: * filter=annex .* !filter -When adding a file with `git add`, the annex.largefiles config is -consulted to decide if a given file should be added to git as-is, -or if its content are large enough to need to use git-annex. - # SEE ALSO [[git-annex]](1) diff --git a/doc/todo/smudge.mdwn b/doc/todo/smudge.mdwn index 7a232123a6..d08d600aee 100644 --- a/doc/todo/smudge.mdwn +++ b/doc/todo/smudge.mdwn @@ -306,7 +306,9 @@ just look at the repo content in the first place.. annex.version changes to 6 -On upgrade, update $GIT_DIR/info/attributes with a stock configuration, +git config for filter.annex.smudge and filter.annex.clean is set up. + +.gitattributes is updated with a stock configuration, unless it already mentions "filter=annex". Upgrading a direct mode repo needs to switch it out of bare mode, and diff --git a/doc/upgrades.mdwn b/doc/upgrades.mdwn index 7600d8e144..d69941cb1b 100644 --- a/doc/upgrades.mdwn +++ b/doc/upgrades.mdwn @@ -48,21 +48,32 @@ The upgrade events, so far: The upgrade from v5 to v6 is handled manually. Run `git-annex upgrade` perform the upgrade. +This upgrade does away with the direct mode/indirect mode distinction. +A v6 git-annex repository can have some files locked and other files +unlocked, and all git and git-annex commands can be used on both locked and +unlocked files. (Although for locked files to work, the filesystem +must support symbolic links..) + +The behavior of some commands changes in an upgraded repository: + +* `git add` will add files to the annex, in unlocked mode, rather than + adding them directly to the git repository. To bypass that and add a file + directly to git, use: + + git -c annex.largefiles='exclude=*' add myfile + +* `git annex unlock` and `git annex lock` change how the pointer to + the annexed content is stored in git. + All places that a direct mode repository is cloned to should be running git-annex version 6.x before you upgrade the repository. This is necessary because the contents of the repository are changed in the upgrade, and the old version of git-annex won't be able to access files after the repo is upgraded. -If a repository is only used in indirect mode, this upgrade will not -affect it significantly. You can use git-annex v5 and v6 in different -clones of the same indirect mode repository without problems. - -This upgrade does away with the direct mode/indirect mode distinction. -A v6 git-annex repository can have some files locked and other files -unlocked, and all git and git-annex commands can be used on both locked and -unlocked files. (Although for locked files to work, the filesystem -must support symbolic links..) +If a repository is only used in indirect mode, you can use git-annex +v5 and v6 in different clones of the same indirect mode repository without +problems. On upgrade, all files in a direct mode repository will be converted to unlocked files. The upgrade will need to stage changes to all files in From 63c466449fbffad0b0892acdba8431380e2e1d38 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Mon, 7 Dec 2015 12:30:10 -0400 Subject: [PATCH 15/96] wording --- debian/changelog | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/debian/changelog b/debian/changelog index 87affe138a..6e4ba115c5 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,7 +1,7 @@ * annex.version increased to 6, but version 5 is also still supported. * The upgrade to version 6 is not done fully automatically, because upgrading a direct mode repository to version 6 will prevent old - versions of git-annex from working in that repository. + versions of git-annex from working in other clones of that repository. * smudge: New command, used for git smudge filter. This will replace direct mode. * init: Configure .git/info/attributes to use git-annex as a smudge From a6e5ee0d0ef4958b17e7aef502edc1913dacaf1a Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Mon, 7 Dec 2015 13:42:03 -0400 Subject: [PATCH 16/96] associated files database --- Database/AssociatedFiles.hs | 94 +++++++++++++++++++++++++++++++++++++ Database/Fsck.hs | 2 +- Locations.hs | 10 ++++ 3 files changed, 105 insertions(+), 1 deletion(-) create mode 100644 Database/AssociatedFiles.hs diff --git a/Database/AssociatedFiles.hs b/Database/AssociatedFiles.hs new file mode 100644 index 0000000000..8244f15e80 --- /dev/null +++ b/Database/AssociatedFiles.hs @@ -0,0 +1,94 @@ +{- Sqlite database used for tracking a key's associated files. + - + - Copyright 2015 Joey Hess + -: + - Licensed under the GNU GPL version 3 or higher. + -} + +{-# LANGUAGE QuasiQuotes, TypeFamilies, TemplateHaskell #-} +{-# LANGUAGE OverloadedStrings, GADTs, FlexibleContexts #-} +{-# LANGUAGE MultiParamTypeClasses, GeneralizedNewtypeDeriving #-} +{-# LANGUAGE RankNTypes #-} + +module Database.AssociatedFiles ( + DbHandle, + openDb, + closeDb, + addDb, + getDb, + removeDb, + AssociatedId, +) where + +import Database.Types +import qualified Database.Handle as H +import Locations +import Common hiding (delete) +import Annex +import Types.Key +import Annex.Perms +import Annex.LockFile +import Messages + +import Database.Persist.TH +import Database.Esqueleto hiding (Key) + +newtype DbHandle = DbHandle H.DbHandle + +share [mkPersist sqlSettings, mkMigrate "migrateAssociated"] [persistLowerCase| +Associated + key SKey + file FilePath + KeyFileIndex key file +|] + +{- Opens the database, creating it if it doesn't exist yet. -} +openDb :: Annex DbHandle +openDb = withExclusiveLock gitAnnexAssociatedFilesDbLock $ do + dbdir <- fromRepo gitAnnexAssociatedFilesDb + let db = dbdir "db" + unlessM (liftIO $ doesFileExist db) $ do + liftIO $ do + createDirectoryIfMissing True dbdir + H.initDb db $ void $ + runMigrationSilent migrateAssociated + setAnnexDirPerm dbdir + setAnnexFilePerm db + h <- liftIO $ H.openDb db "associated" + + -- work around https://github.com/yesodweb/persistent/issues/474 + liftIO setConsoleEncoding + + return $ DbHandle h + +closeDb :: DbHandle -> IO () +closeDb (DbHandle h) = H.closeDb h + +addDb :: DbHandle -> Key -> FilePath -> IO () +addDb (DbHandle h) k f = H.queueDb h (\_ _ -> pure True) $ do + -- If the same file was associated with a different key before, + -- remove that. + delete $ from $ \r -> do + where_ (r ^. AssociatedFile ==. val f &&. r ^. AssociatedKey ==. val sk) + void $ insertUnique $ Associated sk f + where + sk = toSKey k + +{- Note that the files returned used to be associated with the key, but + - some of them may not be any longer. -} +getDb :: DbHandle -> Key -> IO [FilePath] +getDb (DbHandle h) = H.queryDb h . getDb' . toSKey + +getDb' :: SKey -> SqlPersistM [FilePath] +getDb' sk = do + l <- select $ from $ \r -> do + where_ (r ^. AssociatedKey ==. val sk) + return (r ^. AssociatedFile) + return $ map unValue l + +removeDb :: DbHandle -> Key -> FilePath -> IO () +removeDb (DbHandle h) k f = H.queueDb h (\_ _ -> pure True) $ + delete $ from $ \r -> do + where_ (r ^. AssociatedKey ==. val sk &&. r ^. AssociatedFile ==. val f) + where + sk = toSKey k diff --git a/Database/Fsck.hs b/Database/Fsck.hs index ed00e62d80..b0e56f6c0d 100644 --- a/Database/Fsck.hs +++ b/Database/Fsck.hs @@ -59,7 +59,7 @@ newPass u = isJust <$> tryExclusiveLock (gitAnnexFsckDbLock u) go go = liftIO . void . tryIO . removeDirectoryRecursive =<< fromRepo (gitAnnexFsckDbDir u) -{- Opens the database, creating it atomically if it doesn't exist yet. -} +{- Opens the database, creating it if it doesn't exist yet. -} openDb :: UUID -> Annex FsckHandle openDb u = do dbdir <- fromRepo (gitAnnexFsckDbDir u) diff --git a/Locations.hs b/Locations.hs index ba61151554..6082957c74 100644 --- a/Locations.hs +++ b/Locations.hs @@ -29,6 +29,8 @@ module Locations ( gitAnnexBadDir, gitAnnexBadLocation, gitAnnexUnusedLog, + gitAnnexAssociatedFilesDb, + gitAnnexAssociatedFilesDbLock, gitAnnexFsckState, gitAnnexFsckDbDir, gitAnnexFsckDbLock, @@ -237,6 +239,14 @@ gitAnnexBadLocation key r = gitAnnexBadDir r keyFile key gitAnnexUnusedLog :: FilePath -> Git.Repo -> FilePath gitAnnexUnusedLog prefix r = gitAnnexDir r (prefix ++ "unused") +{- .git/annex/map/ contains a database for the associated files map -} +gitAnnexAssociatedFilesDb :: Git.Repo -> FilePath +gitAnnexAssociatedFilesDb r = gitAnnexDir r "map" + +{- Lock file for the associated files map database. -} +gitAnnexAssociatedFilesDbLock :: Git.Repo -> FilePath +gitAnnexAssociatedFilesDbLock r = gitAnnexAssociatedFilesDb r ++ "lck" + {- .git/annex/fsck/uuid/ is used to store information about incremental - fscks. -} gitAnnexFsckDir :: UUID -> Git.Repo -> FilePath From 2cbcb4f1a8e131ec532ce379d8fcdd63f7faa23e Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Mon, 7 Dec 2015 14:35:46 -0400 Subject: [PATCH 17/96] update associated files database on smudge and clean --- Command/Smudge.hs | 12 +++++++++++- doc/todo/smudge.mdwn | 3 ++- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/Command/Smudge.hs b/Command/Smudge.hs index 70a318c2d5..6cca8035e6 100644 --- a/Command/Smudge.hs +++ b/Command/Smudge.hs @@ -16,6 +16,7 @@ import Annex.FileMatcher import Types.KeySource import Backend import Logs.Location +import qualified Database.AssociatedFiles as AssociatedFiles import qualified Data.ByteString.Lazy as B @@ -42,12 +43,13 @@ seek o = commandAction $ -- Smudge filter is fed git file content, and if it's a pointer to an -- available annex object, should output its content. smudge :: FilePath -> CommandStart -smudge _file = do +smudge file = do liftIO $ fileEncoding stdin s <- liftIO $ hGetContents stdin case parsePointer s of Nothing -> liftIO $ putStr s Just k -> do + updateAssociatedFiles k file content <- calcRepo (gitAnnexLocation k) liftIO $ maybe (putStr s) @@ -62,6 +64,7 @@ clean file = do ifM (shouldAnnex file) ( do k <- ingest file + updateAssociatedFiles k file liftIO $ emitPointer k , liftIO cat ) @@ -107,3 +110,10 @@ parsePointer s where s' = take maxsz s maxsz = 81920 + +updateAssociatedFiles :: Key -> FilePath -> Annex () +updateAssociatedFiles k f = do + h <- AssociatedFiles.openDb + liftIO $ do + AssociatedFiles.addDb h k f + AssociatedFiles.closeDb h diff --git a/doc/todo/smudge.mdwn b/doc/todo/smudge.mdwn index d08d600aee..74a1435809 100644 --- a/doc/todo/smudge.mdwn +++ b/doc/todo/smudge.mdwn @@ -275,13 +275,14 @@ In particular: * Is the smudge filter called at any other time? Seems unlikely but then there could be situations with a detached work tree or such. * Does git call any useful hooks when removing a file from the work tree, - or converting it to not be annexed? + or converting it to not be annexed, or for `git mv` of an annexed file? No! From this analysis, any file map generated by the smudge/clean filters is necessary potentially innaccurate. It may list deleted files. It may or may not reflect current unstaged changes from the work tree. + Follows that any use of the file map needs to verify the info from it, and throw out bad cached info (updating the map to match reality). From 664cc987e806800ef46794f356003321170c1060 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Mon, 7 Dec 2015 15:22:01 -0400 Subject: [PATCH 18/96] support pointer files Backend.lookupFile is changed to always fall back to catKey when operating on a file that's not a symlink. catKey is changed to understand pointer files, as well as annex symlinks. Before, catKey needed a file mode witness, to be sure it was looking at a symlink. That was complicated stuff. Now, it doesn't actually care if a file in git is a symlink or not; in either case asking git for the content of the file will get the pointer to the key. This does mean that git-annex will treat a link foo -> WORM--bar as a git-annex file, and also treats a regular file containing annex/objects/WORM--bar as a git-annex file. Calling catKey could make git-annex commands need to do more work than before. This would especially be the case if a repo contained many regular files, and only a few annexed files, as now git-annex will need to ask git about the contents of the regular files. --- Annex/AutoMerge.hs | 5 ++- Annex/CatFile.hs | 79 ++++++++++++++++++--------------------------- Annex/Direct.hs | 16 ++++----- Annex/View.hs | 4 +-- Backend.hs | 14 +++----- CmdLine/Seek.hs | 2 +- Command/Indirect.hs | 2 +- Command/Smudge.hs | 11 ++----- Command/Undo.hs | 4 +-- 9 files changed, 55 insertions(+), 82 deletions(-) diff --git a/Annex/AutoMerge.hs b/Annex/AutoMerge.hs index bfbe71dc28..c32c3f66ac 100644 --- a/Annex/AutoMerge.hs +++ b/Annex/AutoMerge.hs @@ -25,7 +25,6 @@ import qualified Git.Branch import Git.Types (BlobType(..)) import Config import Annex.ReplaceFile -import Git.FileMode import Annex.VariantFile import qualified Data.Set as S @@ -135,7 +134,7 @@ resolveMerge' (Just us) them u = do | select (LsFiles.unmergedBlobType u) == Just SymlinkBlob = case select' (LsFiles.unmergedSha u) of Nothing -> return Nothing - Just sha -> catKey sha symLinkMode + Just sha -> catKey sha | otherwise = return Nothing makelink key = do @@ -174,7 +173,7 @@ resolveMerge' (Just us) them u = do case select' (LsFiles.unmergedSha u) of Nothing -> noop Just sha -> do - link <- catLink True sha + link <- catSymLinkTarget sha replacewithlink item link resolveby a = do diff --git a/Annex/CatFile.hs b/Annex/CatFile.hs index 1791498446..47ea86a31f 100644 --- a/Annex/CatFile.hs +++ b/Annex/CatFile.hs @@ -1,6 +1,6 @@ {- git cat-file interface, with handle automatically stored in the Annex monad - - - Copyright 2011-2013 Joey Hess + - Copyright 2011-2015 Joey Hess - - Licensed under the GNU GPL version 3 or higher. -} @@ -14,9 +14,10 @@ module Annex.CatFile ( catFileHandle, catFileStop, catKey, + parsePointer, catKeyFile, catKeyFileHEAD, - catLink, + catSymLinkTarget, ) where import qualified Data.ByteString.Lazy as L @@ -29,8 +30,8 @@ import qualified Git.CatFile import qualified Annex import Git.Types import Git.FilePath -import Git.FileMode import qualified Git.Ref +import Types.Key catFile :: Git.Branch -> FilePath -> Annex L.ByteString catFile branch file = do @@ -80,52 +81,36 @@ catFileStop = do (s { Annex.catfilehandles = M.empty }, Annex.catfilehandles s) liftIO $ mapM_ Git.CatFile.catFileStop (M.elems m) -{- From the Sha or Ref of a symlink back to the key. - - - - Requires a mode witness, to guarantee that the file is a symlink. - -} -catKey :: Ref -> FileMode -> Annex (Maybe Key) -catKey = catKey' True +{- From ref to a symlink or a pointer file, get the key. -} +catKey :: Ref -> Annex (Maybe Key) +catKey ref = do + o <- catObject ref + if L.length o > maxsz + then return Nothing -- too big + else do + let l = decodeBS o + let l' = fromInternalGitPath l + return $ if isLinkToAnnex l' + then fileKey $ takeFileName l' + else parsePointer l + where + -- Want to avoid buffering really big files in git into memory. + -- 8192 bytes is plenty for a pointer to a key. + -- Pad some more to allow for any pointer files that might have + -- lines after the key explaining what the file is used for. + maxsz = 81920 -catKey' :: Bool -> Sha -> FileMode -> Annex (Maybe Key) -catKey' modeguaranteed sha mode - | isSymLink mode = do - l <- catLink modeguaranteed sha - return $ if isLinkToAnnex l - then fileKey $ takeFileName l - else Nothing - | otherwise = return Nothing +{- Only look at the first line of a pointer file. -} +parsePointer :: String -> Maybe Key +parsePointer s = headMaybe (lines s) >>= file2key {- Gets a symlink target. -} -catLink :: Bool -> Sha -> Annex String -catLink modeguaranteed sha = fromInternalGitPath . decodeBS <$> get +catSymLinkTarget :: Sha -> Annex String +catSymLinkTarget sha = fromInternalGitPath . decodeBS <$> get where - -- If the mode is not guaranteed to be correct, avoid - -- buffering the whole file content, which might be large. - -- 8192 is enough if it really is a symlink. - get - | modeguaranteed = catObject sha - | otherwise = L.take 8192 <$> catObject sha - -{- Looks up the key corresponding to the Ref using the running cat-file. - - - - Currently this always has to look in HEAD, because cat-file --batch - - does not offer a way to specify that we want to look up a tree object - - in the index. So if the index has a file staged not as a symlink, - - and it is a symlink in head, the wrong mode is gotten. - - Also, we have to assume the file is a symlink if it's not yet committed - - to HEAD. For these reasons, modeguaranteed is not set. - -} -catKeyChecked :: Bool -> Ref -> Annex (Maybe Key) -catKeyChecked needhead ref@(Ref r) = - catKey' False ref =<< findmode <$> catTree treeref - where - pathparts = split "/" r - dir = intercalate "/" $ take (length pathparts - 1) pathparts - file = fromMaybe "" $ lastMaybe pathparts - treeref = Ref $ if needhead then "HEAD" ++ dir ++ "/" else dir ++ "/" - findmode = fromMaybe symLinkMode . headMaybe . - map snd . filter (\p -> fst p == file) + -- Avoid buffering the whole file content, which might be large. + -- 8192 is enough if it really is a symlink or pointer file. + get = L.take 8192 <$> catObject sha {- From a file in the repository back to the key. - @@ -151,8 +136,8 @@ catKeyChecked needhead ref@(Ref r) = catKeyFile :: FilePath -> Annex (Maybe Key) catKeyFile f = ifM (Annex.getState Annex.daemon) ( catKeyFileHEAD f - , catKeyChecked True $ Git.Ref.fileRef f + , catKey $ Git.Ref.fileRef f ) catKeyFileHEAD :: FilePath -> Annex (Maybe Key) -catKeyFileHEAD f = catKeyChecked False $ Git.Ref.fileFromRef Git.Ref.headRef f +catKeyFileHEAD f = catKey $ Git.Ref.fileFromRef Git.Ref.headRef f diff --git a/Annex/Direct.hs b/Annex/Direct.hs index 495ff5e75f..803f020cac 100644 --- a/Annex/Direct.hs +++ b/Annex/Direct.hs @@ -53,8 +53,8 @@ stageDirect = do {- Determine what kind of modified or deleted file this is, as - efficiently as we can, by getting any key that's associated - with it in git, as well as its stat info. -} - go (file, Just sha, Just mode) = withTSDelta $ \delta -> do - shakey <- catKey sha mode + go (file, Just sha, Just _mode) = withTSDelta $ \delta -> do + shakey <- catKey sha mstat <- liftIO $ catchMaybeIO $ getSymbolicLinkStatus file mcache <- liftIO $ maybe (pure Nothing) (toInodeCache delta file) mstat filekey <- isAnnexLink file @@ -107,8 +107,8 @@ preCommitDirect = do withkey (DiffTree.srcsha diff) (DiffTree.srcmode diff) removeAssociatedFile withkey (DiffTree.dstsha diff) (DiffTree.dstmode diff) addAssociatedFile where - withkey sha mode a = when (sha /= nullSha) $ do - k <- catKey sha mode + withkey sha _mode a = when (sha /= nullSha) $ do + k <- catKey sha case k of Nothing -> noop Just key -> void $ a key $ @@ -256,16 +256,16 @@ updateWorkTree d oldref force = do makeabs <- flip fromTopFilePath <$> gitRepo let fsitems = zip (map (makeabs . DiffTree.file) items) items forM_ fsitems $ - go makeabs DiffTree.srcsha DiffTree.srcmode moveout moveout_raw + go makeabs DiffTree.srcsha moveout moveout_raw forM_ fsitems $ - go makeabs DiffTree.dstsha DiffTree.dstmode movein movein_raw + go makeabs DiffTree.dstsha movein movein_raw void $ liftIO cleanup where - go makeabs getsha getmode a araw (f, item) + go makeabs getsha a araw (f, item) | getsha item == nullSha = noop | otherwise = void $ tryNonAsync . maybe (araw item makeabs f) (\k -> void $ a item makeabs k f) - =<< catKey (getsha item) (getmode item) + =<< catKey (getsha item) moveout _ _ = removeDirect diff --git a/Annex/View.hs b/Annex/View.hs index 2b8a80e5f2..567522a541 100644 --- a/Annex/View.hs +++ b/Annex/View.hs @@ -413,13 +413,13 @@ withViewChanges addmeta removemeta = do handleremovals item | DiffTree.srcsha item /= nullSha = handlechange item removemeta - =<< catKey (DiffTree.srcsha item) (DiffTree.srcmode item) + =<< catKey (DiffTree.srcsha item) | otherwise = noop handleadds makeabs item | DiffTree.dstsha item /= nullSha = handlechange item addmeta =<< ifM isDirect - ( catKey (DiffTree.dstsha item) (DiffTree.dstmode item) + ( catKey (DiffTree.dstsha item) -- optimisation , isAnnexLink $ makeabs $ DiffTree.file item ) diff --git a/Backend.hs b/Backend.hs index 922d0c2a71..28f83c7e01 100644 --- a/Backend.hs +++ b/Backend.hs @@ -26,7 +26,6 @@ import Annex.Link import Types.Key import Types.KeySource import qualified Types.Backend as B -import Config -- When adding a new backend, import it here and add it to the list. import qualified Backend.Hash @@ -81,22 +80,17 @@ genKey' (b:bs) source = do {- Looks up the key corresponding to an annexed file, - by examining what the file links to. - - - In direct mode, there is often no link on disk, in which case - - the symlink is looked up in git instead. However, a real link - - on disk still takes precedence over what was committed to git in direct - - mode. + - An unlocked file will not have a link on disk, so fall back to + - looking for a pointer to a key in git. -} lookupFile :: FilePath -> Annex (Maybe Key) lookupFile file = do mkey <- isAnnexLink file case mkey of Just key -> makeret key - Nothing -> ifM isDirect - ( maybe (return Nothing) makeret =<< catKeyFile file - , return Nothing - ) + Nothing -> maybe (return Nothing) makeret =<< catKeyFile file where - makeret k = return $ Just k + makeret = return . Just getBackend :: FilePath -> Key -> Annex (Maybe Backend) getBackend file k = let bname = keyBackendName k in diff --git a/CmdLine/Seek.hs b/CmdLine/Seek.hs index 8d253e47df..0b6cc1e781 100644 --- a/CmdLine/Seek.hs +++ b/CmdLine/Seek.hs @@ -80,7 +80,7 @@ withFilesInRefs a = mapM_ go l <- inRepo $ LsTree.lsTree (Git.Ref r) forM_ l $ \i -> do let f = getTopFilePath $ LsTree.file i - v <- catKey (Git.Ref $ LsTree.sha i) (LsTree.mode i) + v <- catKey (Git.Ref $ LsTree.sha i) case v of Nothing -> noop Just k -> whenM (matcher $ MatchingKey k) $ diff --git a/Command/Indirect.hs b/Command/Indirect.hs index c12c91a484..f5234b4dc8 100644 --- a/Command/Indirect.hs +++ b/Command/Indirect.hs @@ -76,7 +76,7 @@ perform = do return Nothing | otherwise -> maybe noop (fromdirect f) - =<< catKey sha mode + =<< catKey sha _ -> noop go _ = noop diff --git a/Command/Smudge.hs b/Command/Smudge.hs index 6cca8035e6..c2dc285401 100644 --- a/Command/Smudge.hs +++ b/Command/Smudge.hs @@ -11,6 +11,7 @@ import Common.Annex import Command import Types.Key import Annex.Content +import Annex.CatFile import Annex.MetaData import Annex.FileMatcher import Types.KeySource @@ -100,17 +101,11 @@ ingest file = do =<< liftIO (getFileStatus file) return k +-- Could add a newline and some text explaining this file is a pointer. +-- parsePointer only looks at the first line. emitPointer :: Key -> IO () emitPointer = putStrLn . key2file -parsePointer :: String -> Maybe Key -parsePointer s - | length s' >= maxsz = Nothing -- too long to be a key pointer - | otherwise = headMaybe (lines s') >>= file2key - where - s' = take maxsz s - maxsz = 81920 - updateAssociatedFiles :: Key -> FilePath -> Annex () updateAssociatedFiles k f = do h <- AssociatedFiles.openDb diff --git a/Command/Undo.hs b/Command/Undo.hs index c647dfba4d..0692dce342 100644 --- a/Command/Undo.hs +++ b/Command/Undo.hs @@ -72,7 +72,7 @@ perform p = do f <- mkrel di whenM isDirect $ maybe noop (`removeDirect` f) - =<< catKey (srcsha di) (srcmode di) + =<< catKey (srcsha di) liftIO $ nukeFile f forM_ adds $ \di -> do @@ -80,6 +80,6 @@ perform p = do inRepo $ Git.run [Param "checkout", Param "--", File f] whenM isDirect $ maybe noop (`toDirect` f) - =<< catKey (dstsha di) (dstmode di) + =<< catKey (dstsha di) next $ liftIO cleanup From 712c9fc590800d52369415a792d57493caf5f025 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Mon, 7 Dec 2015 15:45:08 -0400 Subject: [PATCH 19/96] require "annex/objects/" before key in pointer files This removes ambiguity, because while someone might have "WORM--foo" in a file that's not intended to be a git-annex pointer file, "annex/objects/WORM--foo" is less likely. Also, 664cc987e806800ef46794f356003321170c1060 had a caveat about symlink targets being parsed as pointer files, and now the same parser is used for both. I did not include any hash directories before the key in the pointer file, as they're not needed. However, if they were included, the parser would still work ok. --- Annex/CatFile.hs | 18 +++++++----------- Command/Smudge.hs | 4 +++- doc/todo/smudge.mdwn | 3 ++- 3 files changed, 12 insertions(+), 13 deletions(-) diff --git a/Annex/CatFile.hs b/Annex/CatFile.hs index 47ea86a31f..7c0022ca59 100644 --- a/Annex/CatFile.hs +++ b/Annex/CatFile.hs @@ -83,16 +83,8 @@ catFileStop = do {- From ref to a symlink or a pointer file, get the key. -} catKey :: Ref -> Annex (Maybe Key) -catKey ref = do - o <- catObject ref - if L.length o > maxsz - then return Nothing -- too big - else do - let l = decodeBS o - let l' = fromInternalGitPath l - return $ if isLinkToAnnex l' - then fileKey $ takeFileName l' - else parsePointer l +catKey ref = parsePointer . fromInternalGitPath . decodeBS . L.take maxsz + <$> catObject ref where -- Want to avoid buffering really big files in git into memory. -- 8192 bytes is plenty for a pointer to a key. @@ -102,7 +94,11 @@ catKey ref = do {- Only look at the first line of a pointer file. -} parsePointer :: String -> Maybe Key -parsePointer s = headMaybe (lines s) >>= file2key +parsePointer s = headMaybe (lines s) >>= go + where + go l + | isLinkToAnnex l = file2key $ takeFileName l + | otherwise = Nothing {- Gets a symlink target. -} catSymLinkTarget :: Sha -> Annex String diff --git a/Command/Smudge.hs b/Command/Smudge.hs index c2dc285401..e08afed6bb 100644 --- a/Command/Smudge.hs +++ b/Command/Smudge.hs @@ -18,6 +18,7 @@ import Types.KeySource import Backend import Logs.Location import qualified Database.AssociatedFiles as AssociatedFiles +import Git.FilePath import qualified Data.ByteString.Lazy as B @@ -104,7 +105,8 @@ ingest file = do -- Could add a newline and some text explaining this file is a pointer. -- parsePointer only looks at the first line. emitPointer :: Key -> IO () -emitPointer = putStrLn . key2file +emitPointer k = putStrLn $ toInternalGitPath $ + pathSeparator:objectDir key2file k updateAssociatedFiles :: Key -> FilePath -> Annex () updateAssociatedFiles k f = do diff --git a/doc/todo/smudge.mdwn b/doc/todo/smudge.mdwn index 74a1435809..eb8ce416bc 100644 --- a/doc/todo/smudge.mdwn +++ b/doc/todo/smudge.mdwn @@ -158,7 +158,8 @@ Using git-annex on a crippled filesystem that does not support symlinks. Data: * An annex pointer file has as its first line the git-annex key - that it's standing in for. Subsequent lines of the file might + that it's standing in for (prefixed with "annex/objects/", similar to + an annex symlink target). Subsequent lines of the file might be a message saying that the file's content is not currently available. An annex pointer file is checked into the git repository the same way that an annex symlink is checked in. From 9923b8dc77720121543fdc066561f6ec2d8bb811 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Mon, 7 Dec 2015 17:24:16 -0400 Subject: [PATCH 20/96] long walk led to long list of things to do --- doc/todo/smudge.mdwn | 48 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 47 insertions(+), 1 deletion(-) diff --git a/doc/todo/smudge.mdwn b/doc/todo/smudge.mdwn index eb8ce416bc..6bb07a825d 100644 --- a/doc/todo/smudge.mdwn +++ b/doc/todo/smudge.mdwn @@ -249,7 +249,8 @@ The file map needs to map from `Key -> [File]`. `File -> Key` seems useful to have, but in practice is not worthwhile. Drop and get operations need to know what files in the work tree use a -given key in order to update the work tree. +given key in order to update the work tree. And, we don't want to +overwrite a work tree file if it's been modified when dropping or getting. git-annex commands that look at annex symlinks to get keys to act on will need fall back to either consulting the file map, or looking at the staged @@ -322,6 +323,51 @@ will have all files unlocked, necessarily in all clones. This happens automatically, because when the direct repos are upgraded that causes the files to be unlocked, while the indirect upgrades don't touch the files. +#### implementation todo list + +* Reconcile staged changes into the associated files database, whenever + the database is queried. +* See if the case where this is not used can be optimised. Eg, if the + associated files database doesn't exist at all, we know smudge/clean are + not used, so queries for associated files don't need to open the database + or do reconciliation, but can simply return none. + Also, no need for Backend.lookupFile to catKeyFile in this case + (when not in direct mode). +* Update pointer files when adding the content of a key to the annex + (ie, `git annex get`). + - Check the associated files database to find associated files for the key. + - Check worktree file to ensure it's still a pointer to the key. + - Hard-link to annex object. +* Update pointer files when dropping the content of a key. + - Check the associated files database to find associated files for the key. + - Verify that worktree files are not modified from the annexed object. + How? InodeCache could be maintained, but the smudge filer interface + wouldn't let it be updated when smudging a file. May need to take + an expensive path: + 1. stat object file + 2. stat worktree file + 3. if same stat, ok else hash worktree file + 4. stat worktree file again after checking hash; make sure it's + unchanged from earlier stat +* Convert `git annex unlock` to stage a pointer file, and hard link to the + annexed object (or write pointer file if annexed object not present). + - Also needs to thaw annex object file + - Also needs to update associated files db. +* Convert `git annex lock` to verify that worktree file is not modified + (same check used when updating pointer files to the content of a key), + and then delete the worktree file and replace with an annex symlink. + - Allow --force to override the check and throw away modified content. + - Also needs to update associated files db. + - Also should check associated files db, and if there are no other + unlocked files for the key, freeze its object file. +* Make v6 upgrade convert direct mode repo to repo with all unlocked + files. +* fsck will need some fixes to handle unlocked files. +* Make automatic merge conflict resolution work for pointer files. + - Should probably automatically handle merge conflicts between annex + symlinks and pointer files too. Maybe by always resulting in a pointer + file, since the symlinks don't work everwhere. + ---- ### test files From 37c9026c6e52486bd1232567ae232132531ed1bd Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Tue, 8 Dec 2015 13:07:45 -0400 Subject: [PATCH 21/96] todo --- doc/todo/smudge.mdwn | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doc/todo/smudge.mdwn b/doc/todo/smudge.mdwn index 6bb07a825d..949de27daf 100644 --- a/doc/todo/smudge.mdwn +++ b/doc/todo/smudge.mdwn @@ -325,6 +325,9 @@ files to be unlocked, while the indirect upgrades don't touch the files. #### implementation todo list +* inAnnex check should fail in the case where an annexed objects is unlocked + and has had its content changed. Could use an InodeCache for + such objects. This parallels how inAnnex checks work for direct mode. * Reconcile staged changes into the associated files database, whenever the database is queried. * See if the case where this is not used can be optimised. Eg, if the From 78a6b8ce057c10303254448bba46d58970a44bf5 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 9 Dec 2015 14:25:33 -0400 Subject: [PATCH 22/96] refactor and improve pointer file handling code --- Annex/CatFile.hs | 20 ++------------------ Annex/Init.hs | 2 +- Annex/Link.hs | 36 +++++++++++++++++++++++++++++++++++- Backend.hs | 1 - Command/Smudge.hs | 20 ++++++-------------- 5 files changed, 44 insertions(+), 35 deletions(-) diff --git a/Annex/CatFile.hs b/Annex/CatFile.hs index 7c0022ca59..aefccd4240 100644 --- a/Annex/CatFile.hs +++ b/Annex/CatFile.hs @@ -14,7 +14,6 @@ module Annex.CatFile ( catFileHandle, catFileStop, catKey, - parsePointer, catKeyFile, catKeyFileHEAD, catSymLinkTarget, @@ -31,7 +30,7 @@ import qualified Annex import Git.Types import Git.FilePath import qualified Git.Ref -import Types.Key +import Annex.Link catFile :: Git.Branch -> FilePath -> Annex L.ByteString catFile branch file = do @@ -83,22 +82,7 @@ catFileStop = do {- From ref to a symlink or a pointer file, get the key. -} catKey :: Ref -> Annex (Maybe Key) -catKey ref = parsePointer . fromInternalGitPath . decodeBS . L.take maxsz - <$> catObject ref - where - -- Want to avoid buffering really big files in git into memory. - -- 8192 bytes is plenty for a pointer to a key. - -- Pad some more to allow for any pointer files that might have - -- lines after the key explaining what the file is used for. - maxsz = 81920 - -{- Only look at the first line of a pointer file. -} -parsePointer :: String -> Maybe Key -parsePointer s = headMaybe (lines s) >>= go - where - go l - | isLinkToAnnex l = file2key $ takeFileName l - | otherwise = Nothing +catKey ref = parseLinkOrPointer <$> catObject ref {- Gets a symlink target. -} catSymLinkTarget :: Sha -> Annex String diff --git a/Annex/Init.hs b/Annex/Init.hs index b00e412180..7eea0dfa13 100644 --- a/Annex/Init.hs +++ b/Annex/Init.hs @@ -29,11 +29,11 @@ import Types.TrustLevel import Annex.Version import Annex.Difference import Annex.UUID +import Annex.Link import Config import Annex.Direct import Annex.Content.Direct import Annex.Environment -import Backend import Annex.Hook import Upgrade #ifndef mingw32_HOST_OS diff --git a/Annex/Link.hs b/Annex/Link.hs index 98b200f0a0..f405403f26 100644 --- a/Annex/Link.hs +++ b/Annex/Link.hs @@ -5,7 +5,9 @@ - On other filesystems, git instead stores the symlink target in a regular - file. - - - Copyright 2013 Joey Hess + - Pointer files are used instead of symlinks for unlocked files. + - + - Copyright 2013-2015 Joey Hess - - Licensed under the GNU GPL version 3 or higher. -} @@ -19,6 +21,9 @@ import qualified Git.UpdateIndex import qualified Annex.Queue import Git.Types import Git.FilePath +import Types.Key + +import qualified Data.ByteString.Lazy as L type LinkTarget = String @@ -110,3 +115,32 @@ stageSymlink :: FilePath -> Sha -> Annex () stageSymlink file sha = Annex.Queue.addUpdateIndex =<< inRepo (Git.UpdateIndex.stageSymlink file sha) + +{- Parses a symlink target or a pointer file to a Key. + - Only looks at the first line, as pointer files can have subsequent + - lines. -} +parseLinkOrPointer :: L.ByteString -> Maybe Key +parseLinkOrPointer = parseLinkOrPointer' . decodeBS . L.take maxsz + where + {- Want to avoid buffering really big files in git into + - memory when reading files that may be pointers. + - + - 8192 bytes is plenty for a pointer to a key. + - Pad some more to allow for any pointer files that might have + - lines after the key explaining what the file is used for. -} + maxsz = 81920 + +parseLinkOrPointer' :: String -> Maybe Key +parseLinkOrPointer' s = headMaybe (lines (fromInternalGitPath s)) >>= go + where + go l + | isLinkToAnnex l = file2key $ takeFileName l + | otherwise = Nothing + +formatPointer :: Key -> String +formatPointer k = toInternalGitPath $ pathSeparator:objectDir key2file k + +{- Checks if a file is a pointer to a key. -} +isPointerFile :: FilePath -> Annex (Maybe Key) +isPointerFile f = liftIO $ catchDefaultIO Nothing $ + parseLinkOrPointer <$> L.readFile f diff --git a/Backend.hs b/Backend.hs index 28f83c7e01..d37eed34af 100644 --- a/Backend.hs +++ b/Backend.hs @@ -11,7 +11,6 @@ module Backend ( genKey, lookupFile, getBackend, - isAnnexLink, chooseBackend, lookupBackendName, maybeLookupBackendName, diff --git a/Command/Smudge.hs b/Command/Smudge.hs index e08afed6bb..f9f819bec5 100644 --- a/Command/Smudge.hs +++ b/Command/Smudge.hs @@ -9,16 +9,14 @@ module Command.Smudge where import Common.Annex import Command -import Types.Key import Annex.Content -import Annex.CatFile +import Annex.Link import Annex.MetaData import Annex.FileMatcher import Types.KeySource import Backend import Logs.Location import qualified Database.AssociatedFiles as AssociatedFiles -import Git.FilePath import qualified Data.ByteString.Lazy as B @@ -46,16 +44,13 @@ seek o = commandAction $ -- available annex object, should output its content. smudge :: FilePath -> CommandStart smudge file = do - liftIO $ fileEncoding stdin - s <- liftIO $ hGetContents stdin - case parsePointer s of - Nothing -> liftIO $ putStr s + b <- liftIO $ B.hGetContents stdin + case parseLinkOrPointer b of + Nothing -> liftIO $ B.putStr b Just k -> do updateAssociatedFiles k file content <- calcRepo (gitAnnexLocation k) - liftIO $ maybe - (putStr s) - (B.hPut stdout) + liftIO $ B.hPut stdout . fromMaybe b =<< catchMaybeIO (B.readFile content) stop @@ -102,11 +97,8 @@ ingest file = do =<< liftIO (getFileStatus file) return k --- Could add a newline and some text explaining this file is a pointer. --- parsePointer only looks at the first line. emitPointer :: Key -> IO () -emitPointer k = putStrLn $ toInternalGitPath $ - pathSeparator:objectDir key2file k +emitPointer = putStrLn . formatPointer updateAssociatedFiles :: Key -> FilePath -> Annex () updateAssociatedFiles k f = do From 05b598a0575e3ce58b3206e9964efd2aa6458ca5 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 9 Dec 2015 14:55:47 -0400 Subject: [PATCH 23/96] stash DbHandle in Annex state --- Annex.hs | 3 +++ Command/Smudge.hs | 6 ++---- Database/AssociatedFiles.hs | 33 +++++++++++++++++++++++-------- Database/AssociatedFiles/Types.hs | 14 +++++++++++++ Database/Handle.hs | 4 ++-- 5 files changed, 46 insertions(+), 14 deletions(-) create mode 100644 Database/AssociatedFiles/Types.hs diff --git a/Annex.hs b/Annex.hs index c9a4ef6a05..5c9ec4cd41 100644 --- a/Annex.hs +++ b/Annex.hs @@ -60,6 +60,7 @@ import Types.NumCopies import Types.LockCache import Types.DesktopNotify import Types.CleanupActions +import qualified Database.AssociatedFiles.Types #ifdef WITH_QUVI import Utility.Quvi (QuviVersion) #endif @@ -134,6 +135,7 @@ data AnnexState = AnnexState , desktopnotify :: DesktopNotify , workers :: [Either AnnexState (Async AnnexState)] , concurrentjobs :: Maybe Int + , associatedfilesdbhandle :: Maybe Database.AssociatedFiles.Types.DbHandle } newState :: GitConfig -> Git.Repo -> AnnexState @@ -179,6 +181,7 @@ newState c r = AnnexState , desktopnotify = mempty , workers = [] , concurrentjobs = Nothing + , associatedfilesdbhandle = Nothing } {- Makes an Annex state object for the specified git repo. diff --git a/Command/Smudge.hs b/Command/Smudge.hs index f9f819bec5..7462963212 100644 --- a/Command/Smudge.hs +++ b/Command/Smudge.hs @@ -102,7 +102,5 @@ emitPointer = putStrLn . formatPointer updateAssociatedFiles :: Key -> FilePath -> Annex () updateAssociatedFiles k f = do - h <- AssociatedFiles.openDb - liftIO $ do - AssociatedFiles.addDb h k f - AssociatedFiles.closeDb h + AssociatedFiles.addDb k f + AssociatedFiles.flushDb diff --git a/Database/AssociatedFiles.hs b/Database/AssociatedFiles.hs index 8244f15e80..d17eb8112d 100644 --- a/Database/AssociatedFiles.hs +++ b/Database/AssociatedFiles.hs @@ -13,6 +13,7 @@ module Database.AssociatedFiles ( DbHandle, openDb, + flushDb, closeDb, addDb, getDb, @@ -21,6 +22,7 @@ module Database.AssociatedFiles ( ) where import Database.Types +import Database.AssociatedFiles.Types import qualified Database.Handle as H import Locations import Common hiding (delete) @@ -33,8 +35,6 @@ import Messages import Database.Persist.TH import Database.Esqueleto hiding (Key) -newtype DbHandle = DbHandle H.DbHandle - share [mkPersist sqlSettings, mkMigrate "migrateAssociated"] [persistLowerCase| Associated key SKey @@ -64,8 +64,25 @@ openDb = withExclusiveLock gitAnnexAssociatedFilesDbLock $ do closeDb :: DbHandle -> IO () closeDb (DbHandle h) = H.closeDb h -addDb :: DbHandle -> Key -> FilePath -> IO () -addDb (DbHandle h) k f = H.queueDb h (\_ _ -> pure True) $ do +withDbHandle :: (H.DbHandle -> IO a) -> Annex a +withDbHandle a = do + (DbHandle h) <- dbHandle + liftIO $ a h + +dbHandle :: Annex DbHandle +dbHandle = maybe startup return =<< Annex.getState Annex.associatedfilesdbhandle + where + startup = do + h <- openDb + Annex.changeState $ \s -> s { Annex.associatedfilesdbhandle = Just h } + return h + +{- Flushes any changes made to the database. -} +flushDb :: Annex () +flushDb = withDbHandle H.flushQueueDb + +addDb :: Key -> FilePath -> Annex () +addDb k f = withDbHandle $ \h -> H.queueDb h (\_ _ -> pure True) $ do -- If the same file was associated with a different key before, -- remove that. delete $ from $ \r -> do @@ -76,8 +93,8 @@ addDb (DbHandle h) k f = H.queueDb h (\_ _ -> pure True) $ do {- Note that the files returned used to be associated with the key, but - some of them may not be any longer. -} -getDb :: DbHandle -> Key -> IO [FilePath] -getDb (DbHandle h) = H.queryDb h . getDb' . toSKey +getDb :: Key -> Annex [FilePath] +getDb k = withDbHandle $ \h -> H.queryDb h $ getDb' $ toSKey k getDb' :: SKey -> SqlPersistM [FilePath] getDb' sk = do @@ -86,8 +103,8 @@ getDb' sk = do return (r ^. AssociatedFile) return $ map unValue l -removeDb :: DbHandle -> Key -> FilePath -> IO () -removeDb (DbHandle h) k f = H.queueDb h (\_ _ -> pure True) $ +removeDb :: Key -> FilePath -> Annex () +removeDb k f = withDbHandle $ \h -> H.queueDb h (\_ _ -> pure True) $ delete $ from $ \r -> do where_ (r ^. AssociatedKey ==. val sk &&. r ^. AssociatedFile ==. val f) where diff --git a/Database/AssociatedFiles/Types.hs b/Database/AssociatedFiles/Types.hs new file mode 100644 index 0000000000..8c32dcf222 --- /dev/null +++ b/Database/AssociatedFiles/Types.hs @@ -0,0 +1,14 @@ +{- Sqlite database used for tracking a key's associated files, data types. + - + - Copyright 2015 Joey Hess + -: + - Licensed under the GNU GPL version 3 or higher. + -} + +module Database.AssociatedFiles.Types ( + DbHandle(..) +) where + +import qualified Database.Handle as H + +newtype DbHandle = DbHandle H.DbHandle diff --git a/Database/Handle.hs b/Database/Handle.hs index 439e7c18bc..6d312df685 100644 --- a/Database/Handle.hs +++ b/Database/Handle.hs @@ -21,7 +21,6 @@ module Database.Handle ( import Utility.Exception import Utility.Monad -import Messages import Database.Persist.Sqlite import qualified Database.Sqlite as Sqlite @@ -35,6 +34,7 @@ import Control.Monad.Trans.Resource (runResourceT) import Control.Monad.Logger (runNoLoggingT) import Data.List import Data.Time.Clock +import System.IO {- A DbHandle is a reference to a worker thread that communicates with - the database. It has a MVar which Jobs are submitted to. -} @@ -79,7 +79,7 @@ type TableName = String workerThread :: T.Text -> TableName -> MVar Job -> IO () workerThread db tablename jobs = catchNonAsync (run loop) showerr where - showerr e = liftIO $ warningIO $ + showerr e = liftIO $ hPutStrLn stderr $ "sqlite worker thread crashed: " ++ show e loop = do From 751120c17156662ec0cd758b0fa374f6acd8c086 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 9 Dec 2015 15:18:25 -0400 Subject: [PATCH 24/96] avoid pre-commit hook messing up new-style unlocked files in v6 repo --- Annex/Version.hs | 6 ++++++ CmdLine/Seek.hs | 2 +- Command/PreCommit.hs | 15 +++++++++++---- doc/git-annex-pre-commit.mdwn | 8 ++++++-- 4 files changed, 24 insertions(+), 7 deletions(-) diff --git a/Annex/Version.hs b/Annex/Version.hs index f9b24d9c49..4c2a990fa8 100644 --- a/Annex/Version.hs +++ b/Annex/Version.hs @@ -43,6 +43,12 @@ versionSupportsDirectMode = go <$> getVersion go (Just "6") = False go _ = True +versionSupportsUnlockedPointers :: Annex Bool +versionSupportsUnlockedPointers = go <$> getVersion + where + go (Just "6") = True + go _ = False + setVersion :: Version -> Annex () setVersion = setConfig versionField diff --git a/CmdLine/Seek.hs b/CmdLine/Seek.hs index 0b6cc1e781..48545ce042 100644 --- a/CmdLine/Seek.hs +++ b/CmdLine/Seek.hs @@ -115,7 +115,7 @@ withPairs a params = seekActions $ return $ map a $ pairs [] params pairs c (x:y:xs) = pairs ((x,y):c) xs pairs _ _ = error "expected pairs" -withFilesToBeCommitted :: (String -> CommandStart) -> CmdParams -> CommandSeek +withFilesToBeCommitted :: (FilePath -> CommandStart) -> CmdParams -> CommandSeek withFilesToBeCommitted a params = seekActions $ prepFiltered a $ seekHelper LsFiles.stagedNotDeleted params diff --git a/Command/PreCommit.hs b/Command/PreCommit.hs index 2d62b51f3f..b6f52d01c2 100644 --- a/Command/PreCommit.hs +++ b/Command/PreCommit.hs @@ -16,7 +16,9 @@ import qualified Command.Add import qualified Command.Fix import Annex.Direct import Annex.Hook +import Annex.Link import Annex.View +import Annex.Version import Annex.View.ViewedFile import Annex.LockFile import Logs.View @@ -49,9 +51,14 @@ seek ps = lockPreCommitHook $ ifM isDirect void $ liftIO cleanup , do -- fix symlinks to files being committed - withFilesToBeCommitted (whenAnnexed Command.Fix.start) ps + flip withFilesToBeCommitted ps $ \f -> + maybe stop (Command.Fix.start f) + =<< isAnnexLink f -- inject unlocked files into the annex - withFilesUnlockedToBeCommitted startIndirect ps + -- (not needed when repo version uses + -- unlocked pointer files) + unlessM versionSupportsUnlockedPointers $ + withFilesUnlockedToBeCommitted startInjectUnlocked ps ) runAnnexHook preCommitAnnexHook -- committing changes to a view updates metadata @@ -64,8 +71,8 @@ seek ps = lockPreCommitHook $ ifM isDirect ) -startIndirect :: FilePath -> CommandStart -startIndirect f = next $ do +startInjectUnlocked :: FilePath -> CommandStart +startInjectUnlocked f = next $ do unlessM (callCommandAction $ Command.Add.start f) $ error $ "failed to add " ++ f ++ "; canceling commit" next $ return True diff --git a/doc/git-annex-pre-commit.mdwn b/doc/git-annex-pre-commit.mdwn index bc1e86e182..21e5aef68f 100644 --- a/doc/git-annex-pre-commit.mdwn +++ b/doc/git-annex-pre-commit.mdwn @@ -12,10 +12,14 @@ This is meant to be called from git's pre-commit hook. `git annex init` automatically creates a pre-commit hook using this. Fixes up symlinks that are staged as part of a commit, to ensure they -point to annexed content. Also handles injecting changes to unlocked -files into the annex. When in a view, updates metadata to reflect changes +point to annexed content. + +When in a view, updates metadata to reflect changes made to files in the view. +When in a repository that has not been upgraded to annex.version 6, +also handles injecting changes to unlocked files into the annex. + # SEE ALSO [[git-annex]](1) From ba39f993f52ae9d191a0bd80512a1f0278d3c35d Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 9 Dec 2015 15:24:32 -0400 Subject: [PATCH 25/96] avoid clean filter trying to annex a pointer file --- Command/Smudge.hs | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/Command/Smudge.hs b/Command/Smudge.hs index 7462963212..9ce95d4ef2 100644 --- a/Command/Smudge.hs +++ b/Command/Smudge.hs @@ -54,22 +54,23 @@ smudge file = do =<< catchMaybeIO (B.readFile content) stop --- Clean filter decides if a file should be stored in the annex, and --- outputs a pointer to its injested content. +-- Clean filter is fed file content on stdin, decides if a file +-- should be stored in the annex, and outputs a pointer to its +-- injested content. clean :: FilePath -> CommandStart clean file = do - ifM (shouldAnnex file) - ( do - k <- ingest file - updateAssociatedFiles k file - liftIO $ emitPointer k - , liftIO cat - ) + b <- liftIO $ B.hGetContents stdin + if isJust (parseLinkOrPointer b) + then liftIO $ B.hPut stdout b + else ifM (shouldAnnex file) + ( do + k <- ingest file + updateAssociatedFiles k file + liftIO $ emitPointer k + , liftIO $ B.hPut stdout b + ) stop -cat :: IO () -cat = B.hGetContents stdin >>= B.hPut stdout - shouldAnnex :: FilePath -> Annex Bool shouldAnnex file = do matcher <- largeFilesMatcher From 8a818088a390046700a6415df8ac2ab4c5c956b1 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 9 Dec 2015 15:25:14 -0400 Subject: [PATCH 26/96] link/copy pointer files to object content when it's added --- Annex/Content.hs | 20 ++++++++++++++++++-- doc/todo/smudge.mdwn | 15 ++++++--------- 2 files changed, 24 insertions(+), 11 deletions(-) diff --git a/Annex/Content.hs b/Annex/Content.hs index 73cb6ab012..d3bf4f94fe 100644 --- a/Annex/Content.hs +++ b/Annex/Content.hs @@ -72,6 +72,7 @@ import qualified Types.Backend import qualified Backend import Types.NumCopies import Annex.UUID +import qualified Database.AssociatedFiles as AssociatedFiles {- Checks if a given key's content is currently present. -} inAnnex :: Key -> Annex Bool @@ -414,7 +415,10 @@ checkDiskSpace destination key alreadythere samefilesystem = ifM (Annex.getState {- Moves a key's content into .git/annex/objects/ - - - In direct mode, moves it to the associated file, or files. + - When a key has associated pointer files, the object is hard + - linked (or copied) to the files, and the object file is left thawed. + + - In direct mode, moves the object file to the associated file, or files. - - What if the key there already has content? This could happen for - various reasons; perhaps the same content is being annexed again. @@ -442,7 +446,10 @@ moveAnnex key src = withObjectLoc key storeobject storedirect ( alreadyhave , modifyContent dest $ do liftIO $ moveFile src dest - freezeContent dest + fs <- AssociatedFiles.getDb key + if null fs + then freezeContent dest + else mapM_ (populateAssociatedFile key dest) fs ) storeindirect = storeobject =<< calcRepo (gitAnnexLocation key) @@ -472,6 +479,15 @@ moveAnnex key src = withObjectLoc key storeobject storedirect alreadyhave = liftIO $ removeFile src +populateAssociatedFile :: Key -> FilePath -> FilePath -> Annex () +populateAssociatedFile k obj f = go =<< isPointerFile f + where + go (Just k') | k == k' = liftIO $ do + nukeFile f + unlessM (catchBoolIO $ createLinkOrCopy obj f) $ + writeFile f (formatPointer k) + go _ = return () + {- Hard links a file into .git/annex/objects/, falling back to a copy - if necessary. - diff --git a/doc/todo/smudge.mdwn b/doc/todo/smudge.mdwn index 949de27daf..373c655617 100644 --- a/doc/todo/smudge.mdwn +++ b/doc/todo/smudge.mdwn @@ -330,17 +330,14 @@ files to be unlocked, while the indirect upgrades don't touch the files. such objects. This parallels how inAnnex checks work for direct mode. * Reconcile staged changes into the associated files database, whenever the database is queried. -* See if the case where this is not used can be optimised. Eg, if the - associated files database doesn't exist at all, we know smudge/clean are - not used, so queries for associated files don't need to open the database - or do reconciliation, but can simply return none. +* See if the case where the associated files database is not used can be + optimised. Eg, if the associated files database doesn't exist at all, + we know smudge/clean are not used, so queries for associated files don't + need to open the database or do reconciliation, but can simply return none. Also, no need for Backend.lookupFile to catKeyFile in this case (when not in direct mode). -* Update pointer files when adding the content of a key to the annex - (ie, `git annex get`). - - Check the associated files database to find associated files for the key. - - Check worktree file to ensure it's still a pointer to the key. - - Hard-link to annex object. + However, beware over-optimisation breaking the assistant or perhaps other + long-lived processes. * Update pointer files when dropping the content of a key. - Check the associated files database to find associated files for the key. - Verify that worktree files are not modified from the annexed object. From 3311c486312a340fd8a27d10336345e54ab791ab Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 9 Dec 2015 15:42:16 -0400 Subject: [PATCH 27/96] move InodeSentinal from direct mode code to its own module Will be used outside of direct mode for v6 unlocked files, and is already used outside of direct mode when adding files to annex. --- Annex/Content.hs | 6 ++- Annex/Content/Direct.hs | 60 ++---------------------------- Annex/Direct.hs | 4 ++ Annex/Init.hs | 4 +- Annex/InodeSentinal.hs | 67 ++++++++++++++++++++++++++++++++++ Assistant/Threads/Committer.hs | 1 + Command/Add.hs | 1 + Upgrade/V5.hs | 7 ++++ doc/todo/smudge.mdwn | 4 +- 9 files changed, 93 insertions(+), 61 deletions(-) create mode 100644 Annex/InodeSentinal.hs diff --git a/Annex/Content.hs b/Annex/Content.hs index d3bf4f94fe..564bc2dcae 100644 --- a/Annex/Content.hs +++ b/Annex/Content.hs @@ -1,6 +1,6 @@ {- git-annex file content managing - - - Copyright 2010-2014 Joey Hess + - Copyright 2010-2015 Joey Hess - - Licensed under the GNU GPL version 3 or higher. -} @@ -72,6 +72,7 @@ import qualified Types.Backend import qualified Backend import Types.NumCopies import Annex.UUID +import Annex.InodeSentinal import qualified Database.AssociatedFiles as AssociatedFiles {- Checks if a given key's content is currently present. -} @@ -583,6 +584,9 @@ cleanObjectLoc key cleaner = do <=< catchMaybeIO $ removeDirectory dir {- Removes a key's file from .git/annex/objects/ + - + - When a key has associated pointer files, they are checked for + - modifications, and if unmodified, are reset. - - In direct mode, deletes the associated files or files, and replaces - them with symlinks. diff --git a/Annex/Content/Direct.hs b/Annex/Content/Direct.hs index 86e053d7f7..1edcbaed51 100644 --- a/Annex/Content/Direct.hs +++ b/Annex/Content/Direct.hs @@ -1,12 +1,13 @@ {- git-annex file content managing for direct mode + - + - This is deprecated, and will be removed when direct mode gets removed + - from git-annex. - - Copyright 2012-2014 Joey Hess - - Licensed under the GNU GPL version 3 or higher. -} -{-# LANGUAGE CPP #-} - module Annex.Content.Direct ( associatedFiles, associatedFilesRelative, @@ -26,15 +27,10 @@ module Annex.Content.Direct ( sameFileStatus, removeInodeCache, toInodeCache, - inodesChanged, - createInodeSentinalFile, addContentWhenNotPresent, - withTSDelta, - getTSDelta, ) where import Common.Annex -import qualified Annex import Annex.Perms import qualified Git import Utility.Tmp @@ -43,6 +39,7 @@ import Utility.InodeCache import Utility.CopyFile import Annex.ReplaceFile import Annex.Link +import Annex.InodeSentinal {- Absolute FilePaths of Files in the tree that are associated with a key. -} associatedFiles :: Key -> Annex [FilePath] @@ -212,52 +209,3 @@ addContentWhenNotPresent key contentfile associatedfile = do replaceFile associatedfile $ liftIO . void . copyFileExternal CopyAllMetaData contentfile updateInodeCache key associatedfile - -{- Some filesystems get new inodes each time they are mounted. - - In order to work on such a filesystem, a sentinal file is used to detect - - when the inodes have changed. - - - - If the sentinal file does not exist, we have to assume that the - - inodes have changed. - -} -inodesChanged :: Annex Bool -inodesChanged = sentinalInodesChanged <$> sentinalStatus - -withTSDelta :: (TSDelta -> Annex a) -> Annex a -withTSDelta a = a =<< getTSDelta - -getTSDelta :: Annex TSDelta -#ifdef mingw32_HOST_OS -getTSDelta = sentinalTSDelta <$> sentinalStatus -#else -getTSDelta = pure noTSDelta -- optimisation -#endif - -sentinalStatus :: Annex SentinalStatus -sentinalStatus = maybe check return =<< Annex.getState Annex.sentinalstatus - where - check = do - sc <- liftIO . checkSentinalFile =<< annexSentinalFile - Annex.changeState $ \s -> s { Annex.sentinalstatus = Just sc } - return sc - -{- The sentinal file is only created when first initializing a repository. - - If there are any annexed objects in the repository already, creating - - the file would invalidate their inode caches. -} -createInodeSentinalFile :: Annex () -createInodeSentinalFile = unlessM (alreadyexists <||> hasobjects) $ do - s <- annexSentinalFile - createAnnexDirectory (parentDir (sentinalFile s)) - liftIO $ writeSentinalFile s - where - alreadyexists = liftIO. sentinalFileExists =<< annexSentinalFile - hasobjects = liftIO . doesDirectoryExist =<< fromRepo gitAnnexObjectDir - -annexSentinalFile :: Annex SentinalFile -annexSentinalFile = do - sentinalfile <- fromRepo gitAnnexInodeSentinal - sentinalcachefile <- fromRepo gitAnnexInodeSentinalCache - return SentinalFile - { sentinalFile = sentinalfile - , sentinalCacheFile = sentinalcachefile - } diff --git a/Annex/Direct.hs b/Annex/Direct.hs index 803f020cac..8fced2d444 100644 --- a/Annex/Direct.hs +++ b/Annex/Direct.hs @@ -1,4 +1,7 @@ {- git-annex direct mode + - + - This is deprecated, and will be removed when direct mode gets removed + - from git-annex. - - Copyright 2012-2014 Joey Hess - @@ -36,6 +39,7 @@ import Annex.VariantFile import Git.Index import Annex.Index import Annex.LockFile +import Annex.InodeSentinal {- Uses git ls-files to find files that need to be committed, and stages - them into the index. Returns True if some changes were staged. -} diff --git a/Annex/Init.hs b/Annex/Init.hs index 7eea0dfa13..997312c31e 100644 --- a/Annex/Init.hs +++ b/Annex/Init.hs @@ -32,9 +32,9 @@ import Annex.UUID import Annex.Link import Config import Annex.Direct -import Annex.Content.Direct import Annex.Environment import Annex.Hook +import Annex.InodeSentinal import Upgrade #ifndef mingw32_HOST_OS import Utility.UserInfo @@ -96,7 +96,7 @@ initialize' = do , unlessM isBare switchHEADBack ) - createInodeSentinalFile + createInodeSentinalFile False uninitialize :: Annex () uninitialize = do diff --git a/Annex/InodeSentinal.hs b/Annex/InodeSentinal.hs new file mode 100644 index 0000000000..7047a405c5 --- /dev/null +++ b/Annex/InodeSentinal.hs @@ -0,0 +1,67 @@ +{- git-annex inode sentinal file + - + - Copyright 2012-2015 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +{-# LANGUAGE CPP #-} + +module Annex.InodeSentinal where + +import Common.Annex +import qualified Annex +import Utility.InodeCache +import Annex.Perms + +{- Some filesystems get new inodes each time they are mounted. + - In order to work on such a filesystem, a sentinal file is used to detect + - when the inodes have changed. + - + - If the sentinal file does not exist, we have to assume that the + - inodes have changed. + -} +inodesChanged :: Annex Bool +inodesChanged = sentinalInodesChanged <$> sentinalStatus + +withTSDelta :: (TSDelta -> Annex a) -> Annex a +withTSDelta a = a =<< getTSDelta + +getTSDelta :: Annex TSDelta +#ifdef mingw32_HOST_OS +getTSDelta = sentinalTSDelta <$> sentinalStatus +#else +getTSDelta = pure noTSDelta -- optimisation +#endif + +sentinalStatus :: Annex SentinalStatus +sentinalStatus = maybe check return =<< Annex.getState Annex.sentinalstatus + where + check = do + sc <- liftIO . checkSentinalFile =<< annexSentinalFile + Annex.changeState $ \s -> s { Annex.sentinalstatus = Just sc } + return sc + +{- The sentinal file is only created when first initializing a repository. + - If there are any annexed objects in the repository already, creating + - the file would invalidate their inode caches. -} +createInodeSentinalFile :: Bool -> Annex () +createInodeSentinalFile evenwithobjects = + unlessM (alreadyexists <||> hasobjects) $ do + s <- annexSentinalFile + createAnnexDirectory (parentDir (sentinalFile s)) + liftIO $ writeSentinalFile s + where + alreadyexists = liftIO. sentinalFileExists =<< annexSentinalFile + hasobjects + | evenwithobjects = pure False + | otherwise = liftIO . doesDirectoryExist =<< fromRepo gitAnnexObjectDir + +annexSentinalFile :: Annex SentinalFile +annexSentinalFile = do + sentinalfile <- fromRepo gitAnnexInodeSentinal + sentinalcachefile <- fromRepo gitAnnexInodeSentinalCache + return SentinalFile + { sentinalFile = sentinalfile + , sentinalCacheFile = sentinalcachefile + } diff --git a/Assistant/Threads/Committer.hs b/Assistant/Threads/Committer.hs index 59ca69e88e..745047d9dc 100644 --- a/Assistant/Threads/Committer.hs +++ b/Assistant/Threads/Committer.hs @@ -30,6 +30,7 @@ import Config import Annex.Content import Annex.Link import Annex.CatFile +import Annex.InodeSentinal import qualified Annex import Utility.InodeCache import Annex.Content.Direct diff --git a/Command/Add.hs b/Command/Add.hs index 27c11eab43..f4bdc70c98 100644 --- a/Command/Add.hs +++ b/Command/Add.hs @@ -32,6 +32,7 @@ import Annex.FileMatcher import Annex.ReplaceFile import Utility.Tmp import Utility.CopyFile +import Annex.InodeSentinal import Control.Exception (IOException) diff --git a/Upgrade/V5.hs b/Upgrade/V5.hs index cf273bb16a..e4501302dd 100644 --- a/Upgrade/V5.hs +++ b/Upgrade/V5.hs @@ -9,10 +9,17 @@ module Upgrade.V5 where import Common.Annex import Config +import Annex.InodeSentinal upgrade :: Bool -> Annex Bool upgrade automatic = do unless automatic $ showAction "v5 to v6" configureSmudgeFilter + -- Inode sentinal file was only used in direct mode and when + -- locking down files as they were added. In v6, it's used more + -- extensively, so make sure it exists, since old repos that didn't + -- use direct mode may not have created it. + unlessM (isDirect) $ + createInodeSentinalFile True return True diff --git a/doc/todo/smudge.mdwn b/doc/todo/smudge.mdwn index 373c655617..60cc65f3fc 100644 --- a/doc/todo/smudge.mdwn +++ b/doc/todo/smudge.mdwn @@ -325,12 +325,12 @@ files to be unlocked, while the indirect upgrades don't touch the files. #### implementation todo list -* inAnnex check should fail in the case where an annexed objects is unlocked +* inAnnex check should fail in the case where an annexed object is unlocked and has had its content changed. Could use an InodeCache for such objects. This parallels how inAnnex checks work for direct mode. * Reconcile staged changes into the associated files database, whenever the database is queried. -* See if the case where the associated files database is not used can be +* See if the cases where the associated files database is not used can be optimised. Eg, if the associated files database doesn't exist at all, we know smudge/clean are not used, so queries for associated files don't need to open the database or do reconciliation, but can simply return none. From 5e8c628d2e6e5c86b78765ca9836f267ee276cab Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 9 Dec 2015 17:00:37 -0400 Subject: [PATCH 28/96] add inode cache to the db Renamed the db to keys, since it is various info about a Keys. Dropping a key will update its pointer files, as long as their content can be verified to be unmodified. This falls back to checksum verification, but I want it to use an InodeCache of the key, for speed. But, I have not made anything populate that cache yet. --- Annex.hs | 6 +- Annex/Content.hs | 39 ++++++++++-- Annex/Content/Direct.hs | 9 --- Annex/InodeSentinal.hs | 9 +++ Command/Smudge.hs | 6 +- Database/{AssociatedFiles.hs => Keys.hs} | 66 ++++++++++++++------- Database/{AssociatedFiles => Keys}/Types.hs | 4 +- Database/Types.hs | 15 ++++- Locations.hs | 16 ++--- 9 files changed, 117 insertions(+), 53 deletions(-) rename Database/{AssociatedFiles.hs => Keys.hs} (54%) rename Database/{AssociatedFiles => Keys}/Types.hs (64%) diff --git a/Annex.hs b/Annex.hs index 5c9ec4cd41..c4df0b92f6 100644 --- a/Annex.hs +++ b/Annex.hs @@ -60,7 +60,7 @@ import Types.NumCopies import Types.LockCache import Types.DesktopNotify import Types.CleanupActions -import qualified Database.AssociatedFiles.Types +import qualified Database.Keys.Types #ifdef WITH_QUVI import Utility.Quvi (QuviVersion) #endif @@ -135,7 +135,7 @@ data AnnexState = AnnexState , desktopnotify :: DesktopNotify , workers :: [Either AnnexState (Async AnnexState)] , concurrentjobs :: Maybe Int - , associatedfilesdbhandle :: Maybe Database.AssociatedFiles.Types.DbHandle + , keysdbhandle :: Maybe Database.Keys.Types.DbHandle } newState :: GitConfig -> Git.Repo -> AnnexState @@ -181,7 +181,7 @@ newState c r = AnnexState , desktopnotify = mempty , workers = [] , concurrentjobs = Nothing - , associatedfilesdbhandle = Nothing + , keysdbhandle = Nothing } {- Makes an Annex state object for the specified git repo. diff --git a/Annex/Content.hs b/Annex/Content.hs index 564bc2dcae..a530245b3a 100644 --- a/Annex/Content.hs +++ b/Annex/Content.hs @@ -73,7 +73,8 @@ import qualified Backend import Types.NumCopies import Annex.UUID import Annex.InodeSentinal -import qualified Database.AssociatedFiles as AssociatedFiles +import Utility.InodeCache +import qualified Database.Keys {- Checks if a given key's content is currently present. -} inAnnex :: Key -> Annex Bool @@ -447,10 +448,10 @@ moveAnnex key src = withObjectLoc key storeobject storedirect ( alreadyhave , modifyContent dest $ do liftIO $ moveFile src dest - fs <- AssociatedFiles.getDb key + fs <- Database.Keys.getAssociatedFiles key if null fs then freezeContent dest - else mapM_ (populateAssociatedFile key dest) fs + else mapM_ (populatePointerFile key dest) fs ) storeindirect = storeobject =<< calcRepo (gitAnnexLocation key) @@ -480,8 +481,8 @@ moveAnnex key src = withObjectLoc key storeobject storedirect alreadyhave = liftIO $ removeFile src -populateAssociatedFile :: Key -> FilePath -> FilePath -> Annex () -populateAssociatedFile k obj f = go =<< isPointerFile f +populatePointerFile :: Key -> FilePath -> FilePath -> Annex () +populatePointerFile k obj f = go =<< isPointerFile f where go (Just k') | k == k' = liftIO $ do nukeFile f @@ -598,6 +599,8 @@ removeAnnex (ContentRemovalLock key) = withObjectLoc key remove removedirect secureErase file liftIO $ nukeFile file removeInodeCache key + mapM_ (void . tryIO . resetPointerFile key) + =<< Database.Keys.getAssociatedFiles key removedirect fs = do cache <- recordedInodeCache key removeInodeCache key @@ -607,6 +610,32 @@ removeAnnex (ContentRemovalLock key) = withObjectLoc key remove removedirect secureErase f replaceFile f $ makeAnnexLink l +{- To safely reset a pointer file, it has to be the unmodified content of + - the key. The expensive way to tell is to do a verification of its content. + - The cheaper way is to see if the InodeCache for the key matches the + - file. + -} +resetPointerFile :: Key -> FilePath -> Annex () +resetPointerFile key f = go =<< geti + where + go Nothing = noop + go (Just fc) = ifM (cheapcheck fc <||> expensivecheck fc) + ( do + secureErase f + liftIO $ nukeFile f + liftIO $ writeFile f (formatPointer key) + , noop + ) + cheapcheck fc = maybe (return False) (compareInodeCaches fc) + =<< Database.Keys.getInodeCache key + expensivecheck fc = ifM (verifyKeyContent AlwaysVerify Types.Remote.UnVerified key f) + -- The file could have been modified while it was + -- being verified. Detect that. + ( geti >>= maybe (return False) (compareInodeCaches fc) + , return False + ) + geti = withTSDelta (liftIO . genInodeCache f) + {- Runs the secure erase command if set, otherwise does nothing. - File may or may not be deleted at the end; caller is responsible for - making sure it's deleted. -} diff --git a/Annex/Content/Direct.hs b/Annex/Content/Direct.hs index 1edcbaed51..3d2ab1c585 100644 --- a/Annex/Content/Direct.hs +++ b/Annex/Content/Direct.hs @@ -180,15 +180,6 @@ sameFileStatus key f status = do ([], Nothing) -> return True _ -> return False -{- If the inodes have changed, only the size and mtime are compared. -} -compareInodeCaches :: InodeCache -> InodeCache -> Annex Bool -compareInodeCaches x y - | compareStrong x y = return True - | otherwise = ifM inodesChanged - ( return $ compareWeak x y - , return False - ) - elemInodeCaches :: InodeCache -> [InodeCache] -> Annex Bool elemInodeCaches _ [] = return False elemInodeCaches c (l:ls) = ifM (compareInodeCaches c l) diff --git a/Annex/InodeSentinal.hs b/Annex/InodeSentinal.hs index 7047a405c5..450e3b9677 100644 --- a/Annex/InodeSentinal.hs +++ b/Annex/InodeSentinal.hs @@ -14,6 +14,15 @@ import qualified Annex import Utility.InodeCache import Annex.Perms +{- If the inodes have changed, only the size and mtime are compared. -} +compareInodeCaches :: InodeCache -> InodeCache -> Annex Bool +compareInodeCaches x y + | compareStrong x y = return True + | otherwise = ifM inodesChanged + ( return $ compareWeak x y + , return False + ) + {- Some filesystems get new inodes each time they are mounted. - In order to work on such a filesystem, a sentinal file is used to detect - when the inodes have changed. diff --git a/Command/Smudge.hs b/Command/Smudge.hs index 9ce95d4ef2..b532ac3d1f 100644 --- a/Command/Smudge.hs +++ b/Command/Smudge.hs @@ -16,7 +16,7 @@ import Annex.FileMatcher import Types.KeySource import Backend import Logs.Location -import qualified Database.AssociatedFiles as AssociatedFiles +import qualified Database.Keys import qualified Data.ByteString.Lazy as B @@ -103,5 +103,5 @@ emitPointer = putStrLn . formatPointer updateAssociatedFiles :: Key -> FilePath -> Annex () updateAssociatedFiles k f = do - AssociatedFiles.addDb k f - AssociatedFiles.flushDb + Database.Keys.addAssociatedFile k f + Database.Keys.flushDb diff --git a/Database/AssociatedFiles.hs b/Database/Keys.hs similarity index 54% rename from Database/AssociatedFiles.hs rename to Database/Keys.hs index d17eb8112d..092c0d9004 100644 --- a/Database/AssociatedFiles.hs +++ b/Database/Keys.hs @@ -1,4 +1,4 @@ -{- Sqlite database used for tracking a key's associated files. +{- Sqlite database of information about Keys - - Copyright 2015 Joey Hess -: @@ -10,19 +10,22 @@ {-# LANGUAGE MultiParamTypeClasses, GeneralizedNewtypeDeriving #-} {-# LANGUAGE RankNTypes #-} -module Database.AssociatedFiles ( +module Database.Keys ( DbHandle, openDb, flushDb, closeDb, - addDb, - getDb, - removeDb, + addAssociatedFile, + getAssociatedFiles, + removeAssociatedFile, + setInodeCache, + getInodeCache, AssociatedId, + DataId, ) where import Database.Types -import Database.AssociatedFiles.Types +import Database.Keys.Types import qualified Database.Handle as H import Locations import Common hiding (delete) @@ -31,30 +34,35 @@ import Types.Key import Annex.Perms import Annex.LockFile import Messages +import Utility.InodeCache import Database.Persist.TH import Database.Esqueleto hiding (Key) -share [mkPersist sqlSettings, mkMigrate "migrateAssociated"] [persistLowerCase| +share [mkPersist sqlSettings, mkMigrate "migrateKeysDb"] [persistLowerCase| Associated key SKey file FilePath KeyFileIndex key file +Data + key SKey + inodeCache SInodeCache + KeyIndex key |] {- Opens the database, creating it if it doesn't exist yet. -} openDb :: Annex DbHandle -openDb = withExclusiveLock gitAnnexAssociatedFilesDbLock $ do - dbdir <- fromRepo gitAnnexAssociatedFilesDb +openDb = withExclusiveLock gitAnnexKeysDbLock $ do + dbdir <- fromRepo gitAnnexKeysDb let db = dbdir "db" unlessM (liftIO $ doesFileExist db) $ do liftIO $ do createDirectoryIfMissing True dbdir H.initDb db $ void $ - runMigrationSilent migrateAssociated + runMigrationSilent migrateKeysDb setAnnexDirPerm dbdir setAnnexFilePerm db - h <- liftIO $ H.openDb db "associated" + h <- liftIO $ H.openDb db "data" -- work around https://github.com/yesodweb/persistent/issues/474 liftIO setConsoleEncoding @@ -70,19 +78,19 @@ withDbHandle a = do liftIO $ a h dbHandle :: Annex DbHandle -dbHandle = maybe startup return =<< Annex.getState Annex.associatedfilesdbhandle +dbHandle = maybe startup return =<< Annex.getState Annex.keysdbhandle where startup = do h <- openDb - Annex.changeState $ \s -> s { Annex.associatedfilesdbhandle = Just h } + Annex.changeState $ \s -> s { Annex.keysdbhandle = Just h } return h {- Flushes any changes made to the database. -} flushDb :: Annex () flushDb = withDbHandle H.flushQueueDb -addDb :: Key -> FilePath -> Annex () -addDb k f = withDbHandle $ \h -> H.queueDb h (\_ _ -> pure True) $ do +addAssociatedFile :: Key -> FilePath -> Annex () +addAssociatedFile k f = withDbHandle $ \h -> H.queueDb h (\_ _ -> pure True) $ do -- If the same file was associated with a different key before, -- remove that. delete $ from $ \r -> do @@ -91,21 +99,35 @@ addDb k f = withDbHandle $ \h -> H.queueDb h (\_ _ -> pure True) $ do where sk = toSKey k -{- Note that the files returned used to be associated with the key, but +{- Note that the files returned were once associated with the key, but - some of them may not be any longer. -} -getDb :: Key -> Annex [FilePath] -getDb k = withDbHandle $ \h -> H.queryDb h $ getDb' $ toSKey k +getAssociatedFiles :: Key -> Annex [FilePath] +getAssociatedFiles k = withDbHandle $ \h -> H.queryDb h $ + getAssociatedFiles' $ toSKey k -getDb' :: SKey -> SqlPersistM [FilePath] -getDb' sk = do +getAssociatedFiles' :: SKey -> SqlPersistM [FilePath] +getAssociatedFiles' sk = do l <- select $ from $ \r -> do where_ (r ^. AssociatedKey ==. val sk) return (r ^. AssociatedFile) return $ map unValue l -removeDb :: Key -> FilePath -> Annex () -removeDb k f = withDbHandle $ \h -> H.queueDb h (\_ _ -> pure True) $ +removeAssociatedFile :: Key -> FilePath -> Annex () +removeAssociatedFile k f = withDbHandle $ \h -> H.queueDb h (\_ _ -> pure True) $ delete $ from $ \r -> do where_ (r ^. AssociatedKey ==. val sk &&. r ^. AssociatedFile ==. val f) where sk = toSKey k + +setInodeCache :: Key -> InodeCache -> Annex () +setInodeCache k i = withDbHandle $ \h -> H.queueDb h (\_ _ -> pure True) $ + void $ upsert (Data (toSKey k) (toSInodeCache i)) [] + +getInodeCache :: Key -> Annex (Maybe (InodeCache)) +getInodeCache k = withDbHandle $ \h -> H.queryDb h $ do + l <- select $ from $ \r -> do + where_ (r ^. DataKey ==. val sk) + return (r ^. DataInodeCache) + return $ headMaybe $ map (fromSInodeCache . unValue) l + where + sk = toSKey k diff --git a/Database/AssociatedFiles/Types.hs b/Database/Keys/Types.hs similarity index 64% rename from Database/AssociatedFiles/Types.hs rename to Database/Keys/Types.hs index 8c32dcf222..a627b3ca5b 100644 --- a/Database/AssociatedFiles/Types.hs +++ b/Database/Keys/Types.hs @@ -1,11 +1,11 @@ -{- Sqlite database used for tracking a key's associated files, data types. +{- Sqlite database of information about Keys, data types. - - Copyright 2015 Joey Hess -: - Licensed under the GNU GPL version 3 or higher. -} -module Database.AssociatedFiles.Types ( +module Database.Keys.Types ( DbHandle(..) ) where diff --git a/Database/Types.hs b/Database/Types.hs index dee56832b1..1476a693ae 100644 --- a/Database/Types.hs +++ b/Database/Types.hs @@ -13,6 +13,7 @@ import Database.Persist.TH import Data.Maybe import Types.Key +import Utility.InodeCache -- A serialized Key newtype SKey = SKey String @@ -22,6 +23,18 @@ toSKey :: Key -> SKey toSKey = SKey . key2file fromSKey :: SKey -> Key -fromSKey (SKey s) = fromMaybe (error $ "bad serialied key " ++ s) (file2key s) +fromSKey (SKey s) = fromMaybe (error $ "bad serialied Key " ++ s) (file2key s) derivePersistField "SKey" + +-- A serialized InodeCache +newtype SInodeCache = I String + deriving (Show, Read) + +toSInodeCache :: InodeCache -> SInodeCache +toSInodeCache = I . showInodeCache + +fromSInodeCache :: SInodeCache -> InodeCache +fromSInodeCache (I s) = fromMaybe (error $ "bad serialied InodeCache " ++ s) (readInodeCache s) + +derivePersistField "SInodeCache" diff --git a/Locations.hs b/Locations.hs index 6082957c74..200297321f 100644 --- a/Locations.hs +++ b/Locations.hs @@ -29,8 +29,8 @@ module Locations ( gitAnnexBadDir, gitAnnexBadLocation, gitAnnexUnusedLog, - gitAnnexAssociatedFilesDb, - gitAnnexAssociatedFilesDbLock, + gitAnnexKeysDb, + gitAnnexKeysDbLock, gitAnnexFsckState, gitAnnexFsckDbDir, gitAnnexFsckDbLock, @@ -239,13 +239,13 @@ gitAnnexBadLocation key r = gitAnnexBadDir r keyFile key gitAnnexUnusedLog :: FilePath -> Git.Repo -> FilePath gitAnnexUnusedLog prefix r = gitAnnexDir r (prefix ++ "unused") -{- .git/annex/map/ contains a database for the associated files map -} -gitAnnexAssociatedFilesDb :: Git.Repo -> FilePath -gitAnnexAssociatedFilesDb r = gitAnnexDir r "map" +{- .git/annex/keys/ contains a database of information about keys. -} +gitAnnexKeysDb :: Git.Repo -> FilePath +gitAnnexKeysDb r = gitAnnexDir r "keys" -{- Lock file for the associated files map database. -} -gitAnnexAssociatedFilesDbLock :: Git.Repo -> FilePath -gitAnnexAssociatedFilesDbLock r = gitAnnexAssociatedFilesDb r ++ "lck" +{- Lock file for the keys database. -} +gitAnnexKeysDbLock :: Git.Repo -> FilePath +gitAnnexKeysDbLock r = gitAnnexKeysDb r ++ "lck" {- .git/annex/fsck/uuid/ is used to store information about incremental - fscks. -} From ce73a96e4e5090e5b7618a564dc9405bec6c3de8 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 9 Dec 2015 17:47:05 -0400 Subject: [PATCH 29/96] use InodeCache when dropping a key to see if a pointer file can be safely reset The Keys database can hold multiple inode caches for a given key. One for the annex object, and one for each pointer file, which may not be hard linked to it. Inode caches for a key are recorded when its content is added to the annex, but only if it has known pointer files. This is to avoid the overhead of maintaining the database when not needed. When the smudge filter outputs a file's content, the inode cache is not updated, because git's smudge interface doesn't let us write the file. So, dropping will fall back to doing an expensive verification then. Ideally, git's interface would be improved, and then the inode cache could be updated then too. --- Annex/Action.hs | 2 ++ Annex/Content.hs | 22 +++++++++++----- Command/Smudge.hs | 9 ++----- Database/Keys.hs | 60 ++++++++++++++++++++++++++++--------------- Utility/InodeCache.hs | 2 +- doc/todo/smudge.mdwn | 13 ++-------- 6 files changed, 62 insertions(+), 46 deletions(-) diff --git a/Annex/Action.hs b/Annex/Action.hs index f59c9c2f45..348487e7c9 100644 --- a/Annex/Action.hs +++ b/Annex/Action.hs @@ -17,6 +17,7 @@ import System.Posix.Signals import Common.Annex import qualified Annex import Annex.Content +import qualified Database.Keys {- Actions to perform each time ran. -} startup :: Annex () @@ -32,4 +33,5 @@ shutdown :: Bool -> Annex () shutdown nocommit = do saveState nocommit sequence_ =<< M.elems <$> Annex.getState Annex.cleanup + Database.Keys.shutdown liftIO reapZombies -- zombies from long-running git processes diff --git a/Annex/Content.hs b/Annex/Content.hs index a530245b3a..e635b97a3d 100644 --- a/Annex/Content.hs +++ b/Annex/Content.hs @@ -451,7 +451,9 @@ moveAnnex key src = withObjectLoc key storeobject storedirect fs <- Database.Keys.getAssociatedFiles key if null fs then freezeContent dest - else mapM_ (populatePointerFile key dest) fs + else do + mapM_ (populatePointerFile key dest) fs + Database.Keys.storeInodeCaches key (dest:fs) ) storeindirect = storeobject =<< calcRepo (gitAnnexLocation key) @@ -505,7 +507,9 @@ linkAnnex key src = do ( return LinkAnnexNoop , modifyContent dest $ ifM (liftIO $ createLinkOrCopy src dest) - ( return LinkAnnexOk + ( do + Database.Keys.storeInodeCaches key [dest, src] + return LinkAnnexOk , return LinkAnnexFailed ) ) @@ -601,6 +605,7 @@ removeAnnex (ContentRemovalLock key) = withObjectLoc key remove removedirect removeInodeCache key mapM_ (void . tryIO . resetPointerFile key) =<< Database.Keys.getAssociatedFiles key + Database.Keys.removeInodeCaches key removedirect fs = do cache <- recordedInodeCache key removeInodeCache key @@ -613,8 +618,7 @@ removeAnnex (ContentRemovalLock key) = withObjectLoc key remove removedirect {- To safely reset a pointer file, it has to be the unmodified content of - the key. The expensive way to tell is to do a verification of its content. - The cheaper way is to see if the InodeCache for the key matches the - - file. - -} + - file. -} resetPointerFile :: Key -> FilePath -> Annex () resetPointerFile key f = go =<< geti where @@ -624,10 +628,14 @@ resetPointerFile key f = go =<< geti secureErase f liftIO $ nukeFile f liftIO $ writeFile f (formatPointer key) - , noop + -- Can't delete the pointer file. + -- If it was a hard link to the annex object, + -- that object might have been frozen as part of the + -- removal process, so thaw it. + , thawContent f ) - cheapcheck fc = maybe (return False) (compareInodeCaches fc) - =<< Database.Keys.getInodeCache key + cheapcheck fc = anyM (compareInodeCaches fc) + =<< Database.Keys.getInodeCaches key expensivecheck fc = ifM (verifyKeyContent AlwaysVerify Types.Remote.UnVerified key f) -- The file could have been modified while it was -- being verified. Detect that. diff --git a/Command/Smudge.hs b/Command/Smudge.hs index b532ac3d1f..14d3a7f41d 100644 --- a/Command/Smudge.hs +++ b/Command/Smudge.hs @@ -48,7 +48,7 @@ smudge file = do case parseLinkOrPointer b of Nothing -> liftIO $ B.putStr b Just k -> do - updateAssociatedFiles k file + Database.Keys.addAssociatedFile k file content <- calcRepo (gitAnnexLocation k) liftIO $ B.hPut stdout . fromMaybe b =<< catchMaybeIO (B.readFile content) @@ -65,7 +65,7 @@ clean file = do else ifM (shouldAnnex file) ( do k <- ingest file - updateAssociatedFiles k file + Database.Keys.addAssociatedFile k file liftIO $ emitPointer k , liftIO $ B.hPut stdout b ) @@ -100,8 +100,3 @@ ingest file = do emitPointer :: Key -> IO () emitPointer = putStrLn . formatPointer - -updateAssociatedFiles :: Key -> FilePath -> Annex () -updateAssociatedFiles k f = do - Database.Keys.addAssociatedFile k f - Database.Keys.flushDb diff --git a/Database/Keys.hs b/Database/Keys.hs index 092c0d9004..78d583d633 100644 --- a/Database/Keys.hs +++ b/Database/Keys.hs @@ -13,15 +13,17 @@ module Database.Keys ( DbHandle, openDb, - flushDb, closeDb, + shutdown, addAssociatedFile, getAssociatedFiles, removeAssociatedFile, - setInodeCache, - getInodeCache, + storeInodeCaches, + addInodeCaches, + getInodeCaches, + removeInodeCaches, AssociatedId, - DataId, + ContentId, ) where import Database.Types @@ -35,6 +37,7 @@ import Annex.Perms import Annex.LockFile import Messages import Utility.InodeCache +import Annex.InodeSentinal import Database.Persist.TH import Database.Esqueleto hiding (Key) @@ -44,10 +47,10 @@ Associated key SKey file FilePath KeyFileIndex key file -Data +Content key SKey - inodeCache SInodeCache - KeyIndex key + cache SInodeCache + KeyCacheIndex key cache |] {- Opens the database, creating it if it doesn't exist yet. -} @@ -62,7 +65,7 @@ openDb = withExclusiveLock gitAnnexKeysDbLock $ do runMigrationSilent migrateKeysDb setAnnexDirPerm dbdir setAnnexFilePerm db - h <- liftIO $ H.openDb db "data" + h <- liftIO $ H.openDb db "content" -- work around https://github.com/yesodweb/persistent/issues/474 liftIO setConsoleEncoding @@ -85,9 +88,12 @@ dbHandle = maybe startup return =<< Annex.getState Annex.keysdbhandle Annex.changeState $ \s -> s { Annex.keysdbhandle = Just h } return h -{- Flushes any changes made to the database. -} -flushDb :: Annex () -flushDb = withDbHandle H.flushQueueDb +shutdown :: Annex () +shutdown = maybe noop go =<< Annex.getState Annex.keysdbhandle + where + go h = do + Annex.changeState $ \s -> s { Annex.keysdbhandle = Nothing } + liftIO $ closeDb h addAssociatedFile :: Key -> FilePath -> Annex () addAssociatedFile k f = withDbHandle $ \h -> H.queueDb h (\_ _ -> pure True) $ do @@ -113,21 +119,35 @@ getAssociatedFiles' sk = do return $ map unValue l removeAssociatedFile :: Key -> FilePath -> Annex () -removeAssociatedFile k f = withDbHandle $ \h -> H.queueDb h (\_ _ -> pure True) $ +removeAssociatedFile k f = withDbHandle $ \h -> H.queueDb h (\_ _ -> pure True) $ delete $ from $ \r -> do where_ (r ^. AssociatedKey ==. val sk &&. r ^. AssociatedFile ==. val f) where sk = toSKey k -setInodeCache :: Key -> InodeCache -> Annex () -setInodeCache k i = withDbHandle $ \h -> H.queueDb h (\_ _ -> pure True) $ - void $ upsert (Data (toSKey k) (toSInodeCache i)) [] +{- Stats the files, and stores their InodeCaches. -} +storeInodeCaches :: Key -> [FilePath] -> Annex () +storeInodeCaches k fs = withTSDelta $ \d -> + addInodeCaches k . catMaybes =<< liftIO (mapM (`genInodeCache` d) fs) -getInodeCache :: Key -> Annex (Maybe (InodeCache)) -getInodeCache k = withDbHandle $ \h -> H.queryDb h $ do +addInodeCaches :: Key -> [InodeCache] -> Annex () +addInodeCaches k is = withDbHandle $ \h -> H.queueDb h (\_ _ -> pure True) $ + forM_ is $ \i -> insertUnique $ Content (toSKey k) (toSInodeCache i) + +{- A key may have multiple InodeCaches; one for the annex object, and one + - for each pointer file that is a copy of it. -} +getInodeCaches :: Key -> Annex [InodeCache] +getInodeCaches k = withDbHandle $ \h -> H.queryDb h $ do l <- select $ from $ \r -> do - where_ (r ^. DataKey ==. val sk) - return (r ^. DataInodeCache) - return $ headMaybe $ map (fromSInodeCache . unValue) l + where_ (r ^. ContentKey ==. val sk) + return (r ^. ContentCache) + return $ map (fromSInodeCache . unValue) l + where + sk = toSKey k + +removeInodeCaches :: Key -> Annex () +removeInodeCaches k = withDbHandle $ \h -> H.queueDb h (\_ _ -> pure True) $ + delete $ from $ \r -> do + where_ (r ^. ContentKey ==. val sk) where sk = toSKey k diff --git a/Utility/InodeCache.hs b/Utility/InodeCache.hs index b5fe9034e7..8bd7ae0cd2 100644 --- a/Utility/InodeCache.hs +++ b/Utility/InodeCache.hs @@ -1,7 +1,7 @@ {- Caching a file's inode, size, and modification time - to see when it's changed. - - - Copyright 2013, 2014 Joey Hess + - Copyright 2013-2014 Joey Hess - - License: BSD-2-clause -} diff --git a/doc/todo/smudge.mdwn b/doc/todo/smudge.mdwn index 60cc65f3fc..cc8da67d00 100644 --- a/doc/todo/smudge.mdwn +++ b/doc/todo/smudge.mdwn @@ -328,6 +328,8 @@ files to be unlocked, while the indirect upgrades don't touch the files. * inAnnex check should fail in the case where an annexed object is unlocked and has had its content changed. Could use an InodeCache for such objects. This parallels how inAnnex checks work for direct mode. +* Also, Annex.Content.prepSendAnnex should check the InodeCache for + changes. * Reconcile staged changes into the associated files database, whenever the database is queried. * See if the cases where the associated files database is not used can be @@ -338,17 +340,6 @@ files to be unlocked, while the indirect upgrades don't touch the files. (when not in direct mode). However, beware over-optimisation breaking the assistant or perhaps other long-lived processes. -* Update pointer files when dropping the content of a key. - - Check the associated files database to find associated files for the key. - - Verify that worktree files are not modified from the annexed object. - How? InodeCache could be maintained, but the smudge filer interface - wouldn't let it be updated when smudging a file. May need to take - an expensive path: - 1. stat object file - 2. stat worktree file - 3. if same stat, ok else hash worktree file - 4. stat worktree file again after checking hash; make sure it's - unchanged from earlier stat * Convert `git annex unlock` to stage a pointer file, and hard link to the annexed object (or write pointer file if annexed object not present). - Also needs to thaw annex object file From aa88851ec1b2f03c551ba6e67b2a96a41f180163 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 9 Dec 2015 19:38:37 -0400 Subject: [PATCH 30/96] reorder --- Annex/Content.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Annex/Content.hs b/Annex/Content.hs index e635b97a3d..d73132a0e7 100644 --- a/Annex/Content.hs +++ b/Annex/Content.hs @@ -602,10 +602,10 @@ removeAnnex (ContentRemovalLock key) = withObjectLoc key remove removedirect remove file = cleanObjectLoc key $ do secureErase file liftIO $ nukeFile file - removeInodeCache key mapM_ (void . tryIO . resetPointerFile key) =<< Database.Keys.getAssociatedFiles key Database.Keys.removeInodeCaches key + removeInodeCache key removedirect fs = do cache <- recordedInodeCache key removeInodeCache key From 3719d1b390099736533d79f004793b09b6f1c618 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 9 Dec 2015 19:43:15 -0400 Subject: [PATCH 31/96] make clear when code is using deprecated direct mode files --- Annex/Content.hs | 28 ++++++++++++++-------------- debian/changelog | 3 +++ 2 files changed, 17 insertions(+), 14 deletions(-) diff --git a/Annex/Content.hs b/Annex/Content.hs index d73132a0e7..12859c856e 100644 --- a/Annex/Content.hs +++ b/Annex/Content.hs @@ -63,7 +63,7 @@ import Config import Git.SharedRepository import Annex.Perms import Annex.Link -import Annex.Content.Direct +import qualified Annex.Content.Direct as Direct import Annex.ReplaceFile import Annex.LockPool import Messages.Progress @@ -100,7 +100,7 @@ inAnnex' isgood bad check key = withObjectLoc key checkindirect checkdirect checkdirect (loc:locs) = do r <- check loc if isgood r - then ifM (goodContent key loc) + then ifM (Direct.goodContent key loc) ( return r , checkdirect locs ) @@ -471,12 +471,12 @@ moveAnnex key src = withObjectLoc key storeobject storedirect v <- isAnnexLink f if Just key == v then do - updateInodeCache key src + Direct.updateInodeCache key src replaceFile f $ liftIO . moveFile src chmodContent f forM_ fs $ - addContentWhenNotPresent key f - else ifM (goodContent key f) + Direct.addContentWhenNotPresent key f + else ifM (Direct.goodContent key f) ( storedirect' alreadyhave fs , storedirect' fallback fs ) @@ -551,10 +551,10 @@ prepSendAnnex key = withObjectLoc key indirect direct indirect f = return $ Just (f, return True) direct [] = return Nothing direct (f:fs) = do - cache <- recordedInodeCache key + cache <- Direct.recordedInodeCache key -- check that we have a good file - ifM (sameInodeCache f cache) - ( return $ Just (f, sameInodeCache f cache) + ifM (Direct.sameInodeCache f cache) + ( return $ Just (f, Direct.sameInodeCache f cache) , direct fs ) @@ -566,7 +566,7 @@ prepSendAnnex key = withObjectLoc key indirect direct withObjectLoc :: Key -> (FilePath -> Annex a) -> ([FilePath] -> Annex a) -> Annex a withObjectLoc key indirect direct = ifM isDirect ( do - fs <- associatedFiles key + fs <- Direct.associatedFiles key if null fs then goindirect else direct fs @@ -605,12 +605,12 @@ removeAnnex (ContentRemovalLock key) = withObjectLoc key remove removedirect mapM_ (void . tryIO . resetPointerFile key) =<< Database.Keys.getAssociatedFiles key Database.Keys.removeInodeCaches key - removeInodeCache key + Direct.removeInodeCache key removedirect fs = do - cache <- recordedInodeCache key - removeInodeCache key + cache <- Direct.recordedInodeCache key + Direct.removeInodeCache key mapM_ (resetfile cache) fs - resetfile cache f = whenM (sameInodeCache f cache) $ do + resetfile cache f = whenM (Direct.sameInodeCache f cache) $ do l <- calcRepo $ gitAnnexLink f key secureErase f replaceFile f $ makeAnnexLink l @@ -713,7 +713,7 @@ getKeysPresent keyloc = do InRepository -> case fileKey (takeFileName d) of Nothing -> return False Just k -> Annex.eval s $ - anyM (goodContent k) =<< associatedFiles k + anyM (Direct.goodContent k) =<< Direct.associatedFiles k {- In order to run Annex monad actions within unsafeInterleaveIO, - the current state is taken and reused. No changes made to this diff --git a/debian/changelog b/debian/changelog index 97dc5fab9c..ed1e32b735 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,4 @@ +git-annex (6.20151225) unstable; urgency=medium * annex.version increased to 6, but version 5 is also still supported. * The upgrade to version 6 is not done fully automatically, because @@ -8,6 +9,8 @@ * init: Configure .git/info/attributes to use git-annex as a smudge filter. Note that this changes the default behavior of git add in a newly initialized repository; it will add files to the annex. + + -- Joey Hess Tue, 08 Dec 2015 11:14:03 -0400 git-annex (5.20151208) unstable; urgency=medium From 3b2a7f216d3c6aa9d82b275f83fd1e07de0f8be4 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Thu, 10 Dec 2015 14:20:38 -0400 Subject: [PATCH 32/96] move --- Annex/Content/Direct.hs | 15 --------------- Annex/InodeSentinal.hs | 19 ++++++++++++++++++- 2 files changed, 18 insertions(+), 16 deletions(-) diff --git a/Annex/Content/Direct.hs b/Annex/Content/Direct.hs index 3d2ab1c585..59bea8f99b 100644 --- a/Annex/Content/Direct.hs +++ b/Annex/Content/Direct.hs @@ -162,14 +162,6 @@ removeInodeCache key = withInodeCacheFile key $ \f -> withInodeCacheFile :: Key -> (FilePath -> Annex a) -> Annex a withInodeCacheFile key a = a =<< calcRepo (gitAnnexInodeCache key) -{- Checks if a InodeCache matches the current version of a file. -} -sameInodeCache :: FilePath -> [InodeCache] -> Annex Bool -sameInodeCache _ [] = return False -sameInodeCache file old = go =<< withTSDelta (liftIO . genInodeCache file) - where - go Nothing = return False - go (Just curr) = elemInodeCaches curr old - {- Checks if a FileStatus matches the recorded InodeCache of a file. -} sameFileStatus :: Key -> FilePath -> FileStatus -> Annex Bool sameFileStatus key f status = do @@ -180,13 +172,6 @@ sameFileStatus key f status = do ([], Nothing) -> return True _ -> return False -elemInodeCaches :: InodeCache -> [InodeCache] -> Annex Bool -elemInodeCaches _ [] = return False -elemInodeCaches c (l:ls) = ifM (compareInodeCaches c l) - ( return True - , elemInodeCaches c ls - ) - compareInodeCachesWith :: Annex InodeComparisonType compareInodeCachesWith = ifM inodesChanged ( return Weakly, return Strongly ) diff --git a/Annex/InodeSentinal.hs b/Annex/InodeSentinal.hs index 450e3b9677..8b48094dfc 100644 --- a/Annex/InodeSentinal.hs +++ b/Annex/InodeSentinal.hs @@ -14,7 +14,8 @@ import qualified Annex import Utility.InodeCache import Annex.Perms -{- If the inodes have changed, only the size and mtime are compared. -} +{- If the sendinal shows the inodes have changed, only the size and mtime + - are compared. -} compareInodeCaches :: InodeCache -> InodeCache -> Annex Bool compareInodeCaches x y | compareStrong x y = return True @@ -23,6 +24,22 @@ compareInodeCaches x y , return False ) +{- Checks if one of the provided old InodeCache matches the current + - version of a file. -} +sameInodeCache :: FilePath -> [InodeCache] -> Annex Bool +sameInodeCache _ [] = return False +sameInodeCache file old = go =<< withTSDelta (liftIO . genInodeCache file) + where + go Nothing = return False + go (Just curr) = elemInodeCaches curr old + +elemInodeCaches :: InodeCache -> [InodeCache] -> Annex Bool +elemInodeCaches _ [] = return False +elemInodeCaches c (l:ls) = ifM (compareInodeCaches c l) + ( return True + , elemInodeCaches c ls + ) + {- Some filesystems get new inodes each time they are mounted. - In order to work on such a filesystem, a sentinal file is used to detect - when the inodes have changed. From 2b8f6b8b2f2312bab5a39f1a6b420610cff8ba62 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Thu, 10 Dec 2015 14:29:34 -0400 Subject: [PATCH 33/96] check inode cache in prepSendAnnex This does mean one query of the database every time an object is sent. May impact performance. --- Annex/Content.hs | 26 ++++++++++++++++++++------ Remote/Git.hs | 2 +- doc/todo/smudge.mdwn | 2 -- 3 files changed, 21 insertions(+), 9 deletions(-) diff --git a/Annex/Content.hs b/Annex/Content.hs index 12859c856e..bfc70ac9a8 100644 --- a/Annex/Content.hs +++ b/Annex/Content.hs @@ -518,7 +518,7 @@ data LinkAnnexResult = LinkAnnexOk | LinkAnnexFailed | LinkAnnexNoop {- Runs an action to transfer an object's content. - - - In direct mode, it's possible for the file to change as it's being sent. + - In some cases, it's possible for the file to change as it's being sent. - If this happens, runs the rollback action and returns False. The - rollback action should remove the data that was transferred. -} @@ -538,8 +538,9 @@ sendAnnex key rollback sendobject = go =<< prepSendAnnex key {- Returns a file that contains an object's content, - and a check to run after the transfer is complete. - - - In direct mode, it's possible for the file to change as it's being sent, - - and the check detects this case and returns False. + - When a file is unlocked (or in direct mode), it's possble for its + - content to change as it's being sent. The check detects this case + - and returns False. - - Note that the returned check action is, in some cases, run in the - Annex monad of the remote that is receiving the object, rather than @@ -548,13 +549,26 @@ sendAnnex key rollback sendobject = go =<< prepSendAnnex key prepSendAnnex :: Key -> Annex (Maybe (FilePath, Annex Bool)) prepSendAnnex key = withObjectLoc key indirect direct where - indirect f = return $ Just (f, return True) + indirect f = do + cache <- Database.Keys.getInodeCaches key + cache' <- if null cache + -- Since no inode cache is in the database, this + -- object is not currently unlocked. But that could + -- change while the transfer is in progress, so + -- generate an inode cache for the starting + -- content. + then maybeToList <$> + withTSDelta (liftIO . genInodeCache f) + else pure cache + return $ if null cache' + then Nothing + else Just (f, sameInodeCache f cache') direct [] = return Nothing direct (f:fs) = do cache <- Direct.recordedInodeCache key -- check that we have a good file - ifM (Direct.sameInodeCache f cache) - ( return $ Just (f, Direct.sameInodeCache f cache) + ifM (sameInodeCache f cache) + ( return $ Just (f, sameInodeCache f cache) , direct fs ) diff --git a/Remote/Git.hs b/Remote/Git.hs index 890e40b514..6dc5345c94 100644 --- a/Remote/Git.hs +++ b/Remote/Git.hs @@ -672,7 +672,7 @@ wantHardLink :: Annex Bool wantHardLink = (annexHardLink <$> Annex.getGitConfig) <&&> (not <$> isDirect) -- Copies from src to dest, updating a meter. If the copy finishes --- successfully, calls a final check action, which must also success, or +-- successfully, calls a final check action, which must also succeed, or -- returns false. -- -- If either the remote or local repository wants to use hard links, diff --git a/doc/todo/smudge.mdwn b/doc/todo/smudge.mdwn index cc8da67d00..c203c95663 100644 --- a/doc/todo/smudge.mdwn +++ b/doc/todo/smudge.mdwn @@ -328,8 +328,6 @@ files to be unlocked, while the indirect upgrades don't touch the files. * inAnnex check should fail in the case where an annexed object is unlocked and has had its content changed. Could use an InodeCache for such objects. This parallels how inAnnex checks work for direct mode. -* Also, Annex.Content.prepSendAnnex should check the InodeCache for - changes. * Reconcile staged changes into the associated files database, whenever the database is queried. * See if the cases where the associated files database is not used can be From f80a3d8cd060405cdf66f5f45521dec757399707 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Thu, 10 Dec 2015 14:51:04 -0400 Subject: [PATCH 34/96] check InodeCache in inAnnex et al This avoids querying the database when the content file doen't exist (or otherwise fails the provided check). However, it does add overhead of querying the database, and will certianly impact performance. --- Annex/Content.hs | 19 +++++++++++++++---- doc/todo/smudge.mdwn | 9 +++------ 2 files changed, 18 insertions(+), 10 deletions(-) diff --git a/Annex/Content.hs b/Annex/Content.hs index bfc70ac9a8..44f1ad0a02 100644 --- a/Annex/Content.hs +++ b/Annex/Content.hs @@ -84,7 +84,10 @@ inAnnex key = inAnnexCheck key $ liftIO . doesFileExist inAnnexCheck :: Key -> (FilePath -> Annex Bool) -> Annex Bool inAnnexCheck key check = inAnnex' id False check key -{- Generic inAnnex, handling both indirect and direct mode. +{- inAnnex that performs an arbitrary check of the key's content. + - + - When the content is unlocked, it must also be unmodified, or the bad + - value will be returned. - - In direct mode, at least one of the associated files must pass the - check. Additionally, the file must be unmodified. @@ -93,9 +96,17 @@ inAnnex' :: (a -> Bool) -> a -> (FilePath -> Annex a) -> Key -> Annex a inAnnex' isgood bad check key = withObjectLoc key checkindirect checkdirect where checkindirect loc = do - whenM (fromRepo Git.repoIsUrl) $ - error "inAnnex cannot check remote repo" - check loc + r <- check loc + if isgood r + then do + cache <- Database.Keys.getInodeCaches key + if null cache + then return r + else ifM (sameInodeCache loc cache) + ( return r + , return bad + ) + else return bad checkdirect [] = return bad checkdirect (loc:locs) = do r <- check loc diff --git a/doc/todo/smudge.mdwn b/doc/todo/smudge.mdwn index c203c95663..56a79e4d10 100644 --- a/doc/todo/smudge.mdwn +++ b/doc/todo/smudge.mdwn @@ -325,14 +325,11 @@ files to be unlocked, while the indirect upgrades don't touch the files. #### implementation todo list -* inAnnex check should fail in the case where an annexed object is unlocked - and has had its content changed. Could use an InodeCache for - such objects. This parallels how inAnnex checks work for direct mode. * Reconcile staged changes into the associated files database, whenever the database is queried. -* See if the cases where the associated files database is not used can be - optimised. Eg, if the associated files database doesn't exist at all, - we know smudge/clean are not used, so queries for associated files don't +* See if the cases where the Keys database is not used can be + optimised. Eg, if the Keys database doesn't exist at all, + we know smudge/clean are not used, so queries don't need to open the database or do reconciliation, but can simply return none. Also, no need for Backend.lookupFile to catKeyFile in this case (when not in direct mode). From 108f711d371c7e259866b2a5ba5f6784db455a88 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Thu, 10 Dec 2015 14:54:03 -0400 Subject: [PATCH 35/96] todo --- doc/todo/smudge.mdwn | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/doc/todo/smudge.mdwn b/doc/todo/smudge.mdwn index 56a79e4d10..6259d4a276 100644 --- a/doc/todo/smudge.mdwn +++ b/doc/todo/smudge.mdwn @@ -353,6 +353,11 @@ files to be unlocked, while the indirect upgrades don't touch the files. - Should probably automatically handle merge conflicts between annex symlinks and pointer files too. Maybe by always resulting in a pointer file, since the symlinks don't work everwhere. +* Crippled filesystem should cause all files to be transparently unlocked. + Note that this presents problems when dealing with merge conflicts and + when pushing changes committed in such a repo. Ideally, should avoid + committing implicit unlocks, or should prevent such commits leaking out + in pushes. ---- From 06a8256bf6f3087801eae67aa2baa421525b27d6 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Thu, 10 Dec 2015 16:06:58 -0400 Subject: [PATCH 36/96] always format pointer file with a trailing newline Before the smudge filter added a trailing newline, but other things that wrote formatPointer to a file did not. also some new pointer staging code to use later --- Annex/Link.hs | 16 ++++++++++++++-- Command/Smudge.hs | 2 +- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/Annex/Link.hs b/Annex/Link.hs index f405403f26..61c61b5613 100644 --- a/Annex/Link.hs +++ b/Annex/Link.hs @@ -110,12 +110,23 @@ hashSymlink' :: Git.HashObject.HashObjectHandle -> LinkTarget -> Annex Sha hashSymlink' h linktarget = liftIO $ Git.HashObject.hashBlob h $ toInternalGitPath linktarget -{- Stages a symlink to the annex, using a Sha of its target. -} +{- Stages a symlink to an annexed object, using a Sha of its target. -} stageSymlink :: FilePath -> Sha -> Annex () stageSymlink file sha = Annex.Queue.addUpdateIndex =<< inRepo (Git.UpdateIndex.stageSymlink file sha) +{- Injects a pointer file content into git, returning its Sha. -} +hashPointerFile :: Key -> Annex Sha +hashPointerFile key = inRepo $ Git.HashObject.hashObject BlobObject $ + formatPointer key + +{- Stages a pointer file, using a Sha of its content -} +stagePointerFile :: FilePath -> Sha -> Annex () +stagePointerFile file sha = + Annex.Queue.addUpdateIndex =<< + inRepo (Git.UpdateIndex.stageFile sha FileBlob file) + {- Parses a symlink target or a pointer file to a Key. - Only looks at the first line, as pointer files can have subsequent - lines. -} @@ -138,7 +149,8 @@ parseLinkOrPointer' s = headMaybe (lines (fromInternalGitPath s)) >>= go | otherwise = Nothing formatPointer :: Key -> String -formatPointer k = toInternalGitPath $ pathSeparator:objectDir key2file k +formatPointer k = + toInternalGitPath (pathSeparator:objectDir key2file k) ++ "\n" {- Checks if a file is a pointer to a key. -} isPointerFile :: FilePath -> Annex (Maybe Key) diff --git a/Command/Smudge.hs b/Command/Smudge.hs index 14d3a7f41d..cd33b193ea 100644 --- a/Command/Smudge.hs +++ b/Command/Smudge.hs @@ -99,4 +99,4 @@ ingest file = do return k emitPointer :: Key -> IO () -emitPointer = putStrLn . formatPointer +emitPointer = putStr . formatPointer From 9dffd3d25568eedfddafd67fcd7cc33dacff909b Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Thu, 10 Dec 2015 16:07:50 -0400 Subject: [PATCH 37/96] add generalized linkAnnex' --- Annex/Content.hs | 21 ++++++++++++++------- doc/todo/smudge.mdwn | 2 ++ 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/Annex/Content.hs b/Annex/Content.hs index 44f1ad0a02..10a59ae95b 100644 --- a/Annex/Content.hs +++ b/Annex/Content.hs @@ -25,6 +25,7 @@ module Annex.Content ( checkDiskSpace, moveAnnex, linkAnnex, + linkAnnex', LinkAnnexResult(..), sendAnnex, prepSendAnnex, @@ -514,15 +515,21 @@ populatePointerFile k obj f = go =<< isPointerFile f linkAnnex :: Key -> FilePath -> Annex LinkAnnexResult linkAnnex key src = do dest <- calcRepo (gitAnnexLocation key) + modifyContent dest $ linkAnnex' key src dest + +{- Hard links (or copies) src to dest, one of which should be the + - annex object. -} +linkAnnex' :: Key -> FilePath -> FilePath -> Annex LinkAnnexResult +linkAnnex' key src dest = ifM (liftIO $ doesFileExist dest) ( return LinkAnnexNoop - , modifyContent dest $ - ifM (liftIO $ createLinkOrCopy src dest) - ( do - Database.Keys.storeInodeCaches key [dest, src] - return LinkAnnexOk - , return LinkAnnexFailed - ) + , ifM (liftIO $ createLinkOrCopy src dest) + ( do + thawContent dest + Database.Keys.storeInodeCaches key [dest, src] + return LinkAnnexOk + , return LinkAnnexFailed + ) ) data LinkAnnexResult = LinkAnnexOk | LinkAnnexFailed | LinkAnnexNoop diff --git a/doc/todo/smudge.mdwn b/doc/todo/smudge.mdwn index 6259d4a276..dd990afc71 100644 --- a/doc/todo/smudge.mdwn +++ b/doc/todo/smudge.mdwn @@ -325,6 +325,8 @@ files to be unlocked, while the indirect upgrades don't touch the files. #### implementation todo list +* linkAnnex should check disk reserve when it falls back to copying the + file. * Reconcile staged changes into the associated files database, whenever the database is queried. * See if the cases where the Keys database is not used can be From e2c8dc6778f12669a8374f3c8b80b1eb3cd4a5f5 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Thu, 10 Dec 2015 16:12:05 -0400 Subject: [PATCH 38/96] v6 git-annex unlock Note that the implementation uses replaceFile, so that the actual replacement of the work tree file is atomic. This seems a good property to have! It would be possible for unlock in v6 mode to be run on files that do not have their content present. However, that would be a behavior change from before, and I don't see any immediate need to support it, so I didn't implement it. --- Command/Unlock.hs | 50 ++++++++++++++++++++++++++++++++------- debian/changelog | 2 ++ doc/git-annex-lock.mdwn | 2 +- doc/git-annex-unlock.mdwn | 12 ++++++++-- doc/todo/smudge.mdwn | 7 +++--- 5 files changed, 58 insertions(+), 15 deletions(-) diff --git a/Command/Unlock.hs b/Command/Unlock.hs index d1b1d0e90e..1cfd4a0b2b 100644 --- a/Command/Unlock.hs +++ b/Command/Unlock.hs @@ -1,6 +1,6 @@ {- git-annex command - - - Copyright 2010 Joey Hess + - Copyright 2010,2015 Joey Hess - - Licensed under the GNU GPL version 3 or higher. -} @@ -11,6 +11,9 @@ import Common.Annex import Command import Annex.Content import Annex.CatFile +import Annex.Version +import Annex.Link +import Annex.ReplaceFile import Utility.CopyFile cmd :: Command @@ -26,14 +29,45 @@ mkcmd n d = notDirect $ withGlobalOptions annexedMatchingOptions $ seek :: CmdParams -> CommandSeek seek = withFilesInGit $ whenAnnexed start -{- The unlock subcommand replaces the symlink with a copy of the file's - - content. -} +{- Before v6, the unlock subcommand replaces the symlink with a copy of + - the file's content. In v6 and above, it converts the file from a symlink + - to a pointer. -} start :: FilePath -> Key -> CommandStart -start file key = do - showStart "unlock" file +start file key = ifM (isJust <$> isAnnexLink file) + ( do + showStart "unlock" file + ifM (inAnnex key) + ( ifM versionSupportsUnlockedPointers + ( next $ performNew file key + , startOld file key + ) + , do + warning "content not present; cannot unlock" + next $ next $ return False + ) + , stop + ) + +performNew :: FilePath -> Key -> CommandPerform +performNew dest key = do + src <- calcRepo (gitAnnexLocation key) + replaceFile dest $ \tmp -> do + r <- linkAnnex' key src tmp + case r of + LinkAnnexOk -> return () + _ -> error "linkAnnex failed" + next $ cleanupNew dest key + +cleanupNew :: FilePath -> Key -> CommandCleanup +cleanupNew dest key = do + stagePointerFile dest =<< hashPointerFile key + return True + +startOld :: FilePath -> Key -> CommandStart +startOld file key = ifM (inAnnex key) ( ifM (isJust <$> catKeyFileHEAD file) - ( next $ perform file key + ( next $ performOld file key , do warning "this has not yet been committed to git; cannot unlock it" next $ next $ return False @@ -43,8 +77,8 @@ start file key = do next $ next $ return False ) -perform :: FilePath -> Key -> CommandPerform -perform dest key = ifM (checkDiskSpace Nothing key 0 True) +performOld :: FilePath -> Key -> CommandPerform +performOld dest key = ifM (checkDiskSpace Nothing key 0 True) ( do src <- calcRepo $ gitAnnexLocation key tmpdest <- fromRepo $ gitAnnexTmpObjectLocation key diff --git a/debian/changelog b/debian/changelog index e3ee9a34ad..1b645fd063 100644 --- a/debian/changelog +++ b/debian/changelog @@ -9,6 +9,8 @@ git-annex (6.20151225) unstable; urgency=medium * init: Configure .git/info/attributes to use git-annex as a smudge filter. Note that this changes the default behavior of git add in a newly initialized repository; it will add files to the annex. + * unlock, lock: In v6 mode, unlocking a file changes it from a symlink to a + pointer file, and this change can be committed to the git repository. -- Joey Hess Tue, 08 Dec 2015 11:14:03 -0400 diff --git a/doc/git-annex-lock.mdwn b/doc/git-annex-lock.mdwn index 4bf279fb22..b9e5d34503 100644 --- a/doc/git-annex-lock.mdwn +++ b/doc/git-annex-lock.mdwn @@ -9,7 +9,7 @@ git annex lock `[path ...]` # DESCRIPTION Use this to undo an unlock command if you don't want to modify -the files, or have made modifications you want to discard. +the files any longer, or have made modifications you want to discard. # OPTIONS diff --git a/doc/git-annex-unlock.mdwn b/doc/git-annex-unlock.mdwn index ac8c211855..1231468364 100644 --- a/doc/git-annex-unlock.mdwn +++ b/doc/git-annex-unlock.mdwn @@ -11,8 +11,16 @@ git annex unlock `[path ...]` Normally, the content of annexed files is protected from being changed. Unlocking an annexed file allows it to be modified. This replaces the symlink for each specified file with a copy of the file's content. -You can then modify it and `git annex add` (or `git commit`) to inject -it back into the annex. +You can then modify it and `git annex add` (or `git commit`) to save your +changes. + +In repositories with annex.version 5 or earlier, unlocking a file is local +to the repository, and is temporary. With version 6, unlocking a file +changes how it is stored in the git repository (from a symlink to a pointer +file), so you can commit it like any other change. Also in version 6, you +can use `git add` to add a fie to the annex in unlocked form. This allows +workflows where a file starts out unlocked, is modified as necessary, and +is locked once it reaches its final version. # OPTIONS diff --git a/doc/todo/smudge.mdwn b/doc/todo/smudge.mdwn index dd990afc71..72e062ff4b 100644 --- a/doc/todo/smudge.mdwn +++ b/doc/todo/smudge.mdwn @@ -325,6 +325,9 @@ files to be unlocked, while the indirect upgrades don't touch the files. #### implementation todo list +* Dropping a smudged file causes git status to show it as modified, + because the timestamp has changed. Avoid this by preserving timestamp + of smudged files when manipulating. * linkAnnex should check disk reserve when it falls back to copying the file. * Reconcile staged changes into the associated files database, whenever @@ -337,10 +340,6 @@ files to be unlocked, while the indirect upgrades don't touch the files. (when not in direct mode). However, beware over-optimisation breaking the assistant or perhaps other long-lived processes. -* Convert `git annex unlock` to stage a pointer file, and hard link to the - annexed object (or write pointer file if annexed object not present). - - Also needs to thaw annex object file - - Also needs to update associated files db. * Convert `git annex lock` to verify that worktree file is not modified (same check used when updating pointer files to the content of a key), and then delete the worktree file and replace with an annex symlink. From c910b4e2558e00219f0dcf896715c0b981025283 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Fri, 11 Dec 2015 10:42:18 -0400 Subject: [PATCH 39/96] wip --- Annex/Content.hs | 37 +++++++++--------- Command/Lock.hs | 90 ++++++++++++++++++++++++++++++++++++++------ doc/todo/smudge.mdwn | 5 +-- 3 files changed, 100 insertions(+), 32 deletions(-) diff --git a/Annex/Content.hs b/Annex/Content.hs index 10a59ae95b..912831db53 100644 --- a/Annex/Content.hs +++ b/Annex/Content.hs @@ -41,6 +41,7 @@ module Annex.Content ( dirKeys, withObjectLoc, staleKeysPrune, + isUnmodified, ) where import System.IO.Unsafe (unsafeInterleaveIO) @@ -634,10 +635,21 @@ removeAnnex (ContentRemovalLock key) = withObjectLoc key remove removedirect remove file = cleanObjectLoc key $ do secureErase file liftIO $ nukeFile file - mapM_ (void . tryIO . resetPointerFile key) + mapM_ (void . tryIO . resetpointer) =<< Database.Keys.getAssociatedFiles key Database.Keys.removeInodeCaches key Direct.removeInodeCache key + resetpointer file = ifM (isUnmodified key file) + ( do + secureErase file + liftIO $ nukeFile file + liftIO $ writeFile file (formatPointer key) + -- Can't delete the pointer file. + -- If it was a hard link to the annex object, + -- that object might have been frozen as part of the + -- removal process, so thaw it. + , void $ tryIO $ thawContent file + ) removedirect fs = do cache <- Direct.recordedInodeCache key Direct.removeInodeCache key @@ -647,25 +659,16 @@ removeAnnex (ContentRemovalLock key) = withObjectLoc key remove removedirect secureErase f replaceFile f $ makeAnnexLink l -{- To safely reset a pointer file, it has to be the unmodified content of - - the key. The expensive way to tell is to do a verification of its content. +{- Check if a file contains the unmodified content of the key. + - + - The expensive way to tell is to do a verification of its content. - The cheaper way is to see if the InodeCache for the key matches the - file. -} -resetPointerFile :: Key -> FilePath -> Annex () -resetPointerFile key f = go =<< geti +isUnmodified :: Key -> FilePath -> Annex Bool +isUnmodified key f = go =<< geti where - go Nothing = noop - go (Just fc) = ifM (cheapcheck fc <||> expensivecheck fc) - ( do - secureErase f - liftIO $ nukeFile f - liftIO $ writeFile f (formatPointer key) - -- Can't delete the pointer file. - -- If it was a hard link to the annex object, - -- that object might have been frozen as part of the - -- removal process, so thaw it. - , thawContent f - ) + go Nothing = return False + go (Just fc) = cheapcheck fc <||> expensivecheck fc cheapcheck fc = anyM (compareInodeCaches fc) =<< Database.Keys.getInodeCaches key expensivecheck fc = ifM (verifyKeyContent AlwaysVerify Types.Remote.UnVerified key f) diff --git a/Command/Lock.hs b/Command/Lock.hs index 7711ec3b8d..c425d7eb65 100644 --- a/Command/Lock.hs +++ b/Command/Lock.hs @@ -1,6 +1,6 @@ {- git-annex command - - - Copyright 2010 Joey Hess + - Copyright 2010,2015 Joey Hess - - Licensed under the GNU GPL version 3 or higher. -} @@ -11,6 +11,13 @@ import Common.Annex import Command import qualified Annex.Queue import qualified Annex +import Annex.Version +import Annex.Content +import Annex.Link +import Annex.InodeSentinal +import Utility.InodeCache +import qualified Database.Keys +import qualified Command.Add cmd :: Command cmd = notDirect $ withGlobalOptions annexedMatchingOptions $ @@ -19,18 +26,77 @@ cmd = notDirect $ withGlobalOptions annexedMatchingOptions $ paramPaths (withParams seek) seek :: CmdParams -> CommandSeek -seek ps = do - withFilesUnlocked start ps - withFilesUnlockedToBeCommitted start ps +seek ps = ifM versionSupportsUnlockedPointers + ( withFilesInGit (whenAnnexed startNew) ps + , do + withFilesUnlocked startOld ps + withFilesUnlockedToBeCommitted startOld ps + ) -start :: FilePath -> CommandStart -start file = do +startNew :: FilePath -> Key -> CommandStart +startNew file key = do showStart "lock" file - unlessM (Annex.getState Annex.force) $ - error "Locking this file would discard any changes you have made to it. Use 'git annex add' to stage your changes. (Or, use --force to override)" - next $ perform file + go =<< isPointerFile file + where + go (Just key') + | key' == key = cont False + | otherwise = errorModified + go Nothing = + ifM (isUnmodified key file) + ( cont False + , ifM (Annex.getState Annex.force) + ( cont True + , errorModified + ) + ) + cont = next . performNew file key -perform :: FilePath -> CommandPerform -perform file = do +performNew :: FilePath -> Key -> Bool -> CommandPerform +performNew file key filemodified = do + -- If other files use this same key, and are unlocked, + -- the annex object file might be hard linked to those files. + -- It's also possible that the annex object file was + -- modified while the file was unlocked. + -- + -- So, in order to lock the file's content, we need to break all + -- hard links to the annex object file, and if it's modified, + -- replace it with a copy of the content of one of the associated + -- files. + -- + -- When the file being locked is unmodified, the annex object file + -- can just be linked to it. (Which might already be the case, but + -- do it again to be sure.) + -- + -- When the file being locked is modified, find another associated + -- file that is unmodified, and copy it to the annex object file. + -- If there are no unmodified associated files, the content of + -- the key is lost. + -- + -- If the filesystem doesn't support hard links, none of this + -- is a concern. + obj <- calcRepo (gitAnnexLocation key) + + freezeContent obj + Command.Add.addLink file key + =<< withTSDelta (liftIO . genInodeCache file) + next $ cleanupNew file key + +cleanupNew :: FilePath -> Key -> CommandCleanup +cleanupNew file key = do + Database.Keys.removeAssociatedFile key file + return True + +startOld :: FilePath -> CommandStart +startOld file = do + showStart "lock" file + unlessM (Annex.getState Annex.force) + errorModified + next $ performOld file + +performOld :: FilePath -> CommandPerform +performOld file = do Annex.Queue.addCommand "checkout" [Param "--"] [file] - next $ return True -- no cleanup needed + next $ return True + +errorModified :: a +errorModified = error "Locking this file would discard any changes you have made to it. Use 'git annex add' to stage your changes. (Or, use --force to override)" diff --git a/doc/todo/smudge.mdwn b/doc/todo/smudge.mdwn index 72e062ff4b..cbe7a50d65 100644 --- a/doc/todo/smudge.mdwn +++ b/doc/todo/smudge.mdwn @@ -233,9 +233,8 @@ git annex lock/unlock: transition repositories to using pointers, and a cleaner unlock/lock for repos using symlinks. - unlock will stage a pointer file, and will copy the content of the object - out of .git/annex/objects to the work tree file. (Might want a --hardlink - switch.) + unlock will stage a pointer file, and will link the content of the object + from .git/annex/objects to the work tree file. lock will replace the current work tree file with the symlink, and stage it. Note that multiple work tree files could point to the same object. From 50e83b606c5509c594517f98caac5fb8b98f708a Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Fri, 11 Dec 2015 13:56:12 -0400 Subject: [PATCH 40/96] only make 1 hardlink max between pointer file and annex object If multiple files point to the same annex object, the user may want to modify them independently, so don't use a hard link. Also, check diskreserve when copying. --- Annex/Content.hs | 50 ++++++++++++++++++++++++++++++++++---------- Command/Smudge.hs | 2 +- doc/todo/smudge.mdwn | 2 -- 3 files changed, 40 insertions(+), 14 deletions(-) diff --git a/Annex/Content.hs b/Annex/Content.hs index d89e90f2a6..756c801ad8 100644 --- a/Annex/Content.hs +++ b/Annex/Content.hs @@ -72,11 +72,12 @@ import Messages.Progress import qualified Types.Remote import qualified Types.Backend import qualified Backend +import qualified Database.Keys import Types.NumCopies import Annex.UUID import Annex.InodeSentinal import Utility.InodeCache -import qualified Database.Keys +import Utility.PosixFiles {- Checks if a given key's content is currently present. -} inAnnex :: Key -> Annex Bool @@ -389,7 +390,7 @@ withTmp key action = do return res {- Checks that there is disk space available to store a given key, - - in a destination (or the annex) printing a warning if not. + - in a destination directory (or the annex) printing a warning if not. - - If the destination is on the same filesystem as the annex, - checks for any other running downloads, removing the amount of data still @@ -397,7 +398,12 @@ withTmp key action = do - when doing concurrent downloads. -} checkDiskSpace :: Maybe FilePath -> Key -> Integer -> Bool -> Annex Bool -checkDiskSpace destdir key alreadythere samefilesystem = ifM (Annex.getState Annex.force) +checkDiskSpace destdir key = checkDiskSpace' (fromMaybe 1 (keySize key)) destdir key + +{- Allows specifying the size of the key, if it's known, which is useful + - as not all keys know their size. -} +checkDiskSpace' :: Integer -> Maybe FilePath -> Key -> Integer -> Bool -> Annex Bool +checkDiskSpace' need destdir key alreadythere samefilesystem = ifM (Annex.getState Annex.force) ( return True , do -- We can't get inprogress and free at the same @@ -410,8 +416,8 @@ checkDiskSpace destdir key alreadythere samefilesystem = ifM (Annex.getState Ann then sizeOfDownloadsInProgress (/= key) else pure 0 free <- liftIO . getDiskFree =<< dir - case (free, fromMaybe 1 (keySize key)) of - (Just have, need) -> do + case free of + Just have -> do reserve <- annexDiskReserve <$> Annex.getGitConfig let delta = need + reserve - have - alreadythere + inprogress let ok = delta <= 0 @@ -499,14 +505,15 @@ moveAnnex key src = withObjectLoc key storeobject storedirect populatePointerFile :: Key -> FilePath -> FilePath -> Annex () populatePointerFile k obj f = go =<< isPointerFile f where - go (Just k') | k == k' = liftIO $ do - nukeFile f - unlessM (catchBoolIO $ createLinkOrCopy obj f) $ - writeFile f (formatPointer k) + go (Just k') | k == k' = do + liftIO $ nukeFile f + unlessM (linkAnnex'' k obj f) $ + liftIO $ writeFile f (formatPointer k) go _ = return () + {- Hard links a file into .git/annex/objects/, falling back to a copy - - if necessary. + - if necessary. Does nothing if the object file already exists. - - Does not lock down the hard linked object, so that the user can modify - the source file. So, adding an object to the annex this way can @@ -524,7 +531,7 @@ linkAnnex' :: Key -> FilePath -> FilePath -> Annex LinkAnnexResult linkAnnex' key src dest = ifM (liftIO $ doesFileExist dest) ( return LinkAnnexNoop - , ifM (liftIO $ createLinkOrCopy src dest) + , ifM (linkAnnex'' key src dest) ( do thawContent dest Database.Keys.storeInodeCaches key [dest, src] @@ -535,6 +542,27 @@ linkAnnex' key src dest = data LinkAnnexResult = LinkAnnexOk | LinkAnnexFailed | LinkAnnexNoop +{- Hard links or copies src to dest. Only uses a hard link if src + - is not already hardlinked to elsewhere. Checks disk reserve before + - copying, and will fail if not enough space, or if the dest file + - already exists. -} +linkAnnex'' :: Key -> FilePath -> FilePath -> Annex Bool +linkAnnex'' key src dest = catchBoolIO $ do + s <- liftIO $ getFileStatus src +#ifndef mingw32_HOST_OS + if linkCount s > 1 + then copy s + else liftIO (createLink src dest >> return True) + `catchIO` const (copy s) +#else + copy s +#endif + where + copy s = ifM (checkDiskSpace' (fromIntegral $ fileSize s) (Just $ takeDirectory dest) key 0 True) + ( liftIO $ copyFileExternal CopyAllMetaData src dest + , return False + ) + {- Runs an action to transfer an object's content. - - In some cases, it's possible for the file to change as it's being sent. diff --git a/Command/Smudge.hs b/Command/Smudge.hs index cd33b193ea..1353c27915 100644 --- a/Command/Smudge.hs +++ b/Command/Smudge.hs @@ -86,7 +86,7 @@ ingest file = do } k <- fst . fromMaybe (error "failed to generate a key") <$> genKey source backend - -- Hard link (or copy) file content to annex + -- Hard link (or copy) file content to annex object -- to prevent it from being lost when git checks out -- a branch not containing this file. r <- linkAnnex k file diff --git a/doc/todo/smudge.mdwn b/doc/todo/smudge.mdwn index cbe7a50d65..a693cf42f2 100644 --- a/doc/todo/smudge.mdwn +++ b/doc/todo/smudge.mdwn @@ -327,8 +327,6 @@ files to be unlocked, while the indirect upgrades don't touch the files. * Dropping a smudged file causes git status to show it as modified, because the timestamp has changed. Avoid this by preserving timestamp of smudged files when manipulating. -* linkAnnex should check disk reserve when it falls back to copying the - file. * Reconcile staged changes into the associated files database, whenever the database is queried. * See if the cases where the Keys database is not used can be From 7790e059b2ffafaf8715c3068a5bbf294f7cd561 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Fri, 11 Dec 2015 15:13:36 -0400 Subject: [PATCH 41/96] finish v6 git-annex lock This was a doozy! --- Annex/Content.hs | 20 +++++++++---- Command/Lock.hs | 69 +++++++++++++++++++++++++++----------------- doc/todo/smudge.mdwn | 15 ++-------- 3 files changed, 60 insertions(+), 44 deletions(-) diff --git a/Annex/Content.hs b/Annex/Content.hs index 756c801ad8..f0c8e25cdc 100644 --- a/Annex/Content.hs +++ b/Annex/Content.hs @@ -27,6 +27,7 @@ module Annex.Content ( linkAnnex, linkAnnex', LinkAnnexResult(..), + checkedCopyFile, sendAnnex, prepSendAnnex, removeAnnex, @@ -549,16 +550,25 @@ data LinkAnnexResult = LinkAnnexOk | LinkAnnexFailed | LinkAnnexNoop linkAnnex'' :: Key -> FilePath -> FilePath -> Annex Bool linkAnnex'' key src dest = catchBoolIO $ do s <- liftIO $ getFileStatus src + let copy = checkedCopyFile' key src dest s #ifndef mingw32_HOST_OS if linkCount s > 1 - then copy s + then copy else liftIO (createLink src dest >> return True) - `catchIO` const (copy s) + `catchIO` const copy #else - copy s + copy #endif - where - copy s = ifM (checkDiskSpace' (fromIntegral $ fileSize s) (Just $ takeDirectory dest) key 0 True) + +{- Checks disk space before copying. -} +checkedCopyFile :: Key -> FilePath -> FilePath -> Annex Bool +checkedCopyFile key src dest = catchBoolIO $ + checkedCopyFile' key src dest + =<< liftIO (getFileStatus src) + +checkedCopyFile' :: Key -> FilePath -> FilePath -> FileStatus -> Annex Bool +checkedCopyFile' key src dest s = catchBoolIO $ + ifM (checkDiskSpace' (fromIntegral $ fileSize s) (Just $ takeDirectory dest) key 0 True) ( liftIO $ copyFileExternal CopyAllMetaData src dest , return False ) diff --git a/Command/Lock.hs b/Command/Lock.hs index c425d7eb65..3eceaefe4d 100644 --- a/Command/Lock.hs +++ b/Command/Lock.hs @@ -15,9 +15,12 @@ import Annex.Version import Annex.Content import Annex.Link import Annex.InodeSentinal +import Annex.Perms +import Annex.ReplaceFile import Utility.InodeCache import qualified Database.Keys import qualified Command.Add +import Logs.Location cmd :: Command cmd = notDirect $ withGlobalOptions annexedMatchingOptions $ @@ -34,9 +37,12 @@ seek ps = ifM versionSupportsUnlockedPointers ) startNew :: FilePath -> Key -> CommandStart -startNew file key = do - showStart "lock" file - go =<< isPointerFile file +startNew file key = ifM (isJust <$> isAnnexLink file) + ( stop + , do + showStart "lock" file + go =<< isPointerFile file + ) where go (Just key') | key' == key = cont False @@ -53,33 +59,42 @@ startNew file key = do performNew :: FilePath -> Key -> Bool -> CommandPerform performNew file key filemodified = do - -- If other files use this same key, and are unlocked, - -- the annex object file might be hard linked to those files. - -- It's also possible that the annex object file was - -- modified while the file was unlocked. - -- - -- So, in order to lock the file's content, we need to break all - -- hard links to the annex object file, and if it's modified, - -- replace it with a copy of the content of one of the associated - -- files. - -- - -- When the file being locked is unmodified, the annex object file - -- can just be linked to it. (Which might already be the case, but - -- do it again to be sure.) - -- - -- When the file being locked is modified, find another associated - -- file that is unmodified, and copy it to the annex object file. - -- If there are no unmodified associated files, the content of - -- the key is lost. - -- - -- If the filesystem doesn't support hard links, none of this - -- is a concern. - obj <- calcRepo (gitAnnexLocation key) - - freezeContent obj + lockdown =<< calcRepo (gitAnnexLocation key) Command.Add.addLink file key =<< withTSDelta (liftIO . genInodeCache file) next $ cleanupNew file key + where + lockdown obj = do + ifM (sameInodeCache obj =<< Database.Keys.getInodeCaches key) + ( breakhardlink obj + , repopulate obj + ) + freezeContent obj + + -- It's ok if the file is hard linked to obj, but if some other + -- associated file is, we need to break that link to lock down obj. + breakhardlink obj = whenM ((> 1) . linkCount <$> liftIO (getFileStatus obj)) $ do + mfc <- withTSDelta (liftIO . genInodeCache file) + unlessM (sameInodeCache obj (maybeToList mfc)) $ do + modifyContent obj $ replaceFile obj $ \tmp -> do + unlessM (checkedCopyFile key obj tmp) $ + error "unable to lock file; need more free disk space" + Database.Keys.storeInodeCaches key [obj] + + -- Try to repopulate obj from an unmodified associated file. + repopulate obj + | filemodified = modifyContent obj $ do + fs <- Database.Keys.getAssociatedFiles key + mfile <- firstM (isUnmodified key) fs + liftIO $ nukeFile obj + case mfile of + Just unmodified -> + unlessM (checkedCopyFile key unmodified obj) + lostcontent + Nothing -> lostcontent + | otherwise = modifyContent obj $ + liftIO $ renameFile file obj + lostcontent = logStatus key InfoMissing cleanupNew :: FilePath -> Key -> CommandCleanup cleanupNew file key = do diff --git a/doc/todo/smudge.mdwn b/doc/todo/smudge.mdwn index a693cf42f2..ce1db34a24 100644 --- a/doc/todo/smudge.mdwn +++ b/doc/todo/smudge.mdwn @@ -236,11 +236,8 @@ git annex lock/unlock: unlock will stage a pointer file, and will link the content of the object from .git/annex/objects to the work tree file. - lock will replace the current work tree file with the symlink, and stage it. - Note that multiple work tree files could point to the same object. - So, if the link count is > 1, replace the annex object with a copy of - itself to break such a hard link. Always finish by locking down the - permissions of the annex object. + lock will replace the current work tree file with the symlink, and stage it, + and lock down the permissions of the annex object. #### file map @@ -337,13 +334,6 @@ files to be unlocked, while the indirect upgrades don't touch the files. (when not in direct mode). However, beware over-optimisation breaking the assistant or perhaps other long-lived processes. -* Convert `git annex lock` to verify that worktree file is not modified - (same check used when updating pointer files to the content of a key), - and then delete the worktree file and replace with an annex symlink. - - Allow --force to override the check and throw away modified content. - - Also needs to update associated files db. - - Also should check associated files db, and if there are no other - unlocked files for the key, freeze its object file. * Make v6 upgrade convert direct mode repo to repo with all unlocked files. * fsck will need some fixes to handle unlocked files. @@ -356,6 +346,7 @@ files to be unlocked, while the indirect upgrades don't touch the files. when pushing changes committed in such a repo. Ideally, should avoid committing implicit unlocks, or should prevent such commits leaking out in pushes. +* getKeysPresent needs to check if object file is modified ---- From e7183d83d367bb52f502266b11b5b6dff683279e Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Fri, 11 Dec 2015 16:05:56 -0400 Subject: [PATCH 42/96] fsck for v6 unlocked files This only adds 1 stat to each file fscked for locked files, so added overhead is minimal. For unlocked files it has to access the database to see if a file is modified. --- CmdLine/Seek.hs | 2 +- Command/Fsck.hs | 99 ++++++++++++++++++++++++++++---------------- Command/Migrate.hs | 2 +- doc/todo/smudge.mdwn | 1 - 4 files changed, 66 insertions(+), 38 deletions(-) diff --git a/CmdLine/Seek.hs b/CmdLine/Seek.hs index 48545ce042..f4ac4dfada 100644 --- a/CmdLine/Seek.hs +++ b/CmdLine/Seek.hs @@ -125,7 +125,7 @@ withFilesUnlocked = withFilesUnlocked' LsFiles.typeChanged withFilesUnlockedToBeCommitted :: (FilePath -> CommandStart) -> CmdParams -> CommandSeek withFilesUnlockedToBeCommitted = withFilesUnlocked' LsFiles.typeChangedStaged -{- Unlocked files have changed type from a symlink to a regular file. +{- Unlocked files before v6 have changed type from a symlink to a regular file. - - Furthermore, unlocked files used to be a git-annex symlink, - not some other sort of symlink. diff --git a/Command/Fsck.hs b/Command/Fsck.hs index 1531d2ab76..74e83670c1 100644 --- a/Command/Fsck.hs +++ b/Command/Fsck.hs @@ -34,6 +34,7 @@ import Utility.HumanTime import Utility.CopyFile import Git.FilePath import Utility.PID +import qualified Database.Keys #ifdef WITH_DATABASE import qualified Database.Fsck as FsckDb @@ -118,16 +119,18 @@ start from inc file key = do go = runFsck inc file key perform :: Key -> FilePath -> Backend -> NumCopies -> Annex Bool -perform key file backend numcopies = check - -- order matters - [ fixLink key file - , verifyLocationLog key file - , verifyDirectMapping key file - , verifyDirectMode key file - , checkKeySize key - , checkBackend backend key (Just file) - , checkKeyNumCopies key (Just file) numcopies - ] +perform key file backend numcopies = do + keystatus <- getKeyStatus key + check + -- order matters + [ fixLink key file + , verifyLocationLog key keystatus file + , verifyDirectMapping key file + , verifyDirectMode key file + , checkKeySize key keystatus + , checkBackend backend key keystatus (Just file) + , checkKeyNumCopies key (Just file) numcopies + ] {- To fsck a remote, the content is retrieved to a tmp file, - and checked locally. -} @@ -183,19 +186,19 @@ startKey inc key numcopies = performKey key backend numcopies performKey :: Key -> Backend -> NumCopies -> Annex Bool -performKey key backend numcopies = check - [ verifyLocationLog key (key2file key) - , checkKeySize key - , checkBackend backend key Nothing - , checkKeyNumCopies key Nothing numcopies - ] +performKey key backend numcopies = do + keystatus <- getKeyStatus key + check + [ verifyLocationLog key keystatus (key2file key) + , checkKeySize key keystatus + , checkBackend backend key keystatus Nothing + , checkKeyNumCopies key Nothing numcopies + ] check :: [Annex Bool] -> Annex Bool check cs = and <$> sequence cs -{- Checks that the file's link points correctly to the content. - - - - In direct mode, there is only a link when the content is not present. +{- Checks that symlinks points correctly to the annexed content. -} fixLink :: Key -> FilePath -> Annex Bool fixLink key file = do @@ -214,19 +217,23 @@ fixLink key file = do {- Checks that the location log reflects the current status of the key, - in this repository only. -} -verifyLocationLog :: Key -> String -> Annex Bool -verifyLocationLog key desc = do - present <- inAnnex key +verifyLocationLog :: Key -> KeyStatus -> String -> Annex Bool +verifyLocationLog key keystatus desc = do + obj <- calcRepo $ gitAnnexLocation key + present <- if isKeyUnlocked keystatus + then liftIO (doesFileExist obj) + else inAnnex key direct <- isDirect u <- getUUID - {- Since we're checking that a key's file is present, throw + {- Since we're checking that a key's object file is present, throw - in a permission fixup here too. -} - file <- calcRepo $ gitAnnexLocation key - when (present && not direct) $ - freezeContent file - whenM (liftIO $ doesDirectoryExist $ parentDir file) $ - freezeContentDir file + when (present && not direct) $ void $ tryIO $ + if isKeyUnlocked keystatus + then thawContent obj + else freezeContent obj + whenM (liftIO $ doesDirectoryExist $ parentDir obj) $ + freezeContentDir obj {- In direct mode, modified files will show up as not present, - but that is expected and not something to do anything about. -} @@ -288,10 +295,11 @@ verifyDirectMode key file = do {- The size of the data for a key is checked against the size encoded in - the key's metadata, if available. - - - Not checked in direct mode, because files can be changed directly. + - Not checked when a file is unlocked, or in direct mode. -} -checkKeySize :: Key -> Annex Bool -checkKeySize key = ifM isDirect +checkKeySize :: Key -> KeyStatus -> Annex Bool +checkKeySize _ KeyUnlocked = return True +checkKeySize key KeyLocked = ifM isDirect ( return True , do file <- calcRepo $ gitAnnexLocation key @@ -326,18 +334,26 @@ checkKeySizeOr bad key file = case Types.Key.keySize key of , msg ] -{- Runs the backend specific check on a key's content. +{- Runs the backend specific check on a key's content object. + - + - When a file is unlocked, it may be a hard link to the object, + - thus when the user modifies the file, the object will be modified and + - not pass the check, and we don't want to find an error in this case. + - So, skip the check if the key is unlocked and modified. - - In direct mode this is not done if the file has clearly been modified, - because modification of direct mode files is allowed. It's still done - if the file does not appear modified, to catch disk corruption, etc. -} -checkBackend :: Backend -> Key -> Maybe FilePath -> Annex Bool -checkBackend backend key mfile = go =<< isDirect +checkBackend :: Backend -> Key -> KeyStatus -> Maybe FilePath -> Annex Bool +checkBackend backend key keystatus mfile = go =<< isDirect where go False = do content <- calcRepo $ gitAnnexLocation key - checkBackendOr badContent backend key content + ifM (pure (isKeyUnlocked keystatus) <&&> (not <$> isUnmodified key content)) + ( nocheck + , checkBackendOr badContent backend key content + ) go True = maybe nocheck checkdirect mfile checkdirect file = ifM (goodContent key file) ( checkBackendOr' (badContentDirect file) backend key file @@ -582,3 +598,16 @@ withFsckDb (StartIncremental h) a = a h withFsckDb NonIncremental _ = noop withFsckDb (ScheduleIncremental _ _ i) a = withFsckDb i a #endif + +data KeyStatus = KeyLocked | KeyUnlocked + +isKeyUnlocked :: KeyStatus -> Bool +isKeyUnlocked KeyUnlocked = True +isKeyUnlocked KeyLocked = False + +getKeyStatus :: Key -> Annex KeyStatus +getKeyStatus key = do + obj <- calcRepo $ gitAnnexLocation key + unlocked <- ((> 1) . linkCount <$> liftIO (getFileStatus obj)) + <&&> (not . null <$> Database.Keys.getAssociatedFiles key) + return $ if unlocked then KeyUnlocked else KeyLocked diff --git a/Command/Migrate.hs b/Command/Migrate.hs index d1c7902d7d..b8d2eea877 100644 --- a/Command/Migrate.hs +++ b/Command/Migrate.hs @@ -72,7 +72,7 @@ perform file oldkey oldbackend newbackend = go =<< genkey go (Just (newkey, knowngoodcontent)) | knowngoodcontent = finish newkey | otherwise = stopUnless checkcontent $ finish newkey - checkcontent = Command.Fsck.checkBackend oldbackend oldkey $ Just file + checkcontent = Command.Fsck.checkBackend oldbackend oldkey Command.Fsck.KeyLocked $ Just file finish newkey = stopUnless (Command.ReKey.linkKey oldkey newkey) $ next $ Command.ReKey.cleanup file oldkey newkey genkey = case maybe Nothing (\fm -> fm oldkey newbackend (Just file)) (fastMigrate oldbackend) of diff --git a/doc/todo/smudge.mdwn b/doc/todo/smudge.mdwn index ce1db34a24..5cff8672cc 100644 --- a/doc/todo/smudge.mdwn +++ b/doc/todo/smudge.mdwn @@ -336,7 +336,6 @@ files to be unlocked, while the indirect upgrades don't touch the files. long-lived processes. * Make v6 upgrade convert direct mode repo to repo with all unlocked files. -* fsck will need some fixes to handle unlocked files. * Make automatic merge conflict resolution work for pointer files. - Should probably automatically handle merge conflicts between annex symlinks and pointer files too. Maybe by always resulting in a pointer From 1dad3af3fc1ef0e2e9c5880681aa0e3339d12796 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Fri, 11 Dec 2015 16:12:42 -0400 Subject: [PATCH 43/96] checked getKeysPresent; it's ok for v6 unlocked files When a v6 unlocked files is removed from the work tree, unused doesn't show it. When it gets removed from the index, unused does show it. This is the same as a locked file. --- Annex/Content.hs | 6 +++--- doc/todo/smudge.mdwn | 1 - 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/Annex/Content.hs b/Annex/Content.hs index f0c8e25cdc..f4daa3813a 100644 --- a/Annex/Content.hs +++ b/Annex/Content.hs @@ -744,9 +744,9 @@ data KeyLocation = InAnnex | InRepository {- List of keys whose content exists in the specified location. - - InAnnex only lists keys under .git/annex/objects, - - while InRepository, in direct mode, also finds keys located in the - - work tree. + - InAnnex only lists keys with content in .git/annex/objects, + - while InRepository, in direct mode, also finds keys with content + - in the work tree. - - Note that InRepository has to check whether direct mode files - have goodContent. diff --git a/doc/todo/smudge.mdwn b/doc/todo/smudge.mdwn index 5cff8672cc..47613774ca 100644 --- a/doc/todo/smudge.mdwn +++ b/doc/todo/smudge.mdwn @@ -345,7 +345,6 @@ files to be unlocked, while the indirect upgrades don't touch the files. when pushing changes committed in such a repo. Ideally, should avoid committing implicit unlocks, or should prevent such commits leaking out in pushes. -* getKeysPresent needs to check if object file is modified ---- From cc2d78870cd6e9d55b6e2ec46abfb30a8fd4debb Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Fri, 11 Dec 2015 16:22:40 -0400 Subject: [PATCH 44/96] update --- doc/todo/smudge.mdwn | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/doc/todo/smudge.mdwn b/doc/todo/smudge.mdwn index 47613774ca..eeb34f1350 100644 --- a/doc/todo/smudge.mdwn +++ b/doc/todo/smudge.mdwn @@ -322,8 +322,9 @@ files to be unlocked, while the indirect upgrades don't touch the files. #### implementation todo list * Dropping a smudged file causes git status to show it as modified, - because the timestamp has changed. Avoid this by preserving timestamp - of smudged files when manipulating. + because the timestamp has changed. Getting a smudged file can also + cause this. Avoid this by preserving timestamp of smudged files + when manipulating. * Reconcile staged changes into the associated files database, whenever the database is queried. * See if the cases where the Keys database is not used can be @@ -345,6 +346,7 @@ files to be unlocked, while the indirect upgrades don't touch the files. when pushing changes committed in such a repo. Ideally, should avoid committing implicit unlocks, or should prevent such commits leaking out in pushes. +* Test suite should have a pass that runs with files unlocked. ---- From 42caf42857f9c993dc96603c4938213f84068d52 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Fri, 11 Dec 2015 17:53:37 -0400 Subject: [PATCH 45/96] avoid smudge filter returning invalid content 1. git add file 2. git commit 3. modify file 4. git commit 5. git reset HEAD^ Before this fix, that resulted in git saying the file was modified. And indeed, it didn't have the content it should in the just checked out ref, because step 3 modified the object file for the old key. --- Command/Smudge.hs | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/Command/Smudge.hs b/Command/Smudge.hs index 1353c27915..b7f18085ad 100644 --- a/Command/Smudge.hs +++ b/Command/Smudge.hs @@ -48,10 +48,19 @@ smudge file = do case parseLinkOrPointer b of Nothing -> liftIO $ B.putStr b Just k -> do + -- A previous unlocked checkout of the file may have + -- led to the annex object getting modified; + -- don't provide such modified content as it + -- will be confusing. inAnnex will detect + -- modifications. + ifM (inAnnex k) + ( do + content <- calcRepo (gitAnnexLocation k) + liftIO $ B.putStr . fromMaybe b + =<< catchMaybeIO (B.readFile content) + , liftIO $ B.putStr b + ) Database.Keys.addAssociatedFile k file - content <- calcRepo (gitAnnexLocation k) - liftIO $ B.hPut stdout . fromMaybe b - =<< catchMaybeIO (B.readFile content) stop -- Clean filter is fed file content on stdin, decides if a file From 9fcc5046b388b39eb5ae20ae0b55e3bc05aaa7e6 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Tue, 15 Dec 2015 12:38:32 -0400 Subject: [PATCH 46/96] todo --- doc/todo/smudge.mdwn | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doc/todo/smudge.mdwn b/doc/todo/smudge.mdwn index eeb34f1350..7c277d016f 100644 --- a/doc/todo/smudge.mdwn +++ b/doc/todo/smudge.mdwn @@ -325,6 +325,9 @@ files to be unlocked, while the indirect upgrades don't touch the files. because the timestamp has changed. Getting a smudged file can also cause this. Avoid this by preserving timestamp of smudged files when manipulating. +* Clean filter should check if the filename was already in use by an old + key. The annex object for it may have been modified due to hard link, and + that should be cleaned up when the new version is added. * Reconcile staged changes into the associated files database, whenever the database is queried. * See if the cases where the Keys database is not used can be From 2bc920e266c665fed3cf12b45494b87f1cbaf97e Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Tue, 15 Dec 2015 13:02:33 -0400 Subject: [PATCH 47/96] update inode cache to cover file even when nothing needs to be done to linkAnnex This covers the case where multiple files have the same content and are added with git add. Previously only the one that was linked to the annex got its inode cached; now both are. --- Annex/Content.hs | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/Annex/Content.hs b/Annex/Content.hs index f4daa3813a..e99dfb1dd4 100644 --- a/Annex/Content.hs +++ b/Annex/Content.hs @@ -27,6 +27,7 @@ module Annex.Content ( linkAnnex, linkAnnex', LinkAnnexResult(..), + unlinkAnnex, checkedCopyFile, sendAnnex, prepSendAnnex, @@ -512,7 +513,6 @@ populatePointerFile k obj f = go =<< isPointerFile f liftIO $ writeFile f (formatPointer k) go _ = return () - {- Hard links a file into .git/annex/objects/, falling back to a copy - if necessary. Does nothing if the object file already exists. - @@ -527,17 +527,22 @@ linkAnnex key src = do modifyContent dest $ linkAnnex' key src dest {- Hard links (or copies) src to dest, one of which should be the - - annex object. -} + - annex object. Updates inode cache for src and for dest when it's + - changed. -} linkAnnex' :: Key -> FilePath -> FilePath -> Annex LinkAnnexResult linkAnnex' key src dest = ifM (liftIO $ doesFileExist dest) - ( return LinkAnnexNoop + ( do + Database.Keys.storeInodeCaches key [src] + return LinkAnnexNoop , ifM (linkAnnex'' key src dest) ( do thawContent dest Database.Keys.storeInodeCaches key [dest, src] return LinkAnnexOk - , return LinkAnnexFailed + , do + Database.Keys.storeInodeCaches key [src] + return LinkAnnexFailed ) ) @@ -560,6 +565,14 @@ linkAnnex'' key src dest = catchBoolIO $ do copy #endif +{- Removes the annex object file for a key. Lowlevel. -} +unlinkAnnex :: Key -> Annex () +unlinkAnnex key = do + obj <- calcRepo $ gitAnnexLocation key + modifyContent obj $ do + secureErase obj + liftIO $ nukeFile obj + {- Checks disk space before copying. -} checkedCopyFile :: Key -> FilePath -> FilePath -> Annex Bool checkedCopyFile key src dest = catchBoolIO $ From 0a7a2dae4ee1247ec7825d5f6feae6ede32d8d72 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Tue, 15 Dec 2015 13:05:23 -0400 Subject: [PATCH 48/96] add getAssociatedKey I guess this is just as efficient as the getAssociatedFiles query, but I have not tried to optimise the database yet. --- Database/Keys.hs | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/Database/Keys.hs b/Database/Keys.hs index 78d583d633..62c7c25eb4 100644 --- a/Database/Keys.hs +++ b/Database/Keys.hs @@ -17,6 +17,7 @@ module Database.Keys ( shutdown, addAssociatedFile, getAssociatedFiles, + getAssociatedKey, removeAssociatedFile, storeInodeCaches, addInodeCaches, @@ -118,6 +119,19 @@ getAssociatedFiles' sk = do return (r ^. AssociatedFile) return $ map unValue l +{- Gets any keys that are on record as having a particular associated file. + - (Should be one or none but the database doesn't enforce that.) -} +getAssociatedKey :: FilePath -> Annex [Key] +getAssociatedKey f = withDbHandle $ \h -> H.queryDb h $ + getAssociatedKey' f + +getAssociatedKey' :: FilePath -> SqlPersistM [Key] +getAssociatedKey' f = do + l <- select $ from $ \r -> do + where_ (r ^. AssociatedFile ==. val f) + return (r ^. AssociatedKey) + return $ map (fromSKey . unValue) l + removeAssociatedFile :: Key -> FilePath -> Annex () removeAssociatedFile k f = withDbHandle $ \h -> H.queueDb h (\_ _ -> pure True) $ delete $ from $ \r -> do From 71e2050f8f3138b276a2d2dc652e30d04db46474 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Tue, 15 Dec 2015 13:06:52 -0400 Subject: [PATCH 49/96] have clean filter check if the filename was already in use by an old key The annex object for it may have been modified due to hard link, and that should be cleaned up when the new version is added. If another associated file has the old key's content, that's linked into the annex object. Otherwise, update location log to reflect that content has been lost. --- Command/Smudge.hs | 26 +++++++++++++++++++++++++- doc/todo/smudge.mdwn | 3 --- 2 files changed, 25 insertions(+), 4 deletions(-) diff --git a/Command/Smudge.hs b/Command/Smudge.hs index b7f18085ad..e6541bc6d6 100644 --- a/Command/Smudge.hs +++ b/Command/Smudge.hs @@ -13,6 +13,7 @@ import Annex.Content import Annex.Link import Annex.MetaData import Annex.FileMatcher +import Annex.InodeSentinal import Types.KeySource import Backend import Logs.Location @@ -51,7 +52,7 @@ smudge file = do -- A previous unlocked checkout of the file may have -- led to the annex object getting modified; -- don't provide such modified content as it - -- will be confusing. inAnnex will detect + -- will be confusing. inAnnex will detect such -- modifications. ifM (inAnnex k) ( do @@ -74,12 +75,35 @@ clean file = do else ifM (shouldAnnex file) ( do k <- ingest file + oldkeys <- filter (/= k) + <$> Database.Keys.getAssociatedKey file + mapM_ (cleanOldKey file) oldkeys Database.Keys.addAssociatedFile k file liftIO $ emitPointer k , liftIO $ B.hPut stdout b ) stop +-- If the file being cleaned was hard linked to the old key's annex object, +-- modifying the file will have caused the object to have the wrong content. +-- Clean up from that, making the +cleanOldKey :: FilePath -> Key -> Annex () +cleanOldKey modifiedfile key = do + obj <- calcRepo (gitAnnexLocation key) + caches <- Database.Keys.getInodeCaches key + unlessM (sameInodeCache obj caches) $ do + unlinkAnnex key + fs <- filter (/= modifiedfile) + <$> Database.Keys.getAssociatedFiles key + fs' <- filterM (`sameInodeCache` caches) fs + case fs' of + -- If linkAnnex fails, the file with the content + -- is still present, so no need for any recovery. + (f:_) -> void $ linkAnnex key f + _ -> lostcontent + where + lostcontent = logStatus key InfoMissing + shouldAnnex :: FilePath -> Annex Bool shouldAnnex file = do matcher <- largeFilesMatcher diff --git a/doc/todo/smudge.mdwn b/doc/todo/smudge.mdwn index 7c277d016f..eeb34f1350 100644 --- a/doc/todo/smudge.mdwn +++ b/doc/todo/smudge.mdwn @@ -325,9 +325,6 @@ files to be unlocked, while the indirect upgrades don't touch the files. because the timestamp has changed. Getting a smudged file can also cause this. Avoid this by preserving timestamp of smudged files when manipulating. -* Clean filter should check if the filename was already in use by an old - key. The annex object for it may have been modified due to hard link, and - that should be cleaned up when the new version is added. * Reconcile staged changes into the associated files database, whenever the database is queried. * See if the cases where the Keys database is not used can be From a4a813fb0764983ea6e1ae37b7a9ee9665caf266 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Tue, 15 Dec 2015 14:02:53 -0400 Subject: [PATCH 50/96] add: no need to make pass for old unlocked files in v6 --- Command/Add.hs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Command/Add.hs b/Command/Add.hs index f4bdc70c98..948a0d94cb 100644 --- a/Command/Add.hs +++ b/Command/Add.hs @@ -33,6 +33,7 @@ import Annex.ReplaceFile import Utility.Tmp import Utility.CopyFile import Annex.InodeSentinal +import Annex.Version import Control.Exception (IOException) @@ -67,7 +68,8 @@ seek o = allowConcurrentOutput $ do go $ withFilesNotInGit (not $ includeDotFiles o) ifM isDirect ( go withFilesMaybeModified - , go withFilesUnlocked + , unlessM versionSupportsUnlockedPointers $ + go withFilesUnlocked ) {- Pass file off to git-add. -} From d1bb518e25dba1fd43fe2188988cf0eac5276ada Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Tue, 15 Dec 2015 14:07:54 -0400 Subject: [PATCH 51/96] update doc for v6 --- doc/git-annex-add.mdwn | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/doc/git-annex-add.mdwn b/doc/git-annex-add.mdwn index cfeb8a98e5..2f95f54550 100644 --- a/doc/git-annex-add.mdwn +++ b/doc/git-annex-add.mdwn @@ -11,15 +11,21 @@ git annex add `[path ...]` Adds files in the path to the annex. If no path is specified, adds files from the current directory and below. -Normally, files that are already checked into git, or that git has been -configured to ignore will be silently skipped. +Files that are already checked into git and are unmodified, or that +git has been configured to ignore will be silently skipped. -If annex.largefiles is configured, and does not match a file that is being -added, `git annex add` will behave the same as `git add` and add the -non-large file directly to the git repository, instead of to the annex. +If annex.largefiles is configured, and does not match a file, `git annex +add` will behave the same as `git add` and add the non-large file directly +to the git repository, instead of to the annex. -This command can also be used to add symbolic links, both symlinks to -annexed content, and other symlinks. +Large files are added to the annex in locked form, which prevents further +modification of their content unless unlocked by [[git-annex-unlock]](1). +To add a file to the annex in unlocked form, `git add` can be used instead +(that only works when the repository has annex.version 6 or higher). + +Files that are not annexed files (including symbolic links and files +checked directly into git) will be staged in the index the same as if `git +add` were used. # OPTIONS From a983a3a7a2dbee51d42686ee507cdc55c5f591a3 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Tue, 15 Dec 2015 14:08:07 -0400 Subject: [PATCH 52/96] rename stuff for v5 unlocked files to indicate it's old --- CmdLine/Seek.hs | 18 +++++++++--------- Command/Add.hs | 2 +- Command/Lock.hs | 4 ++-- Command/PreCommit.hs | 4 ++-- 4 files changed, 14 insertions(+), 14 deletions(-) diff --git a/CmdLine/Seek.hs b/CmdLine/Seek.hs index f4ac4dfada..e6ee6f3fe4 100644 --- a/CmdLine/Seek.hs +++ b/CmdLine/Seek.hs @@ -119,25 +119,25 @@ withFilesToBeCommitted :: (FilePath -> CommandStart) -> CmdParams -> CommandSeek withFilesToBeCommitted a params = seekActions $ prepFiltered a $ seekHelper LsFiles.stagedNotDeleted params -withFilesUnlocked :: (FilePath -> CommandStart) -> CmdParams -> CommandSeek -withFilesUnlocked = withFilesUnlocked' LsFiles.typeChanged +withFilesOldUnlocked :: (FilePath -> CommandStart) -> CmdParams -> CommandSeek +withFilesOldUnlocked = withFilesOldUnlocked' LsFiles.typeChanged -withFilesUnlockedToBeCommitted :: (FilePath -> CommandStart) -> CmdParams -> CommandSeek -withFilesUnlockedToBeCommitted = withFilesUnlocked' LsFiles.typeChangedStaged +withFilesOldUnlockedToBeCommitted :: (FilePath -> CommandStart) -> CmdParams -> CommandSeek +withFilesOldUnlockedToBeCommitted = withFilesOldUnlocked' LsFiles.typeChangedStaged {- Unlocked files before v6 have changed type from a symlink to a regular file. - - Furthermore, unlocked files used to be a git-annex symlink, - not some other sort of symlink. -} -withFilesUnlocked' :: ([FilePath] -> Git.Repo -> IO ([FilePath], IO Bool)) -> (FilePath -> CommandStart) -> CmdParams -> CommandSeek -withFilesUnlocked' typechanged a params = seekActions $ +withFilesOldUnlocked' :: ([FilePath] -> Git.Repo -> IO ([FilePath], IO Bool)) -> (FilePath -> CommandStart) -> CmdParams -> CommandSeek +withFilesOldUnlocked' typechanged a params = seekActions $ prepFiltered a unlockedfiles where - unlockedfiles = filterM isUnlocked =<< seekHelper typechanged params + unlockedfiles = filterM isOldUnlocked =<< seekHelper typechanged params -isUnlocked :: FilePath -> Annex Bool -isUnlocked f = liftIO (notSymlink f) <&&> +isOldUnlocked :: FilePath -> Annex Bool +isOldUnlocked f = liftIO (notSymlink f) <&&> (isJust <$> catKeyFile f <||> isJust <$> catKeyFileHEAD f) {- Finds files that may be modified. -} diff --git a/Command/Add.hs b/Command/Add.hs index 948a0d94cb..a0bcf789e5 100644 --- a/Command/Add.hs +++ b/Command/Add.hs @@ -69,7 +69,7 @@ seek o = allowConcurrentOutput $ do ifM isDirect ( go withFilesMaybeModified , unlessM versionSupportsUnlockedPointers $ - go withFilesUnlocked + go withFilesOldUnlocked ) {- Pass file off to git-add. -} diff --git a/Command/Lock.hs b/Command/Lock.hs index 3eceaefe4d..16ddce9424 100644 --- a/Command/Lock.hs +++ b/Command/Lock.hs @@ -32,8 +32,8 @@ seek :: CmdParams -> CommandSeek seek ps = ifM versionSupportsUnlockedPointers ( withFilesInGit (whenAnnexed startNew) ps , do - withFilesUnlocked startOld ps - withFilesUnlockedToBeCommitted startOld ps + withFilesOldUnlocked startOld ps + withFilesOldUnlockedToBeCommitted startOld ps ) startNew :: FilePath -> Key -> CommandStart diff --git a/Command/PreCommit.hs b/Command/PreCommit.hs index b6f52d01c2..71a9f86f8c 100644 --- a/Command/PreCommit.hs +++ b/Command/PreCommit.hs @@ -46,7 +46,7 @@ seek ps = lockPreCommitHook $ ifM isDirect ifM (liftIO Git.haveFalseIndex) ( do (fs, cleanup) <- inRepo $ Git.typeChangedStaged ps - whenM (anyM isUnlocked fs) $ + whenM (anyM isOldUnlocked fs) $ error "Cannot make a partial commit with unlocked annexed files. You should `git annex add` the files you want to commit, and then run git commit." void $ liftIO cleanup , do @@ -58,7 +58,7 @@ seek ps = lockPreCommitHook $ ifM isDirect -- (not needed when repo version uses -- unlocked pointer files) unlessM versionSupportsUnlockedPointers $ - withFilesUnlockedToBeCommitted startInjectUnlocked ps + withFilesOldUnlockedToBeCommitted startInjectUnlocked ps ) runAnnexHook preCommitAnnexHook -- committing changes to a view updates metadata From d245a805181e7b12ae7a01686fb598b147f6dbd5 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Tue, 15 Dec 2015 14:09:36 -0400 Subject: [PATCH 53/96] avoid pre-commit check having to do with v5 unlocked files when in v6 mode --- Command/PreCommit.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Command/PreCommit.hs b/Command/PreCommit.hs index 71a9f86f8c..cbf7f6e3de 100644 --- a/Command/PreCommit.hs +++ b/Command/PreCommit.hs @@ -43,7 +43,7 @@ seek ps = lockPreCommitHook $ ifM isDirect withWords startDirect ps runAnnexHook preCommitAnnexHook , do - ifM (liftIO Git.haveFalseIndex) + ifM (not <$> versionSupportsUnlockedPointers <&&> liftIO Git.haveFalseIndex) ( do (fs, cleanup) <- inRepo $ Git.typeChangedStaged ps whenM (anyM isOldUnlocked fs) $ From c7a066a3e4df24a40541bad2815ce5ef657dc790 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Tue, 15 Dec 2015 14:14:19 -0400 Subject: [PATCH 54/96] fix incorrect doc change --- doc/git-annex-add.mdwn | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/doc/git-annex-add.mdwn b/doc/git-annex-add.mdwn index 2f95f54550..772d789713 100644 --- a/doc/git-annex-add.mdwn +++ b/doc/git-annex-add.mdwn @@ -23,9 +23,8 @@ modification of their content unless unlocked by [[git-annex-unlock]](1). To add a file to the annex in unlocked form, `git add` can be used instead (that only works when the repository has annex.version 6 or higher). -Files that are not annexed files (including symbolic links and files -checked directly into git) will be staged in the index the same as if `git -add` were used. +This command can also be used to add symbolic links, both symlinks to +annexed content, and other symlinks. # OPTIONS From 8a660a7b14b38dc4784d1e0a9ebefede45a952f4 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Tue, 15 Dec 2015 14:17:00 -0400 Subject: [PATCH 55/96] add: In v6 mode, acts on modified files. Same as was done in direct mode, except in v6 mode add always adds files locked, so --- Command/Add.hs | 5 ++--- debian/changelog | 1 + 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Command/Add.hs b/Command/Add.hs index a0bcf789e5..8cbaf189af 100644 --- a/Command/Add.hs +++ b/Command/Add.hs @@ -66,10 +66,9 @@ seek o = allowConcurrentOutput $ do , startSmall file ) go $ withFilesNotInGit (not $ includeDotFiles o) - ifM isDirect + ifM (versionSupportsUnlockedPointers <||> isDirect) ( go withFilesMaybeModified - , unlessM versionSupportsUnlockedPointers $ - go withFilesOldUnlocked + , go withFilesOldUnlocked ) {- Pass file off to git-add. -} diff --git a/debian/changelog b/debian/changelog index 57c69a1e89..b3612da260 100644 --- a/debian/changelog +++ b/debian/changelog @@ -11,6 +11,7 @@ git-annex (6.20151225) unstable; urgency=medium newly initialized repository; it will add files to the annex. * unlock, lock: In v6 mode, unlocking a file changes it from a symlink to a pointer file, and this change can be committed to the git repository. + * add: In v6 mode, acts on modified files. -- Joey Hess Tue, 08 Dec 2015 11:14:03 -0400 From 178dbe8cb1524e9c7037854f3a88b87984ab50ee Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Tue, 15 Dec 2015 14:17:34 -0400 Subject: [PATCH 56/96] doc update --- doc/git-annex-add.mdwn | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/git-annex-add.mdwn b/doc/git-annex-add.mdwn index 772d789713..7f796fec1f 100644 --- a/doc/git-annex-add.mdwn +++ b/doc/git-annex-add.mdwn @@ -20,6 +20,7 @@ to the git repository, instead of to the annex. Large files are added to the annex in locked form, which prevents further modification of their content unless unlocked by [[git-annex-unlock]](1). +(This is not the case however when a repository is in direct mode.) To add a file to the annex in unlocked form, `git add` can be used instead (that only works when the repository has annex.version 6 or higher). From 3ba6d84559d4779661c6c40544dfd79109f5bdb2 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Tue, 15 Dec 2015 14:18:44 -0400 Subject: [PATCH 57/96] clarify --- debian/changelog | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/debian/changelog b/debian/changelog index b3612da260..21d875d1d8 100644 --- a/debian/changelog +++ b/debian/changelog @@ -11,7 +11,7 @@ git-annex (6.20151225) unstable; urgency=medium newly initialized repository; it will add files to the annex. * unlock, lock: In v6 mode, unlocking a file changes it from a symlink to a pointer file, and this change can be committed to the git repository. - * add: In v6 mode, acts on modified files. + * add: In v6 mode, adds modified files to the annex. -- Joey Hess Tue, 08 Dec 2015 11:14:03 -0400 From 0ddcaae9c11dc77ac885c5851e4f78940e668ba1 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Tue, 15 Dec 2015 14:27:20 -0400 Subject: [PATCH 58/96] changes for v6 broke fsck in direct mode --- Command/Fsck.hs | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/Command/Fsck.hs b/Command/Fsck.hs index 74e83670c1..42bc930749 100644 --- a/Command/Fsck.hs +++ b/Command/Fsck.hs @@ -299,15 +299,12 @@ verifyDirectMode key file = do -} checkKeySize :: Key -> KeyStatus -> Annex Bool checkKeySize _ KeyUnlocked = return True -checkKeySize key KeyLocked = ifM isDirect - ( return True - , do - file <- calcRepo $ gitAnnexLocation key - ifM (liftIO $ doesFileExist file) - ( checkKeySizeOr badContent key file - , return True - ) - ) +checkKeySize key KeyLocked = do + file <- calcRepo $ gitAnnexLocation key + ifM (liftIO $ doesFileExist file) + ( checkKeySizeOr badContent key file + , return True + ) checkKeySizeRemote :: Key -> Remote -> Maybe FilePath -> Annex Bool checkKeySizeRemote _ _ Nothing = return True @@ -606,8 +603,11 @@ isKeyUnlocked KeyUnlocked = True isKeyUnlocked KeyLocked = False getKeyStatus :: Key -> Annex KeyStatus -getKeyStatus key = do - obj <- calcRepo $ gitAnnexLocation key - unlocked <- ((> 1) . linkCount <$> liftIO (getFileStatus obj)) - <&&> (not . null <$> Database.Keys.getAssociatedFiles key) - return $ if unlocked then KeyUnlocked else KeyLocked +getKeyStatus key = ifM isDirect + ( return KeyUnlocked + , do + obj <- calcRepo $ gitAnnexLocation key + unlocked <- ((> 1) . linkCount <$> liftIO (getFileStatus obj)) + <&&> (not . null <$> Database.Keys.getAssociatedFiles key) + return $ if unlocked then KeyUnlocked else KeyLocked + ) From cdd27b8920c434b48af2575fea6cec9cf7acbaf3 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Tue, 15 Dec 2015 15:34:28 -0400 Subject: [PATCH 59/96] reorg --- Annex/Direct.hs | 2 +- Annex/View.hs | 4 ++-- Annex/WorkTree.hs | 35 ++++++++++++++++++++++++++++ Assistant/Threads/TransferScanner.hs | 4 ++-- Assistant/Threads/Watcher.hs | 4 ++-- Backend.hs | 18 -------------- Command.hs | 11 ++------- Command/Unused.hs | 5 ++-- Command/Upgrade.hs | 1 + Limit.hs | 4 ++-- Test.hs | 9 +++---- 11 files changed, 54 insertions(+), 43 deletions(-) create mode 100644 Annex/WorkTree.hs diff --git a/Annex/Direct.hs b/Annex/Direct.hs index 8fced2d444..8c3d5bb562 100644 --- a/Annex/Direct.hs +++ b/Annex/Direct.hs @@ -399,7 +399,7 @@ changedDirect oldk f = do whenM (pure (null locs) <&&> not <$> inAnnex oldk) $ logStatus oldk InfoMissing -{- Enable/disable direct mode. -} +{- Git config settings to enable/disable direct mode. -} setDirect :: Bool -> Annex () setDirect wantdirect = do if wantdirect diff --git a/Annex/View.hs b/Annex/View.hs index 567522a541..8ddbb9c638 100644 --- a/Annex/View.hs +++ b/Annex/View.hs @@ -22,7 +22,7 @@ import Git.Sha import Git.HashObject import Git.Types import Git.FilePath -import qualified Backend +import Annex.WorkTree import Annex.Index import Annex.Link import Annex.CatFile @@ -342,7 +342,7 @@ applyView' mkviewedfile getfilemetadata view = do hasher <- inRepo hashObjectStart forM_ l $ \f -> do relf <- getTopFilePath <$> inRepo (toTopFilePath f) - go uh hasher relf =<< Backend.lookupFile f + go uh hasher relf =<< lookupFile f liftIO $ do hashObjectStop hasher void $ stopUpdateIndex uh diff --git a/Annex/WorkTree.hs b/Annex/WorkTree.hs new file mode 100644 index 0000000000..26144e7f9e --- /dev/null +++ b/Annex/WorkTree.hs @@ -0,0 +1,35 @@ +{- git-annex worktree files + - + - Copyright 2013-2015 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Annex.WorkTree where + +import Common.Annex +import Annex.Link +import Annex.CatFile + +{- Looks up the key corresponding to an annexed file, + - by examining what the file links to. + - + - An unlocked file will not have a link on disk, so fall back to + - looking for a pointer to a key in git. + -} +lookupFile :: FilePath -> Annex (Maybe Key) +lookupFile file = do + mkey <- isAnnexLink file + case mkey of + Just key -> makeret key + Nothing -> maybe (return Nothing) makeret =<< catKeyFile file + where + makeret = return . Just + +{- Modifies an action to only act on files that are already annexed, + - and passes the key on to it. -} +whenAnnexed :: (FilePath -> Key -> Annex (Maybe a)) -> FilePath -> Annex (Maybe a) +whenAnnexed a file = ifAnnexed file (a file) (return Nothing) + +ifAnnexed :: FilePath -> (Key -> Annex a) -> Annex a -> Annex a +ifAnnexed file yes no = maybe no yes =<< lookupFile file diff --git a/Assistant/Threads/TransferScanner.hs b/Assistant/Threads/TransferScanner.hs index f35c1f1f53..7386d55286 100644 --- a/Assistant/Threads/TransferScanner.hs +++ b/Assistant/Threads/TransferScanner.hs @@ -25,7 +25,7 @@ import Utility.ThreadScheduler import Utility.NotificationBroadcaster import Utility.Batch import qualified Git.LsFiles as LsFiles -import qualified Backend +import Annex.WorkTree import Annex.Content import Annex.Wanted import CmdLine.Action @@ -142,7 +142,7 @@ expensiveScan urlrenderer rs = batch <~> do (unwanted', ts) <- maybe (return (unwanted, [])) (findtransfers f unwanted) - =<< liftAnnex (Backend.lookupFile f) + =<< liftAnnex (lookupFile f) mapM_ (enqueue f) ts scan unwanted' fs diff --git a/Assistant/Threads/Watcher.hs b/Assistant/Threads/Watcher.hs index 8c6ff378dd..37e0154b45 100644 --- a/Assistant/Threads/Watcher.hs +++ b/Assistant/Threads/Watcher.hs @@ -28,7 +28,7 @@ import qualified Annex.Queue import qualified Git import qualified Git.UpdateIndex import qualified Git.LsFiles as LsFiles -import qualified Backend +import Annex.WorkTree import Annex.Direct import Annex.Content.Direct import Annex.CatFile @@ -270,7 +270,7 @@ onAddDirect symlinkssupported matcher file fs = do onAddSymlink :: Bool -> Handler onAddSymlink isdirect file filestatus = unlessIgnored file $ do linktarget <- liftIO (catchMaybeIO $ readSymbolicLink file) - kv <- liftAnnex (Backend.lookupFile file) + kv <- liftAnnex (lookupFile file) onAddSymlink' linktarget kv isdirect file filestatus onAddSymlink' :: Maybe String -> Maybe Key -> Bool -> Handler diff --git a/Backend.hs b/Backend.hs index d37eed34af..c2f3d28d41 100644 --- a/Backend.hs +++ b/Backend.hs @@ -9,7 +9,6 @@ module Backend ( list, orderedList, genKey, - lookupFile, getBackend, chooseBackend, lookupBackendName, @@ -20,8 +19,6 @@ module Backend ( import Common.Annex import qualified Annex import Annex.CheckAttr -import Annex.CatFile -import Annex.Link import Types.Key import Types.KeySource import qualified Types.Backend as B @@ -76,21 +73,6 @@ genKey' (b:bs) source = do | c == '\n' = '_' | otherwise = c -{- Looks up the key corresponding to an annexed file, - - by examining what the file links to. - - - - An unlocked file will not have a link on disk, so fall back to - - looking for a pointer to a key in git. - -} -lookupFile :: FilePath -> Annex (Maybe Key) -lookupFile file = do - mkey <- isAnnexLink file - case mkey of - Just key -> makeret key - Nothing -> maybe (return Nothing) makeret =<< catKeyFile file - where - makeret = return . Just - getBackend :: FilePath -> Key -> Annex (Maybe Backend) getBackend file k = let bname = keyBackendName k in case maybeLookupBackendName bname of diff --git a/Command.hs b/Command.hs index bee63bb741..387f7b8b56 100644 --- a/Command.hs +++ b/Command.hs @@ -18,12 +18,13 @@ module Command ( stopUnless, whenAnnexed, ifAnnexed, + lookupFile, isBareRepo, module ReExported ) where import Common.Annex -import qualified Backend +import Annex.WorkTree import qualified Git import Types.Command as ReExported import Types.Option as ReExported @@ -100,13 +101,5 @@ stop = return Nothing stopUnless :: Annex Bool -> Annex (Maybe a) -> Annex (Maybe a) stopUnless c a = ifM c ( a , stop ) -{- Modifies an action to only act on files that are already annexed, - - and passes the key on to it. -} -whenAnnexed :: (FilePath -> Key -> Annex (Maybe a)) -> FilePath -> Annex (Maybe a) -whenAnnexed a file = ifAnnexed file (a file) (return Nothing) - -ifAnnexed :: FilePath -> (Key -> Annex a) -> Annex a -> Annex a -ifAnnexed file yes no = maybe no yes =<< Backend.lookupFile file - isBareRepo :: Annex Bool isBareRepo = fromRepo Git.repoIsLocalBare diff --git a/Command/Unused.hs b/Command/Unused.hs index 4756cda5df..4353bd075e 100644 --- a/Command/Unused.hs +++ b/Command/Unused.hs @@ -24,7 +24,6 @@ import qualified Git.Branch import qualified Git.RefLog import qualified Git.LsFiles as LsFiles import qualified Git.DiffTree as DiffTree -import qualified Backend import qualified Remote import qualified Annex.Branch import Annex.CatFile @@ -215,7 +214,7 @@ withKeysReferenced' mdir initial a = do Just dir -> inRepo $ LsFiles.inRepo [dir] go v [] = return v go v (f:fs) = do - x <- Backend.lookupFile f + x <- lookupFile f case x of Nothing -> go v fs Just k -> do @@ -266,7 +265,7 @@ withKeysReferencedInGitRef a ref = do forM_ ts $ tKey lookAtWorkingTree >=> maybe noop a liftIO $ void clean where - tKey True = Backend.lookupFile . getTopFilePath . DiffTree.file + tKey True = lookupFile . getTopFilePath . DiffTree.file tKey False = fileKey . takeFileName . decodeBS <$$> catFile ref . getTopFilePath . DiffTree.file diff --git a/Command/Upgrade.hs b/Command/Upgrade.hs index c02a6709f9..8a34022e3c 100644 --- a/Command/Upgrade.hs +++ b/Command/Upgrade.hs @@ -13,6 +13,7 @@ import Upgrade cmd :: Command cmd = dontCheck repoExists $ -- because an old version may not seem to exist + noDaemonRunning $ -- avoid upgrading repo out from under daemon command "upgrade" SectionMaintenance "upgrade repository layout" paramNothing (withParams seek) diff --git a/Limit.hs b/Limit.hs index 321c1122b3..437c65bc35 100644 --- a/Limit.hs +++ b/Limit.hs @@ -11,8 +11,8 @@ import Common.Annex import qualified Annex import qualified Utility.Matcher import qualified Remote -import qualified Backend import Annex.Content +import Annex.WorkTree import Annex.Action import Annex.UUID import Logs.Trust @@ -277,7 +277,7 @@ addTimeLimit s = do else return True lookupFileKey :: FileInfo -> Annex (Maybe Key) -lookupFileKey = Backend.lookupFile . currFile +lookupFileKey = lookupFile . currFile checkKey :: (Key -> Annex Bool) -> MatchInfo -> Annex Bool checkKey a (MatchingFile fi) = lookupFileKey fi >>= maybe (return False) a diff --git a/Test.hs b/Test.hs index f4035f6051..1a0601b35c 100644 --- a/Test.hs +++ b/Test.hs @@ -65,6 +65,7 @@ import qualified Types.Messages import qualified Config import qualified Config.Cost import qualified Crypto +import qualified Annex.WorkTree import qualified Annex.Init import qualified Annex.CatFile import qualified Annex.View @@ -810,7 +811,7 @@ test_unused = intmpclonerepoInDirect $ do assertEqual ("unused keys differ " ++ desc) (sort expectedkeys) (sort unusedkeys) findkey f = do - r <- Backend.lookupFile f + r <- Annex.WorkTree.lookupFile f return $ fromJust r test_describe :: Assertion @@ -1380,7 +1381,7 @@ test_crypto = do (c,k) <- annexeval $ do uuid <- Remote.nameToUUID "foo" rs <- Logs.Remote.readRemoteLog - Just k <- Backend.lookupFile annexedfile + Just k <- Annex.WorkTree.lookupFile annexedfile return (fromJust $ M.lookup uuid rs, k) let key = if scheme `elem` ["hybrid","pubkey"] then Just $ Utility.Gpg.KeyIds [Utility.Gpg.testKeyId] @@ -1684,7 +1685,7 @@ checkdangling f = ifM (annexeval Config.crippledFileSystem) checklocationlog :: FilePath -> Bool -> Assertion checklocationlog f expected = do thisuuid <- annexeval Annex.UUID.getUUID - r <- annexeval $ Backend.lookupFile f + r <- annexeval $ Annex.WorkTree.lookupFile f case r of Just k -> do uuids <- annexeval $ Remote.keyLocations k @@ -1695,7 +1696,7 @@ checklocationlog f expected = do checkbackend :: FilePath -> Types.Backend -> Assertion checkbackend file expected = do b <- annexeval $ maybe (return Nothing) (Backend.getBackend file) - =<< Backend.lookupFile file + =<< Annex.WorkTree.lookupFile file assertEqual ("backend for " ++ file) (Just expected) b inlocationlog :: FilePath -> Assertion From f9d077186a1f1e269b2fe3a86eb2be206f620a4c Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Tue, 15 Dec 2015 15:56:37 -0400 Subject: [PATCH 60/96] implemented upgrade of direct mode repo to v6 --- Annex/Content.hs | 11 ++++-- Upgrade/V5.hs | 79 +++++++++++++++++++++++++++++++++++++++++++- doc/todo/smudge.mdwn | 10 +++--- doc/upgrades.mdwn | 16 ++++----- 4 files changed, 101 insertions(+), 15 deletions(-) diff --git a/Annex/Content.hs b/Annex/Content.hs index e99dfb1dd4..4cd2b02597 100644 --- a/Annex/Content.hs +++ b/Annex/Content.hs @@ -753,13 +753,14 @@ moveBad key = do logStatus key InfoMissing return dest -data KeyLocation = InAnnex | InRepository +data KeyLocation = InAnnex | InRepository | InAnywhere {- List of keys whose content exists in the specified location. - InAnnex only lists keys with content in .git/annex/objects, - while InRepository, in direct mode, also finds keys with content - - in the work tree. + - in the work tree. InAnywhere lists all keys that have directories + - in .git/annex/objects, whether or not the content is present. - - Note that InRepository has to check whether direct mode files - have goodContent. @@ -788,6 +789,11 @@ getKeysPresent keyloc = do morekeys <- unsafeInterleaveIO a continue (morekeys++keys) as + inanywhere = case keyloc of + InAnywhere -> True + _ -> False + + present _ _ _ | inanywhere = pure True present _ False d = presentInAnnex d present s True d = presentDirect s d <||> presentInAnnex d @@ -800,6 +806,7 @@ getKeysPresent keyloc = do Nothing -> return False Just k -> Annex.eval s $ anyM (Direct.goodContent k) =<< Direct.associatedFiles k + InAnywhere -> return True {- In order to run Annex monad actions within unsafeInterleaveIO, - the current state is taken and reused. No changes made to this diff --git a/Upgrade/V5.hs b/Upgrade/V5.hs index e4501302dd..2073a0150f 100644 --- a/Upgrade/V5.hs +++ b/Upgrade/V5.hs @@ -1,4 +1,4 @@ -{- git-annex v5 -> v6 uppgrade support +{- git-annex v5 -> v6 upgrade support - - Copyright 2015 Joey Hess - @@ -10,11 +10,36 @@ module Upgrade.V5 where import Common.Annex import Config import Annex.InodeSentinal +import Annex.Link +import Annex.Direct +import Annex.Content +import Annex.WorkTree +import qualified Database.Keys +import qualified Annex.Content.Direct as Direct +import qualified Git +import qualified Git.LsFiles +import qualified Git.Branch +import Git.FileMode upgrade :: Bool -> Annex Bool upgrade automatic = do unless automatic $ showAction "v5 to v6" + whenM isDirect $ do + {- Since upgrade from direct mode changes how files + - are represented in git, commit any changes in the + - work tree first. -} + whenM stageDirect $ do + unless automatic $ + showAction "committing first" + upgradeDirectCommit automatic + "commit before upgrade to annex.version 6" + setDirect False + upgradeDirectWorkTree + removeDirectCruft + showLongNote "Upgraded repository out of direct mode." + showLongNote "Changes have been staged for all annexed files in this repository; you should run `git commit` to commit these changes." + showLongNote "Any other clones of this repository that use direct mode need to be upgraded now, too." configureSmudgeFilter -- Inode sentinal file was only used in direct mode and when -- locking down files as they were added. In v6, it's used more @@ -23,3 +48,55 @@ upgrade automatic = do unlessM (isDirect) $ createInodeSentinalFile True return True + +upgradeDirectCommit :: Bool -> String -> Annex () +upgradeDirectCommit automatic msg = + void $ inRepo $ Git.Branch.commitCommand commitmode + [ Param "-m" + , Param msg + ] + where + commitmode = if automatic then Git.Branch.AutomaticCommit else Git.Branch.ManualCommit + +{- Walk work tree from top and convert all annex symlinks to pointer files, + - staging them in the index, and updating the work tree files with + - either the content of the object, or the pointer file content. -} +upgradeDirectWorkTree :: Annex () +upgradeDirectWorkTree = do + top <- fromRepo Git.repoPath + (l, clean) <- inRepo $ Git.LsFiles.stagedDetails [top] + forM_ l go + void $ liftIO clean + where + go (f, Just _sha, Just mode) | isSymLink mode = do + mk <- lookupFile f + case mk of + Nothing -> noop + Just k -> do + ifM (isJust <$> getAnnexLinkTarget f) + ( writepointer f k + , fromdirect f k + ) + stagePointerFile f =<< hashPointerFile k + Database.Keys.addAssociatedFile k f + return () + go _ = noop + + fromdirect f k = do + -- If linkAnnex fails for some reason, the work tree file + -- still has the content; the annex object file is just + -- not populated with it. Since the work tree file + -- is recorded as an associated file, things will still + -- work that way, it's just not ideal. + void $ linkAnnex k f + writepointer f k = liftIO $ do + nukeFile f + writeFile f (formatPointer k) + +{- Remove all direct mode bookkeeping files. -} +removeDirectCruft :: Annex () +removeDirectCruft = mapM_ go =<< getKeysPresent InAnywhere + where + go k = do + Direct.removeInodeCache k + Direct.removeAssociatedFiles k diff --git a/doc/todo/smudge.mdwn b/doc/todo/smudge.mdwn index eeb34f1350..6623a6d187 100644 --- a/doc/todo/smudge.mdwn +++ b/doc/todo/smudge.mdwn @@ -323,8 +323,12 @@ files to be unlocked, while the indirect upgrades don't touch the files. * Dropping a smudged file causes git status to show it as modified, because the timestamp has changed. Getting a smudged file can also - cause this. Avoid this by preserving timestamp of smudged files - when manipulating. + cause this. Upgrading a direct mode repo also leaves files in this state. + User can use `git add` to clear it up, but better to avoid this, + by updating stat info in the index. + (May need to use libgit2 to do this, cannot find + any plumbing except git-update-index, which is very inneficient for + smudged files.) * Reconcile staged changes into the associated files database, whenever the database is queried. * See if the cases where the Keys database is not used can be @@ -335,8 +339,6 @@ files to be unlocked, while the indirect upgrades don't touch the files. (when not in direct mode). However, beware over-optimisation breaking the assistant or perhaps other long-lived processes. -* Make v6 upgrade convert direct mode repo to repo with all unlocked - files. * Make automatic merge conflict resolution work for pointer files. - Should probably automatically handle merge conflicts between annex symlinks and pointer files too. Maybe by always resulting in a pointer diff --git a/doc/upgrades.mdwn b/doc/upgrades.mdwn index d69941cb1b..27f22e16e9 100644 --- a/doc/upgrades.mdwn +++ b/doc/upgrades.mdwn @@ -48,6 +48,12 @@ The upgrade events, so far: The upgrade from v5 to v6 is handled manually. Run `git-annex upgrade` perform the upgrade. +Warning: All places that a direct mode repository is cloned to should be +running git-annex version 6.x before you upgrade the repository. +This is necessary because the contents of the repository are changed +in the upgrade, and the old version of git-annex won't be able to +access files after the repo is upgraded. + This upgrade does away with the direct mode/indirect mode distinction. A v6 git-annex repository can have some files locked and other files unlocked, and all git and git-annex commands can be used on both locked and @@ -65,19 +71,13 @@ The behavior of some commands changes in an upgraded repository: * `git annex unlock` and `git annex lock` change how the pointer to the annexed content is stored in git. -All places that a direct mode repository is cloned to should be -running git-annex version 6.x before you upgrade the repository. -This is necessary because the contents of the repository are changed -in the upgrade, and the old version of git-annex won't be able to -access files after the repo is upgraded. - If a repository is only used in indirect mode, you can use git-annex v5 and v6 in different clones of the same indirect mode repository without problems. On upgrade, all files in a direct mode repository will be converted to -unlocked files. The upgrade will need to stage changes to all files in -the git repository. +unlocked files. The upgrade will stage changes to all annexed files in +the git repository, which you can then commit. If a repository has some clones using direct mode and some using indirect mode, all the files will end up unlocked in all clones after the upgrade. From db8b32254cc9a23cdf8405d8e984e63573a88197 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Tue, 15 Dec 2015 16:07:02 -0400 Subject: [PATCH 61/96] update todo list --- doc/todo/smudge.mdwn | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/doc/todo/smudge.mdwn b/doc/todo/smudge.mdwn index 6623a6d187..fe4750ee2b 100644 --- a/doc/todo/smudge.mdwn +++ b/doc/todo/smudge.mdwn @@ -321,14 +321,12 @@ files to be unlocked, while the indirect upgrades don't touch the files. #### implementation todo list -* Dropping a smudged file causes git status to show it as modified, - because the timestamp has changed. Getting a smudged file can also - cause this. Upgrading a direct mode repo also leaves files in this state. - User can use `git add` to clear it up, but better to avoid this, - by updating stat info in the index. - (May need to use libgit2 to do this, cannot find - any plumbing except git-update-index, which is very inneficient for - smudged files.) +* Test suite should have a pass that runs with files unlocked. +* assistant: In v6 mode, adds files in unlocked mode, so they can + continue to be modified. TODO +* When the webapp creates a repo, it forces it into direct mode. But that + will fail when annex.version=6. Long-term, the assistant should make v6 + repos, but short-term, the assistant should make v5 repos in direct mode. * Reconcile staged changes into the associated files database, whenever the database is queried. * See if the cases where the Keys database is not used can be @@ -348,7 +346,14 @@ files to be unlocked, while the indirect upgrades don't touch the files. when pushing changes committed in such a repo. Ideally, should avoid committing implicit unlocks, or should prevent such commits leaking out in pushes. -* Test suite should have a pass that runs with files unlocked. +* Dropping a smudged file causes git status to show it as modified, + because the timestamp has changed. Getting a smudged file can also + cause this. Upgrading a direct mode repo also leaves files in this state. + User can use `git add` to clear it up, but better to avoid this, + by updating stat info in the index. + (May need to use libgit2 to do this, cannot find + any plumbing except git-update-index, which is very inneficient for + smudged files.) ---- From 99f1d7991d1baaecfbd8b54bda3c146d227fb720 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Tue, 15 Dec 2015 16:10:48 -0400 Subject: [PATCH 62/96] recent fsck changes caused ugly message when object was not present --- Command/Fsck.hs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Command/Fsck.hs b/Command/Fsck.hs index 42bc930749..46de4ac96f 100644 --- a/Command/Fsck.hs +++ b/Command/Fsck.hs @@ -299,7 +299,7 @@ verifyDirectMode key file = do -} checkKeySize :: Key -> KeyStatus -> Annex Bool checkKeySize _ KeyUnlocked = return True -checkKeySize key KeyLocked = do +checkKeySize key _ = do file <- calcRepo $ gitAnnexLocation key ifM (liftIO $ doesFileExist file) ( checkKeySizeOr badContent key file @@ -596,16 +596,17 @@ withFsckDb NonIncremental _ = noop withFsckDb (ScheduleIncremental _ _ i) a = withFsckDb i a #endif -data KeyStatus = KeyLocked | KeyUnlocked +data KeyStatus = KeyLocked | KeyUnlocked | KeyMissing isKeyUnlocked :: KeyStatus -> Bool isKeyUnlocked KeyUnlocked = True isKeyUnlocked KeyLocked = False +isKeyUnlocked KeyMissing = False getKeyStatus :: Key -> Annex KeyStatus getKeyStatus key = ifM isDirect ( return KeyUnlocked - , do + , catchDefaultIO KeyMissing $ do obj <- calcRepo $ gitAnnexLocation key unlocked <- ((> 1) . linkCount <$> liftIO (getFileStatus obj)) <&&> (not . null <$> Database.Keys.getAssociatedFiles key) From b9588fe69e1dfd8e8230bb3a7bc9f28e21c33326 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Tue, 15 Dec 2015 16:18:39 -0400 Subject: [PATCH 63/96] in v6 mode, unannex does not interact badly with pre-commit hook So can be used in a tree with staged changes, no problems. Much nicer. --- Command/Unannex.hs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Command/Unannex.hs b/Command/Unannex.hs index fdf976d3e0..f7af8cde63 100644 --- a/Command/Unannex.hs +++ b/Command/Unannex.hs @@ -15,6 +15,7 @@ import Config import qualified Annex import Annex.Content import Annex.Content.Direct +import Annex.Version import qualified Git.Command import qualified Git.Branch import qualified Git.Ref @@ -32,7 +33,7 @@ seek :: CmdParams -> CommandSeek seek = wrapUnannex . (withFilesInGit $ whenAnnexed start) wrapUnannex :: Annex a -> Annex a -wrapUnannex a = ifM isDirect +wrapUnannex a = ifM (versionSupportsUnlockedPointers <||> isDirect) ( a {- Run with the pre-commit hook disabled, to avoid confusing - behavior if an unannexed file is added back to git as From 7d0e79b9e1353c7c9df8270a8e63fe24e07478d7 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Tue, 15 Dec 2015 17:17:13 -0400 Subject: [PATCH 64/96] Use git-annex init --version=6 to get v6 for now Not ready to make it default because of the direct mode upgrade needing to all happen at once. --- Annex/Init.hs | 18 +++++++++-------- Annex/MakeRepo.hs | 2 +- Annex/Version.hs | 9 ++++++--- Command/ConfigList.hs | 2 +- Command/Init.hs | 43 +++++++++++++++++++++++++++++------------ Command/Reinit.hs | 2 +- Upgrade.hs | 2 +- Upgrade/V1.hs | 4 ++-- debian/changelog | 4 +++- doc/git-annex-init.mdwn | 7 +++++++ 10 files changed, 63 insertions(+), 30 deletions(-) diff --git a/Annex/Init.hs b/Annex/Init.hs index 997312c31e..99bb03e929 100644 --- a/Annex/Init.hs +++ b/Annex/Init.hs @@ -57,8 +57,8 @@ genDescription Nothing = do return $ concat [hostname, ":", reldir] #endif -initialize :: Maybe String -> Annex () -initialize mdescription = do +initialize :: Maybe String -> Maybe Version -> Annex () +initialize mdescription mversion = do {- Has to come before any commits are made as the shared - clone heuristic expects no local objects. -} sharedclone <- checkSharedClone @@ -68,7 +68,7 @@ initialize mdescription = do ensureCommit $ Annex.Branch.create prepUUID - initialize' + initialize' mversion initSharedClone sharedclone @@ -77,16 +77,18 @@ initialize mdescription = do -- Everything except for uuid setup, shared clone setup, and initial -- description. -initialize' :: Annex () -initialize' = do +initialize' :: Maybe Version -> Annex () +initialize' mversion = do checkLockSupport checkFifoSupport checkCrippledFileSystem unlessM isBare $ hookWrite preCommitHook setDifferences - setVersion currentVersion - configureSmudgeFilter + unlessM (isJust <$> getVersion) $ + setVersion (fromMaybe defaultVersion mversion) + whenM versionSupportsUnlockedPointers + configureSmudgeFilter ifM (crippledFileSystem <&&> not <$> isBare) ( do enableDirectMode @@ -115,7 +117,7 @@ ensureInitialized :: Annex () ensureInitialized = getVersion >>= maybe needsinit checkUpgrade where needsinit = ifM Annex.Branch.hasSibling - ( initialize Nothing + ( initialize Nothing Nothing , error "First run: git-annex init" ) diff --git a/Annex/MakeRepo.hs b/Annex/MakeRepo.hs index 73443c43d9..adf49ed2c4 100644 --- a/Annex/MakeRepo.hs +++ b/Annex/MakeRepo.hs @@ -75,7 +75,7 @@ initRepo False _ dir desc mgroup = inDir dir $ do initRepo' :: Maybe String -> Maybe StandardGroup -> Annex () initRepo' desc mgroup = unlessM isInitialized $ do - initialize desc + initialize desc Nothing u <- getUUID maybe noop (defaultStandardGroup u) mgroup {- Ensure branch gets committed right away so it is diff --git a/Annex/Version.hs b/Annex/Version.hs index 4c2a990fa8..b54fb68e07 100644 --- a/Annex/Version.hs +++ b/Annex/Version.hs @@ -15,11 +15,14 @@ import qualified Annex type Version = String -currentVersion :: Version -currentVersion = "6" +defaultVersion :: Version +defaultVersion = "5" + +latestVersion :: Version +latestVersion = "6" supportedVersions :: [Version] -supportedVersions = ["5", currentVersion] +supportedVersions = ["5", "6"] upgradableVersions :: [Version] #ifndef mingw32_HOST_OS diff --git a/Command/ConfigList.hs b/Command/ConfigList.hs index 46c909107b..997016e8e3 100644 --- a/Command/ConfigList.hs +++ b/Command/ConfigList.hs @@ -46,7 +46,7 @@ findOrGenUUID = do else ifM (Annex.Branch.hasSibling <||> (isJust <$> Fields.getField Fields.autoInit)) ( do liftIO checkNotReadOnly - initialize Nothing + initialize Nothing Nothing getUUID , return NoUUID ) diff --git a/Command/Init.hs b/Command/Init.hs index d969669f81..94d8168a67 100644 --- a/Command/Init.hs +++ b/Command/Init.hs @@ -10,25 +10,44 @@ module Command.Init where import Common.Annex import Command import Annex.Init +import Annex.Version import qualified Annex.SpecialRemote cmd :: Command cmd = dontCheck repoExists $ command "init" SectionSetup "initialize git-annex" - paramDesc (withParams seek) + paramDesc (seek <$$> optParser) -seek :: CmdParams -> CommandSeek -seek = withWords start +data InitOptions = InitOptions + { initDesc :: String + , initVersion :: Maybe Version + } -start :: [String] -> CommandStart -start ws = do - showStart "init" description - next $ perform description - where - description = unwords ws +optParser :: CmdParamsDesc -> Parser InitOptions +optParser desc = InitOptions + <$> (unwords <$> cmdParams desc) + <*> optional (option (str >>= parseVersion) + ( long "version" <> metavar paramValue + <> help "Override default annex.version" + )) -perform :: String -> CommandPerform -perform description = do - initialize $ if null description then Nothing else Just description +parseVersion :: Monad m => String -> m Version +parseVersion v + | v `elem` supportedVersions = return v + | otherwise = fail $ v ++ " is not a currently supported repository version" + +seek :: InitOptions -> CommandSeek +seek = commandAction . start + +start :: InitOptions -> CommandStart +start os = do + showStart "init" (initDesc os) + next $ perform os + +perform :: InitOptions -> CommandPerform +perform os = do + initialize + (if null (initDesc os) then Nothing else Just (initDesc os)) + (initVersion os) Annex.SpecialRemote.autoEnable next $ return True diff --git a/Command/Reinit.hs b/Command/Reinit.hs index 1be692871d..e2c00a3d29 100644 --- a/Command/Reinit.hs +++ b/Command/Reinit.hs @@ -38,6 +38,6 @@ perform s = do then return $ toUUID s else Remote.nameToUUID s storeUUID u - initialize' + initialize' Nothing Annex.SpecialRemote.autoEnable next $ return True diff --git a/Upgrade.hs b/Upgrade.hs index 1f4a8d8dec..f9dfb72589 100644 --- a/Upgrade.hs +++ b/Upgrade.hs @@ -41,7 +41,7 @@ upgrade :: Bool -> Annex Bool upgrade automatic = do upgraded <- go =<< getVersion when upgraded $ - setVersion currentVersion + setVersion latestVersion return upgraded where #ifndef mingw32_HOST_OS diff --git a/Upgrade/V1.hs b/Upgrade/V1.hs index bcf7e0b6de..507af9e3b3 100644 --- a/Upgrade/V1.hs +++ b/Upgrade/V1.hs @@ -54,14 +54,14 @@ upgrade = do ifM (fromRepo Git.repoIsLocalBare) ( do moveContent - setVersion currentVersion + setVersion latestVersion , do moveContent updateSymlinks moveLocationLogs Annex.Queue.flush - setVersion currentVersion + setVersion latestVersion ) Upgrade.V2.upgrade diff --git a/debian/changelog b/debian/changelog index 21d875d1d8..838b2d39cc 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,6 +1,6 @@ git-annex (6.20151225) unstable; urgency=medium - * annex.version increased to 6, but version 5 is also still supported. + * Added v6 repository mode, but v5 is still the default for now. * The upgrade to version 6 is not done fully automatically, because upgrading a direct mode repository to version 6 will prevent old versions of git-annex from working in other clones of that repository. @@ -12,6 +12,8 @@ git-annex (6.20151225) unstable; urgency=medium * unlock, lock: In v6 mode, unlocking a file changes it from a symlink to a pointer file, and this change can be committed to the git repository. * add: In v6 mode, adds modified files to the annex. + * init: --version parameter added to control which supported repository + version to use. -- Joey Hess Tue, 08 Dec 2015 11:14:03 -0400 diff --git a/doc/git-annex-init.mdwn b/doc/git-annex-init.mdwn index 1457051057..29522181dc 100644 --- a/doc/git-annex-init.mdwn +++ b/doc/git-annex-init.mdwn @@ -24,6 +24,13 @@ mark it as dead (see [[git-annex-dead]](1)). This command is entirely safe, although usually pointless, to run inside an already initialized git-annex repository. +# OPTIONS + +* `--version=N` + + Force the repository to be initialized using a different annex.version + than the current default. + # SEE ALSO [[git-annex]](1) From 7800125783a812cf0c03d7305d8b635ba582cff7 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Tue, 15 Dec 2015 17:19:26 -0400 Subject: [PATCH 65/96] starting to work on test suite for v6 --- Test.hs | 58 ++++++++++++++++++++++++++------------------ doc/todo/smudge.mdwn | 7 +++++- 2 files changed, 41 insertions(+), 24 deletions(-) diff --git a/Test.hs b/Test.hs index 1a0601b35c..5207385b52 100644 --- a/Test.hs +++ b/Test.hs @@ -38,6 +38,7 @@ import Common import qualified Utility.SafeCommand import qualified Annex import qualified Annex.UUID +import qualified Annex.Version import qualified Backend import qualified Git.CurrentRepo import qualified Git.Filename @@ -118,18 +119,17 @@ ingredients = ] tests :: TestTree -tests = testGroup "Tests" - -- Test both direct and indirect mode. - -- Windows is only going to use direct mode, so don't test twice. - [ properties +tests = testGroup "Tests" $ properties : + map (\(d, te) -> withTestMode te (unitTests d)) testmodes + where + testmodes = + [ ("v5", TestMode { forceDirect = False, annexVersion = "5" }) + -- Windows will only use direct mode, so don't test twice. #ifndef mingw32_HOST_OS - , withTestEnv True $ unitTests "(direct)" - , withTestEnv False $ unitTests "(indirect)" -#else - , withTestEnv False $ unitTests "" + , ("v5 direct", TestMode { forceDirect = True, annexVersion = "5" }) + , ("v6", TestMode { forceDirect = False, annexVersion = "6" }) + ] #endif - ] - properties :: TestTree properties = localOption (QuickCheckTests 1000) $ testGroup "QuickCheck" @@ -244,7 +244,7 @@ unitTests note = testGroup ("Unit Tests " ++ note) test_init :: Assertion test_init = innewrepo $ do git_annex "init" [reponame] @? "init failed" - handleforcedirect + setupTestMode where reponame = "test repo" @@ -1506,7 +1506,7 @@ intmpclonerepoInDirect a = intmpclonerepo $ ) where isdirect = annexeval $ do - Annex.Init.initialize Nothing + Annex.Init.initialize Nothing Nothing Config.isDirect checkRepo :: Types.Annex a -> FilePath -> IO a @@ -1589,7 +1589,7 @@ clonerepo old new cfg = do git_annex "init" ["-q", new] @? "git annex init failed" unless (bareClone cfg) $ indir new $ - handleforcedirect + setupTestMode return new configrepo :: FilePath -> IO () @@ -1600,10 +1600,6 @@ configrepo dir = indir dir $ do -- avoid signed commits by test suite boolSystem "git" [Param "config", Param "commit.gpgsign", Param "false"] @? "git config failed" -handleforcedirect :: IO () -handleforcedirect = whenM ((==) "1" <$> Utility.Env.getEnvDefault "FORCEDIRECT" "") $ - git_annex "direct" ["-q"] @? "git annex direct failed" - ensuretmpdir :: IO () ensuretmpdir = do e <- doesDirectoryExist tmpdir @@ -1722,11 +1718,16 @@ annexed_present = runchecks unannexed :: FilePath -> Assertion unannexed = runchecks [checkregularfile, checkcontent, checkwritable] -withTestEnv :: Bool -> TestTree -> TestTree -withTestEnv forcedirect = withResource prepare release . const +data TestMode = TestMode + { forceDirect :: Bool + , annexVersion :: String + } deriving (Read, Show) + +withTestMode :: TestMode -> TestTree -> TestTree +withTestMode testmode = withResource prepare release . const where prepare = do - setTestEnv forcedirect + setTestMode testmode case tryIngredients [consoleTestReporter] mempty initTests of Nothing -> error "No tests found!?" Just act -> unlessM act $ @@ -1734,8 +1735,8 @@ withTestEnv forcedirect = withResource prepare release . const return () release _ = cleanup' True tmpdir -setTestEnv :: Bool -> IO () -setTestEnv forcedirect = do +setTestMode :: TestMode -> IO () +setTestMode testmode = do whenM (doesDirectoryExist tmpdir) $ error $ "The temporary directory " ++ tmpdir ++ " already exists; cannot run test suite." @@ -1755,9 +1756,20 @@ setTestEnv forcedirect = do , ("GIT_COMMITTER_NAME", "git-annex test") -- force gpg into batch mode for the tests , ("GPG_BATCH", "1") - , ("FORCEDIRECT", if forcedirect then "1" else "") + , ("TESTMODE", show testmode) ] +getTestMode :: IO TestMode +getTestMode = Prelude.read <$> Utility.Env.getEnvDefault "TESTMODE" "" + +setupTestMode :: IO () +setupTestMode = do + testmode <- getTestMode + annexeval $ + Annex.Version.setVersion (annexVersion testmode) + when (forceDirect testmode) $ + git_annex "direct" ["-q"] @? "git annex direct failed" + changeToTmpDir :: FilePath -> IO () changeToTmpDir t = do topdir <- Utility.Env.getEnvDefault "TOPDIR" (error "TOPDIR not set") diff --git a/doc/todo/smudge.mdwn b/doc/todo/smudge.mdwn index fe4750ee2b..e1d54cf7f4 100644 --- a/doc/todo/smudge.mdwn +++ b/doc/todo/smudge.mdwn @@ -321,7 +321,12 @@ files to be unlocked, while the indirect upgrades don't touch the files. #### implementation todo list -* Test suite should have a pass that runs with files unlocked. +* Test suite should have passes for: + v5 indirect + v5 direct + v6 locked + v6 unlocked + Currently, the test suite fails horribly. * assistant: In v6 mode, adds files in unlocked mode, so they can continue to be modified. TODO * When the webapp creates a repo, it forces it into direct mode. But that From 1e016611de80adcaed97af8c1dd2e9667de29b99 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 16 Dec 2015 13:14:18 -0400 Subject: [PATCH 66/96] pass --version to init when needed --- Test.hs | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/Test.hs b/Test.hs index 5207385b52..5fbadd1d5b 100644 --- a/Test.hs +++ b/Test.hs @@ -243,7 +243,10 @@ unitTests note = testGroup ("Unit Tests " ++ note) -- this test case create the main repo test_init :: Assertion test_init = innewrepo $ do - git_annex "init" [reponame] @? "init failed" + ver <- annexVersion <$> getTestMode + if ver == Annex.Version.defaultVersion + then git_annex "init" [reponame] @? "init failed" + else git_annex "init" [reponame, "--version", ver] @? "init failed" setupTestMode where reponame = "test repo" @@ -1585,8 +1588,11 @@ clonerepo old new cfg = do ] boolSystem "git" cloneparams @? "git clone failed" configrepo new - indir new $ - git_annex "init" ["-q", new] @? "git annex init failed" + indir new $ do + ver <- annexVersion <$> getTestMode + if ver == Annex.Version.defaultVersion + then git_annex "init" ["-q", new] @? "git annex init failed" + else git_annex "init" ["-q", new, "--version", ver] @? "git annex init failed" unless (bareClone cfg) $ indir new $ setupTestMode @@ -1765,8 +1771,6 @@ getTestMode = Prelude.read <$> Utility.Env.getEnvDefault "TESTMODE" "" setupTestMode :: IO () setupTestMode = do testmode <- getTestMode - annexeval $ - Annex.Version.setVersion (annexVersion testmode) when (forceDirect testmode) $ git_annex "direct" ["-q"] @? "git annex direct failed" From 1a051f4300f9ae56e93527485b1aa7cc0fd12332 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 16 Dec 2015 13:24:45 -0400 Subject: [PATCH 67/96] comment --- Database/Keys.hs | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/Database/Keys.hs b/Database/Keys.hs index 62c7c25eb4..d4b5b33574 100644 --- a/Database/Keys.hs +++ b/Database/Keys.hs @@ -54,7 +54,14 @@ Content KeyCacheIndex key cache |] -{- Opens the database, creating it if it doesn't exist yet. -} +{- Opens the database, creating it if it doesn't exist yet. + - + - Multiple readers and writers can have the database open at the same + - time. Database.Handle deals with the concurrency issues. + - The lock is held while opening the database, so that when + - the database doesn't exist yet, one caller wins the lock and + - can create it undisturbed. + -} openDb :: Annex DbHandle openDb = withExclusiveLock gitAnnexKeysDbLock $ do dbdir <- fromRepo gitAnnexKeysDb From 622da992f894ece03a2f9502c572f8a0d55410de Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 16 Dec 2015 13:52:43 -0400 Subject: [PATCH 68/96] reorder database shutdown to be concurrency safe If a DbHandle is in use by another thread, it could be queueing changes while shutdown is running. So, wait for the worker to finish before flushing the queue, so that any last-minute writes are included. Before this fix, they would be silently dropped. Of course, if the other thread continues to try to use a DbHandle once it's closed, it will block forever as the worker is no longer reading from the jobs MVar. So, that would crash with "thread blocked indefinitely in an MVar operation". --- Database/Handle.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Database/Handle.hs b/Database/Handle.hs index 6d312df685..67f7592657 100644 --- a/Database/Handle.hs +++ b/Database/Handle.hs @@ -142,9 +142,9 @@ queryDb (DbHandle _ jobs _) a = do closeDb :: DbHandle -> IO () closeDb h@(DbHandle worker jobs _) = do - flushQueueDb h putMVar jobs CloseJob wait worker + flushQueueDb h type Size = Int From 38a23928e9d45b56d6836a4eac703862d63cf93c Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 16 Dec 2015 14:05:26 -0400 Subject: [PATCH 69/96] temporarily remove cached keys database connection The problem is that shutdown is not always called, particularly in the test suite. So, a database connection would be opened, possibly some changes queued, and then not shut down. One way this can happen is when using Annex.eval or Annex.run with a new state. A better fix might be to make both of them call Keys.shutdown (and be sure to do it even if the annex action threw an error). Complication: Sometimes they're run reusing an existing state, so shutting down a database connection could cause problems for other users of that same state. I think this would need a MVar holding the database handle, so it could be emptied once shut down, and another user of the database connection could then start up a new one if it got shut down. But, what if 2 threads were concurrently using the same database handle and one shut it down while the other was writing to it? Urgh. Might have to go that route eventually to get the database access to run fast enough. For now, a quick fix to get the test suite happier, at the expense of speed. --- Annex.hs | 3 --- Annex/Action.hs | 2 -- Database/Keys.hs | 20 +------------------- 3 files changed, 1 insertion(+), 24 deletions(-) diff --git a/Annex.hs b/Annex.hs index c4df0b92f6..c9a4ef6a05 100644 --- a/Annex.hs +++ b/Annex.hs @@ -60,7 +60,6 @@ import Types.NumCopies import Types.LockCache import Types.DesktopNotify import Types.CleanupActions -import qualified Database.Keys.Types #ifdef WITH_QUVI import Utility.Quvi (QuviVersion) #endif @@ -135,7 +134,6 @@ data AnnexState = AnnexState , desktopnotify :: DesktopNotify , workers :: [Either AnnexState (Async AnnexState)] , concurrentjobs :: Maybe Int - , keysdbhandle :: Maybe Database.Keys.Types.DbHandle } newState :: GitConfig -> Git.Repo -> AnnexState @@ -181,7 +179,6 @@ newState c r = AnnexState , desktopnotify = mempty , workers = [] , concurrentjobs = Nothing - , keysdbhandle = Nothing } {- Makes an Annex state object for the specified git repo. diff --git a/Annex/Action.hs b/Annex/Action.hs index 348487e7c9..f59c9c2f45 100644 --- a/Annex/Action.hs +++ b/Annex/Action.hs @@ -17,7 +17,6 @@ import System.Posix.Signals import Common.Annex import qualified Annex import Annex.Content -import qualified Database.Keys {- Actions to perform each time ran. -} startup :: Annex () @@ -33,5 +32,4 @@ shutdown :: Bool -> Annex () shutdown nocommit = do saveState nocommit sequence_ =<< M.elems <$> Annex.getState Annex.cleanup - Database.Keys.shutdown liftIO reapZombies -- zombies from long-running git processes diff --git a/Database/Keys.hs b/Database/Keys.hs index d4b5b33574..a0c5b1a04c 100644 --- a/Database/Keys.hs +++ b/Database/Keys.hs @@ -14,7 +14,6 @@ module Database.Keys ( DbHandle, openDb, closeDb, - shutdown, addAssociatedFile, getAssociatedFiles, getAssociatedKey, @@ -84,24 +83,7 @@ closeDb :: DbHandle -> IO () closeDb (DbHandle h) = H.closeDb h withDbHandle :: (H.DbHandle -> IO a) -> Annex a -withDbHandle a = do - (DbHandle h) <- dbHandle - liftIO $ a h - -dbHandle :: Annex DbHandle -dbHandle = maybe startup return =<< Annex.getState Annex.keysdbhandle - where - startup = do - h <- openDb - Annex.changeState $ \s -> s { Annex.keysdbhandle = Just h } - return h - -shutdown :: Annex () -shutdown = maybe noop go =<< Annex.getState Annex.keysdbhandle - where - go h = do - Annex.changeState $ \s -> s { Annex.keysdbhandle = Nothing } - liftIO $ closeDb h +withDbHandle a = bracket openDb (liftIO . closeDb) (\(DbHandle h) -> liftIO (a h)) addAssociatedFile :: Key -> FilePath -> Annex () addAssociatedFile k f = withDbHandle $ \h -> H.queueDb h (\_ _ -> pure True) $ do From 35f6a78b6635c290789e5102ab5000cebd9bf619 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 16 Dec 2015 14:27:12 -0400 Subject: [PATCH 70/96] fix reversion in v5 git-annex add of unlocked file In v5, lookupFile is supposed to only look at symlinks on disk (except when in direct mode). Note that v6 also has a bug when a locked file's symlink is deleted and is replaced with a new file. It sees that a link is staged and gets that key. --- Annex/WorkTree.hs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/Annex/WorkTree.hs b/Annex/WorkTree.hs index 26144e7f9e..c824e7fc59 100644 --- a/Annex/WorkTree.hs +++ b/Annex/WorkTree.hs @@ -10,6 +10,8 @@ module Annex.WorkTree where import Common.Annex import Annex.Link import Annex.CatFile +import Annex.Version +import Config {- Looks up the key corresponding to an annexed file, - by examining what the file links to. @@ -22,7 +24,10 @@ lookupFile file = do mkey <- isAnnexLink file case mkey of Just key -> makeret key - Nothing -> maybe (return Nothing) makeret =<< catKeyFile file + Nothing -> ifM (versionSupportsUnlockedPointers <||> isDirect) + ( maybe (return Nothing) makeret =<< catKeyFile file + , return Nothing + ) where makeret = return . Just From 2d343224dc0ca67604c1e34b2f77032586a38c8e Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 16 Dec 2015 14:53:41 -0400 Subject: [PATCH 71/96] fix add of file that was locked but has been replaced by a new, unlocked file (v6) --- Command/Add.hs | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/Command/Add.hs b/Command/Add.hs index 8cbaf189af..ab4e3a9d17 100644 --- a/Command/Add.hs +++ b/Command/Add.hs @@ -34,6 +34,7 @@ import Utility.Tmp import Utility.CopyFile import Annex.InodeSentinal import Annex.Version +import qualified Database.Keys import Control.Exception (IOException) @@ -105,13 +106,22 @@ start file = ifAnnexed file addpresent add next $ if isSymbolicLink s then next $ addFile file else perform file - addpresent key = ifM isDirect + addpresent key = ifM versionSupportsUnlockedPointers ( do ms <- liftIO $ catchMaybeIO $ getSymbolicLinkStatus file case ms of Just s | isSymbolicLink s -> fixup key - _ -> ifM (goodContent key file) ( stop , add ) - , fixup key + _ -> ifM (sameInodeCache file =<< Database.Keys.getInodeCaches key) + ( stop, add ) + , ifM isDirect + ( do + ms <- liftIO $ catchMaybeIO $ getSymbolicLinkStatus file + case ms of + Just s | isSymbolicLink s -> fixup key + _ -> ifM (goodContent key file) + ( stop , add ) + , fixup key + ) ) fixup key = do -- the annexed symlink is present but not yet added to git From 6b717032c56a26cc6131e4d79b8b3b769f0fcc5e Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 16 Dec 2015 15:35:42 -0400 Subject: [PATCH 72/96] v6: fix locking modified file when the content is not present --- Command/Lock.hs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Command/Lock.hs b/Command/Lock.hs index 16ddce9424..741c18c150 100644 --- a/Command/Lock.hs +++ b/Command/Lock.hs @@ -65,15 +65,16 @@ performNew file key filemodified = do next $ cleanupNew file key where lockdown obj = do - ifM (sameInodeCache obj =<< Database.Keys.getInodeCaches key) + ifM (catchBoolIO $ sameInodeCache obj =<< Database.Keys.getInodeCaches key) ( breakhardlink obj , repopulate obj ) - freezeContent obj + whenM (liftIO $ doesFileExist obj) $ + freezeContent obj -- It's ok if the file is hard linked to obj, but if some other -- associated file is, we need to break that link to lock down obj. - breakhardlink obj = whenM ((> 1) . linkCount <$> liftIO (getFileStatus obj)) $ do + breakhardlink obj = whenM (catchBoolIO $ (> 1) . linkCount <$> liftIO (getFileStatus obj)) $ do mfc <- withTSDelta (liftIO . genInodeCache file) unlessM (sameInodeCache obj (maybeToList mfc)) $ do modifyContent obj $ replaceFile obj $ \tmp -> do From a858a331c4f547e3a970198277f4ec970387d422 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 16 Dec 2015 16:00:41 -0400 Subject: [PATCH 73/96] fix docs "git -a annex.largefiles=* add" cannot be used.. git does not pass the config along to the smudge filter, sadly. --- doc/git-annex-smudge.mdwn | 6 +----- doc/upgrades.mdwn | 9 +++++---- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/doc/git-annex-smudge.mdwn b/doc/git-annex-smudge.mdwn index c8e5453671..7439c87844 100644 --- a/doc/git-annex-smudge.mdwn +++ b/doc/git-annex-smudge.mdwn @@ -14,11 +14,7 @@ of being symlinks. When adding a file with `git add`, the annex.largefiles config is consulted to decide if a given file should be added to git as-is, -or if its content are large enough to need to use git-annex. To force a -file that would normally be added to the annex to be added to git as-is, -this can be temporarily overridden. For example: - - git -c annex.largefiles='exclude=*' add myfile +or if its content are large enough to need to use git-annex. The git configuration to use this command as a filter driver is as follows. This is normally set up for you by git-annex init, so you should diff --git a/doc/upgrades.mdwn b/doc/upgrades.mdwn index 27f22e16e9..9d30c2f14f 100644 --- a/doc/upgrades.mdwn +++ b/doc/upgrades.mdwn @@ -63,10 +63,11 @@ must support symbolic links..) The behavior of some commands changes in an upgraded repository: * `git add` will add files to the annex, in unlocked mode, rather than - adding them directly to the git repository. To bypass that and add a file - directly to git, use: - - git -c annex.largefiles='exclude=*' add myfile + adding them directly to the git repository. To cause some files to be + added directly to git, you can configure `annex.largefiles`. For + example: + + git config annex.largefiles "largerthan=100kb and not (include=*.c or include=*.h)" * `git annex unlock` and `git annex lock` change how the pointer to the annexed content is stored in git. From e61f3d1752c864b1846cae1bdd690c999b438400 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 16 Dec 2015 16:02:21 -0400 Subject: [PATCH 74/96] update todo list --- doc/todo/smudge.mdwn | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/doc/todo/smudge.mdwn b/doc/todo/smudge.mdwn index e1d54cf7f4..cc6793d5cb 100644 --- a/doc/todo/smudge.mdwn +++ b/doc/todo/smudge.mdwn @@ -321,12 +321,8 @@ files to be unlocked, while the indirect upgrades don't touch the files. #### implementation todo list -* Test suite should have passes for: - v5 indirect - v5 direct - v6 locked - v6 unlocked - Currently, the test suite fails horribly. +* Still a few test suite failues for v6 with locked files. +* Test suite should make pass for v6 with unlocked files. * assistant: In v6 mode, adds files in unlocked mode, so they can continue to be modified. TODO * When the webapp creates a repo, it forces it into direct mode. But that @@ -334,6 +330,10 @@ files to be unlocked, while the indirect upgrades don't touch the files. repos, but short-term, the assistant should make v5 repos in direct mode. * Reconcile staged changes into the associated files database, whenever the database is queried. +* A new connection to the Keys database is opened each time. + It would be more efficient to reuse a connection. + However, that needs a way to close the connection, which was a problem. + See 38a23928e9d45b56d6836a4eac703862d63cf93c for details. * See if the cases where the Keys database is not used can be optimised. Eg, if the Keys database doesn't exist at all, we know smudge/clean are not used, so queries don't From a0498b47de46ca2469b7562aa72df392043d2bed Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 16 Dec 2015 16:56:27 -0400 Subject: [PATCH 75/96] test suite down to only 4 fails with v6 --- Test.hs | 74 ++++++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 52 insertions(+), 22 deletions(-) diff --git a/Test.hs b/Test.hs index 5fbadd1d5b..4656e0a2b0 100644 --- a/Test.hs +++ b/Test.hs @@ -123,11 +123,11 @@ tests = testGroup "Tests" $ properties : map (\(d, te) -> withTestMode te (unitTests d)) testmodes where testmodes = - [ ("v5", TestMode { forceDirect = False, annexVersion = "5" }) + [ ("v6", TestMode { forceDirect = False, annexVersion = "6" }) + , ("v5", TestMode { forceDirect = False, annexVersion = "5" }) -- Windows will only use direct mode, so don't test twice. #ifndef mingw32_HOST_OS , ("v5 direct", TestMode { forceDirect = True, annexVersion = "5" }) - , ("v6", TestMode { forceDirect = False, annexVersion = "6" }) ] #endif @@ -298,7 +298,6 @@ test_shared_clone = intmpsharedclonerepo $ do , "--get" , "annex.hardlink" ] - print v v == Just "true\n" @? "shared clone of repo did not get annex.hardlink set" @@ -538,10 +537,13 @@ test_lock = intmpclonerepoInDirect $ do annexed_notpresent annexedfile -- regression test: unlock of newly added, not committed file - -- should fail + -- should fail in v5 mode. In v6 mode, this is allowed. writeFile "newfile" "foo" git_annex "add" ["newfile"] @? "add new file failed" - not <$> git_annex "unlock" ["newfile"] @? "unlock failed to fail on newly added, never committed file" + ifM (annexeval Annex.Version.versionSupportsUnlockedPointers) + ( git_annex "unlock" ["newfile"] @? "unlock failed on newly added, never committed file in v6 repository" + , not <$> git_annex "unlock" ["newfile"] @? "unlock failed to fail on newly added, never committed file in v5 repository" + ) git_annex "get" [annexedfile] @? "get of file failed" annexed_present annexedfile @@ -553,12 +555,21 @@ test_lock = intmpclonerepoInDirect $ do writeFile annexedfile $ content annexedfile ++ "foo" not <$> git_annex "lock" [annexedfile] @? "lock failed to fail without --force" git_annex "lock" ["--force", annexedfile] @? "lock --force failed" + -- In v6 mode, the original content of the file is not always + -- preserved after modification, so re-get it. + git_annex "get" [annexedfile] @? "get of file failed after lock --force" annexed_present annexedfile git_annex "unlock" [annexedfile] @? "unlock failed" unannexed annexedfile changecontent annexedfile - git_annex "add" [annexedfile] @? "add of modified file failed" - runchecks [checklink, checkunwritable] annexedfile + ifM (annexeval Annex.Version.versionSupportsUnlockedPointers) + ( do + boolSystem "git" [Param "add", Param annexedfile] @? "add of modified file failed" + runchecks [checkregularfile, checkwritable] annexedfile + , do + git_annex "add" [annexedfile] @? "add of modified file failed" + runchecks [checklink, checkunwritable] annexedfile + ) c <- readFile annexedfile assertEqual "content of modified file" c (changedcontent annexedfile) r' <- git_annex "drop" [annexedfile] @@ -584,7 +595,10 @@ test_edit' precommit = intmpclonerepoInDirect $ do @? "pre-commit failed" else boolSystem "git" [Param "commit", Param "-q", Param "-m", Param "contentchanged"] @? "git commit of edited file failed" - runchecks [checklink, checkunwritable] annexedfile + ifM (annexeval Annex.Version.versionSupportsUnlockedPointers) + ( runchecks [checkregularfile, checkwritable] annexedfile + , runchecks [checklink, checkunwritable] annexedfile + ) c <- readFile annexedfile assertEqual "content of modified file" c (changedcontent annexedfile) not <$> git_annex "drop" [annexedfile] @? "drop wrongly succeeded with no known copy of modified file" @@ -594,8 +608,12 @@ test_partial_commit = intmpclonerepoInDirect $ do git_annex "get" [annexedfile] @? "get of file failed" annexed_present annexedfile git_annex "unlock" [annexedfile] @? "unlock failed" - not <$> boolSystem "git" [Param "commit", Param "-q", Param "-m", Param "test", File annexedfile] - @? "partial commit of unlocked file not blocked by pre-commit hook" + ifM (annexeval Annex.Version.versionSupportsUnlockedPointers) + ( boolSystem "git" [Param "commit", Param "-q", Param "-m", Param "test", File annexedfile] + @? "partial commit of unlocked file should be allowed in v6 repository" + , not <$> boolSystem "git" [Param "commit", Param "-q", Param "-m", Param "test", File annexedfile] + @? "partial commit of unlocked file not blocked by pre-commit hook" + ) test_fix :: Assertion test_fix = intmpclonerepoInDirect $ do @@ -621,9 +639,13 @@ test_direct :: Assertion test_direct = intmpclonerepoInDirect $ do git_annex "get" [annexedfile] @? "get of file failed" annexed_present annexedfile - git_annex "direct" [] @? "switch to direct mode failed" - annexed_present annexedfile - git_annex "indirect" [] @? "switch to indirect mode failed" + ifM (annexeval Annex.Version.versionSupportsUnlockedPointers) + ( not <$> git_annex "direct" [] @? "switch to direct mode failed to fail in v6 repository" + , do + git_annex "direct" [] @? "switch to direct mode failed" + annexed_present annexedfile + git_annex "indirect" [] @? "switch to indirect mode failed" + ) test_trust :: Assertion test_trust = intmpclonerepo $ do @@ -1060,8 +1082,9 @@ test_nonannexed_file_conflict_resolution :: Assertion test_nonannexed_file_conflict_resolution = do check True False check False False - check True True - check False True + whenM (annexeval Annex.Version.versionSupportsDirectMode) $ do + check True True + check False True where check inr1 switchdirect = withtmpclonerepo $ \r1 -> withtmpclonerepo $ \r2 -> @@ -1110,8 +1133,9 @@ test_nonannexed_symlink_conflict_resolution :: Assertion test_nonannexed_symlink_conflict_resolution = do check True False check False False - check True True - check False True + whenM (annexeval Annex.Version.versionSupportsDirectMode) $ do + check True True + check False True where check inr1 switchdirect = withtmpclonerepo $ \r1 -> withtmpclonerepo $ \r2 -> @@ -1669,10 +1693,10 @@ checkunwritable f = unlessM (annexeval Config.isDirect) $ do checkwritable :: FilePath -> Assertion checkwritable f = do - r <- tryIO $ writeFile f $ content f - case r of - Left _ -> assertFailure $ "unable to modify " ++ f - Right _ -> return () + s <- getFileStatus f + let mode = fileMode s + unless (mode == mode `unionFileModes` ownerWriteMode) $ + assertFailure $ "unable to modify " ++ f checkdangling :: FilePath -> Assertion checkdangling f = ifM (annexeval Config.crippledFileSystem) @@ -1773,6 +1797,12 @@ setupTestMode = do testmode <- getTestMode when (forceDirect testmode) $ git_annex "direct" ["-q"] @? "git annex direct failed" + whenM (annexeval Annex.Version.versionSupportsUnlockedPointers) $ + boolSystem "git" + [ Param "config" + , Param "annex.largefiles" + , Param ("exclude=" ++ ingitfile) + ] @? "git config annex.largefiles failed" changeToTmpDir :: FilePath -> IO () changeToTmpDir t = do @@ -1808,7 +1838,7 @@ sha1annexedfiledup :: String sha1annexedfiledup = "sha1foodup" ingitfile :: String -ingitfile = "bar" +ingitfile = "bar.c" content :: FilePath -> String content f From e55ac3d3836026171feee9414edc695fa11c87bf Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 16 Dec 2015 17:04:31 -0400 Subject: [PATCH 76/96] update --- doc/todo/smudge.mdwn | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/doc/todo/smudge.mdwn b/doc/todo/smudge.mdwn index cc6793d5cb..7a254322aa 100644 --- a/doc/todo/smudge.mdwn +++ b/doc/todo/smudge.mdwn @@ -329,7 +329,13 @@ files to be unlocked, while the indirect upgrades don't touch the files. will fail when annex.version=6. Long-term, the assistant should make v6 repos, but short-term, the assistant should make v5 repos in direct mode. * Reconcile staged changes into the associated files database, whenever - the database is queried. + the database is queried. This is needed to handle eg: + git add largefile + git mv largefile othername + git annex move othername --to foo + # fails to drop content from associated file othername, + # because it doesn't know it has that name + # git commit clears up this mess * A new connection to the Keys database is opened each time. It would be more efficient to reuse a connection. However, that needs a way to close the connection, which was a problem. From fbf6c25de56fbdf25184052b7e38c4cedfe2a5bf Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Thu, 17 Dec 2015 18:46:52 -0400 Subject: [PATCH 77/96] interaction with shared clones --- doc/todo/smudge.mdwn | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/doc/todo/smudge.mdwn b/doc/todo/smudge.mdwn index 7a254322aa..63f05c42bf 100644 --- a/doc/todo/smudge.mdwn +++ b/doc/todo/smudge.mdwn @@ -348,6 +348,11 @@ files to be unlocked, while the indirect upgrades don't touch the files. (when not in direct mode). However, beware over-optimisation breaking the assistant or perhaps other long-lived processes. +* Interaction with shared clones. Should avoid hard linking from/to a + object in a shared clone if either repository has the object unlocked. + (And should avoid unlocking an object if it's hard linked to a shared clone, + but that's already accomplished because it avoids unlocking an object if + it's hard linked at all) * Make automatic merge conflict resolution work for pointer files. - Should probably automatically handle merge conflicts between annex symlinks and pointer files too. Maybe by always resulting in a pointer From 4cf9efb51ac46443d8b059a67ddcfe2e10f40ece Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Mon, 21 Dec 2015 18:00:13 -0400 Subject: [PATCH 78/96] remove (v6) associated file in unannex --- Command/Unannex.hs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Command/Unannex.hs b/Command/Unannex.hs index f7af8cde63..9bde191068 100644 --- a/Command/Unannex.hs +++ b/Command/Unannex.hs @@ -22,6 +22,7 @@ import qualified Git.Ref import qualified Git.DiffTree as DiffTree import Utility.CopyFile import Command.PreCommit (lockPreCommitHook) +import qualified Database.Keys cmd :: Command cmd = withGlobalOptions annexedMatchingOptions $ @@ -86,6 +87,7 @@ performIndirect file key = do cleanupIndirect :: FilePath -> Key -> CommandCleanup cleanupIndirect file key = do + Database.Keys.removeAssociatedFile key file src <- calcRepo $ gitAnnexLocation key ifM (Annex.getState Annex.fast) ( do From ca2c9777043c8f5f96f8c71459716247bd2b672e Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Mon, 21 Dec 2015 18:41:15 -0400 Subject: [PATCH 79/96] wip v6 support for assistant Files are not yet added to v6 repos in unlocked mode. --- Annex/Content/Direct.hs | 4 -- Annex/InodeSentinal.hs | 3 ++ Assistant/Threads/Committer.hs | 83 ++++++++++++++++++++-------------- Assistant/Threads/Watcher.hs | 62 ++++++++++++++++++------- debian/changelog | 8 ++-- doc/todo/smudge.mdwn | 2 - 6 files changed, 103 insertions(+), 59 deletions(-) diff --git a/Annex/Content/Direct.hs b/Annex/Content/Direct.hs index 59bea8f99b..3b9d1aea2c 100644 --- a/Annex/Content/Direct.hs +++ b/Annex/Content/Direct.hs @@ -21,7 +21,6 @@ module Annex.Content.Direct ( addInodeCache, writeInodeCache, compareInodeCaches, - compareInodeCachesWith, sameInodeCache, elemInodeCaches, sameFileStatus, @@ -172,9 +171,6 @@ sameFileStatus key f status = do ([], Nothing) -> return True _ -> return False -compareInodeCachesWith :: Annex InodeComparisonType -compareInodeCachesWith = ifM inodesChanged ( return Weakly, return Strongly ) - {- Copies the contentfile to the associated file, if the associated - file has no content. If the associated file does have content, - even if the content differs, it's left unchanged. -} diff --git a/Annex/InodeSentinal.hs b/Annex/InodeSentinal.hs index 8b48094dfc..412a7accc1 100644 --- a/Annex/InodeSentinal.hs +++ b/Annex/InodeSentinal.hs @@ -24,6 +24,9 @@ compareInodeCaches x y , return False ) +compareInodeCachesWith :: Annex InodeComparisonType +compareInodeCachesWith = ifM inodesChanged ( return Weakly, return Strongly ) + {- Checks if one of the provided old InodeCache matches the current - version of a file. -} sameInodeCache :: FilePath -> [InodeCache] -> Annex Bool diff --git a/Assistant/Threads/Committer.hs b/Assistant/Threads/Committer.hs index 745047d9dc..891df8419b 100644 --- a/Assistant/Threads/Committer.hs +++ b/Assistant/Threads/Committer.hs @@ -31,9 +31,11 @@ import Annex.Content import Annex.Link import Annex.CatFile import Annex.InodeSentinal +import Annex.Version import qualified Annex import Utility.InodeCache import Annex.Content.Direct +import qualified Database.Keys import qualified Command.Sync import qualified Git.Branch @@ -228,12 +230,11 @@ commitStaged msg = do return ok {- OSX needs a short delay after a file is added before locking it down, - - when using a non-direct mode repository, as pasting a file seems to - - try to set file permissions or otherwise access the file after closing - - it. -} + - as pasting a file seems to try to set file permissions or otherwise + - access the file after closing it. -} delayaddDefault :: Annex (Maybe Seconds) #ifdef darwin_HOST_OS -delayaddDefault = ifM isDirect +delayaddDefault = ifM (isDirect || versionSupportsUnlockedPointers) ( return Nothing , return $ Just $ Seconds 1 ) @@ -250,12 +251,11 @@ delayaddDefault = return Nothing - for write by some other process, and faster checking with git-ls-files - that the files are not already checked into git. - - - When a file is added, Inotify will notice the new symlink. So this waits - - for additional Changes to arrive, so that the symlink has hopefully been - - staged before returning, and will be committed immediately. - - - - OTOH, for kqueue, eventsCoalesce, so instead the symlink is directly - - created and staged. + - When a file is added in locked mode, Inotify will notice the new symlink. + - So this waits for additional Changes to arrive, so that the symlink has + - hopefully been staged before returning, and will be committed immediately. + - (OTOH, for kqueue, eventsCoalesce, so instead the symlink is directly + - created and staged.) - - Returns a list of all changes that are ready to be committed. - Any pending adds that are not ready yet are put back into the ChangeChan, @@ -265,7 +265,8 @@ handleAdds :: Bool -> Maybe Seconds -> [Change] -> Assistant [Change] handleAdds havelsof delayadd cs = returnWhen (null incomplete) $ do let (pending, inprocess) = partition isPendingAddChange incomplete direct <- liftAnnex isDirect - (pending', cleanup) <- if direct + unlocked <- liftAnnex versionSupportsUnlockedPointers + (pending', cleanup) <- if unlocked || direct then return (pending, noop) else findnew pending (postponed, toadd) <- partitionEithers <$> safeToAdd havelsof delayadd pending' inprocess @@ -276,10 +277,11 @@ handleAdds havelsof delayadd cs = returnWhen (null incomplete) $ do returnWhen (null toadd) $ do added <- addaction toadd $ - catMaybes <$> if direct - then adddirect toadd - else forM toadd add - if DirWatcher.eventsCoalesce || null added || direct + catMaybes <$> + if unlocked || direct + then addunlocked direct toadd + else forM toadd add + if DirWatcher.eventsCoalesce || null added || unlocked || direct then return $ added ++ otherchanges else do r <- handleAdds havelsof delayadd =<< getChanges @@ -316,15 +318,15 @@ handleAdds havelsof delayadd cs = returnWhen (null incomplete) $ do maybe (failedingest change) (done change mcache $ keyFilename ks) mkey add _ = return Nothing - {- In direct mode, avoid overhead of re-injesting a renamed - - file, by examining the other Changes to see if a removed - - file has the same InodeCache as the new file. If so, - - we can just update bookkeeping, and stage the file in git. + {- Avoid overhead of re-injesting a renamed unlocked file, by + - examining the other Changes to see if a removed file has the + - same InodeCache as the new file. If so, we can just update + - bookkeeping, and stage the file in git. -} - adddirect :: [Change] -> Assistant [Maybe Change] - adddirect toadd = do + addunlocked :: Bool -> [Change] -> Assistant [Maybe Change] + addunlocked isdirect toadd = do ct <- liftAnnex compareInodeCachesWith - m <- liftAnnex $ removedKeysMap ct cs + m <- liftAnnex $ removedKeysMap isdirect ct cs delta <- liftAnnex getTSDelta if M.null m then forM toadd add @@ -335,22 +337,33 @@ handleAdds havelsof delayadd cs = returnWhen (null incomplete) $ do Just cache -> case M.lookup (inodeCacheToKey ct cache) m of Nothing -> add c - Just k -> fastadd c k + Just k -> if isdirect + then fastadddirect c k + else fastaddunlocked c k - fastadd :: Change -> Key -> Assistant (Maybe Change) - fastadd change key = do + fastadddirect :: Change -> Key -> Assistant (Maybe Change) + fastadddirect change key = do let source = keySource change liftAnnex $ Command.Add.finishIngestDirect key source done change Nothing (keyFilename source) key + + fastaddunlocked :: Change -> Key -> Assistant (Maybe Change) + fastaddunlocked change key = do + let source = keySource change + liftAnnex $ do + Database.Keys.addAssociatedFile key (keyFilename source) + done change Nothing (keyFilename source) key - removedKeysMap :: InodeComparisonType -> [Change] -> Annex (M.Map InodeCacheKey Key) - removedKeysMap ct l = do + removedKeysMap :: Bool -> InodeComparisonType -> [Change] -> Annex (M.Map InodeCacheKey Key) + removedKeysMap isdirect ct l = do mks <- forM (filter isRmChange l) $ \c -> catKeyFile $ changeFile c M.fromList . concat <$> mapM mkpairs (catMaybes mks) where mkpairs k = map (\c -> (inodeCacheToKey ct c, k)) <$> - recordedInodeCache k + if isdirect + then recordedInodeCache k + else Database.Keys.getInodeCaches k failedingest change = do refill [retryChange change] @@ -359,12 +372,16 @@ handleAdds havelsof delayadd cs = returnWhen (null incomplete) $ do done change mcache file key = liftAnnex $ do logStatus key InfoPresent - link <- ifM isDirect - ( calcRepo $ gitAnnexLink file key - , Command.Add.link file key mcache + ifM versionSupportsUnlockedPointers + ( stagePointerFile file =<< hashPointerFile key + , do + link <- ifM isDirect + ( calcRepo $ gitAnnexLink file key + , Command.Add.link file key mcache + ) + whenM (pure DirWatcher.eventsCoalesce <||> isDirect) $ + stageSymlink file =<< hashSymlink link ) - whenM (pure DirWatcher.eventsCoalesce <||> isDirect) $ - stageSymlink file =<< hashSymlink link showEndOk return $ Just $ finishedChange change key diff --git a/Assistant/Threads/Watcher.hs b/Assistant/Threads/Watcher.hs index 37e0154b45..bb9659b7cf 100644 --- a/Assistant/Threads/Watcher.hs +++ b/Assistant/Threads/Watcher.hs @@ -1,6 +1,6 @@ {- git-annex assistant tree watcher - - - Copyright 2012-2013 Joey Hess + - Copyright 2012-2015 Joey Hess - - Licensed under the GNU GPL version 3 or higher. -} @@ -36,10 +36,15 @@ import Annex.CheckIgnore import Annex.Link import Annex.FileMatcher import Types.FileMatcher +import Annex.Content import Annex.ReplaceFile +import Annex.Version +import Annex.InodeSentinal import Git.Types import Config import Utility.ThreadScheduler +import Logs.Location +import qualified Database.Keys #ifndef mingw32_HOST_OS import qualified Utility.Lsof as Lsof #endif @@ -88,10 +93,13 @@ runWatcher = do startup <- asIO1 startupScan matcher <- liftAnnex largeFilesMatcher direct <- liftAnnex isDirect + unlocked <- liftAnnex versionSupportsUnlockedPointers symlinkssupported <- liftAnnex $ coreSymlinks <$> Annex.getGitConfig - addhook <- hook $ if direct - then onAddDirect symlinkssupported matcher - else onAdd matcher + addhook <- hook $ if unlocked + then onAddUnlocked symlinkssupported matcher + else if direct + then onAddDirect symlinkssupported matcher + else onAdd matcher delhook <- hook onDel addsymlinkhook <- hook $ onAddSymlink direct deldirhook <- hook onDelDir @@ -216,15 +224,33 @@ onAdd matcher file filestatus shouldRestage :: DaemonStatus -> Bool shouldRestage ds = scanComplete ds || forceRestage ds +onAddUnlocked :: Bool -> FileMatcher Annex -> Handler +onAddUnlocked = onAddUnlocked' False contentchanged Database.Keys.addAssociatedFile samefilestatus + where + samefilestatus key file status = do + cache <- Database.Keys.getInodeCaches key + curr <- withTSDelta $ \delta -> liftIO $ toInodeCache delta file status + case (cache, curr) of + (_, Just c) -> elemInodeCaches c cache + ([], Nothing) -> return True + _ -> return False + contentchanged oldkey file = do + Database.Keys.removeAssociatedFile oldkey file + unlessM (inAnnex oldkey) $ + logStatus oldkey InfoMissing + {- In direct mode, add events are received for both new files, and - modified existing files. -} onAddDirect :: Bool -> FileMatcher Annex -> Handler -onAddDirect symlinkssupported matcher file fs = do +onAddDirect = onAddUnlocked' True changedDirect (\k f -> void $ addAssociatedFile k f) sameFileStatus + +onAddUnlocked' :: Bool -> (Key -> FilePath -> Annex ()) -> (Key -> FilePath -> Annex ()) -> (Key -> FilePath -> FileStatus -> Annex Bool) -> Bool -> FileMatcher Annex -> Handler +onAddUnlocked' isdirect contentchanged addassociatedfile samefilestatus symlinkssupported matcher file fs = do v <- liftAnnex $ catKeyFile file case (v, fs) of (Just key, Just filestatus) -> - ifM (liftAnnex $ sameFileStatus key file filestatus) + ifM (liftAnnex $ samefilestatus key file filestatus) {- It's possible to get an add event for - an existing file that is not - really modified, but it might have @@ -237,13 +263,13 @@ onAddDirect symlinkssupported matcher file fs = do , noChange ) , guardSymlinkStandin (Just key) $ do - debug ["changed direct", file] - liftAnnex $ changedDirect key file + debug ["changed", file] + liftAnnex $ contentchanged key file add matcher file ) _ -> unlessIgnored file $ guardSymlinkStandin Nothing $ do - debug ["add direct", file] + debug ["add", file] add matcher file where {- On a filesystem without symlinks, we'll get changes for regular @@ -259,9 +285,9 @@ onAddDirect symlinkssupported matcher file fs = do Just lt -> do case fileKey $ takeFileName lt of Nothing -> noop - Just key -> void $ liftAnnex $ - addAssociatedFile key file - onAddSymlink' linktarget mk True file fs + Just key -> liftAnnex $ + addassociatedfile key file + onAddSymlink' linktarget mk isdirect file fs {- A symlink might be an arbitrary symlink, which is just added. - Or, if it is a git-annex symlink, ensure it points to the content @@ -330,13 +356,15 @@ onDel file _ = do onDel' :: FilePath -> Annex () onDel' file = do - whenM isDirect $ do - mkey <- catKeyFile file - case mkey of - Nothing -> noop - Just key -> void $ removeAssociatedFile key file + ifM versionSupportsUnlockedPointers + ( withkey $ flip Database.Keys.removeAssociatedFile file + , whenM isDirect $ + withkey $ \key -> void $ removeAssociatedFile key file + ) Annex.Queue.addUpdateIndex =<< inRepo (Git.UpdateIndex.unstageFile file) + where + withkey a = maybe noop a =<< catKeyFile file {- A directory has been deleted, or moved, so tell git to remove anything - that was inside it from its cache. Since it could reappear at any time, diff --git a/debian/changelog b/debian/changelog index 0488c2eb11..fb12926b6c 100644 --- a/debian/changelog +++ b/debian/changelog @@ -4,16 +4,18 @@ git-annex (6.20151225) unstable; urgency=medium * The upgrade to version 6 is not done fully automatically, because upgrading a direct mode repository to version 6 will prevent old versions of git-annex from working in other clones of that repository. + * init: --version parameter added to control which supported repository + version to use. * smudge: New command, used for git smudge filter. This will replace direct mode. - * init: Configure .git/info/attributes to use git-annex as a smudge + * init, upgrade: Configure .git/info/attributes to use git-annex as a smudge filter. Note that this changes the default behavior of git add in a newly initialized repository; it will add files to the annex. * unlock, lock: In v6 mode, unlocking a file changes it from a symlink to a pointer file, and this change can be committed to the git repository. * add: In v6 mode, adds modified files to the annex. - * init: --version parameter added to control which supported repository - version to use. + * assistant: In v6 mode, adds files in unlocked mode, so they can + continue to be modified. TODO -- Joey Hess Tue, 08 Dec 2015 11:14:03 -0400 diff --git a/doc/todo/smudge.mdwn b/doc/todo/smudge.mdwn index 63f05c42bf..fe938bfbed 100644 --- a/doc/todo/smudge.mdwn +++ b/doc/todo/smudge.mdwn @@ -323,8 +323,6 @@ files to be unlocked, while the indirect upgrades don't touch the files. * Still a few test suite failues for v6 with locked files. * Test suite should make pass for v6 with unlocked files. -* assistant: In v6 mode, adds files in unlocked mode, so they can - continue to be modified. TODO * When the webapp creates a repo, it forces it into direct mode. But that will fail when annex.version=6. Long-term, the assistant should make v6 repos, but short-term, the assistant should make v5 repos in direct mode. From 8e9608d7f028ccd44e33fcf383bd8cc6974856f5 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Tue, 22 Dec 2015 13:23:33 -0400 Subject: [PATCH 80/96] refactoring no behavior changes --- Annex/Ingest.hs | 220 +++++++++++++++++++++++++++++++++ Assistant/Threads/Committer.hs | 10 +- Command/Add.hs | 197 +---------------------------- Command/AddUnused.hs | 4 +- Command/AddUrl.hs | 5 +- Command/Indirect.hs | 4 +- Command/Lock.hs | 4 +- Command/ReKey.hs | 4 +- Types/KeySource.hs | 4 +- 9 files changed, 239 insertions(+), 213 deletions(-) create mode 100644 Annex/Ingest.hs diff --git a/Annex/Ingest.hs b/Annex/Ingest.hs new file mode 100644 index 0000000000..0fd32a042f --- /dev/null +++ b/Annex/Ingest.hs @@ -0,0 +1,220 @@ +{- git-annex content ingestion + - + - Copyright 2010-2015 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +{-# LANGUAGE CPP #-} + +module Annex.Ingest ( + lockDown, + ingest, + finishIngestDirect, + addLink, + makeLink, + restoreFile, + forceParams, +) where + +import Common.Annex +import Types.KeySource +import Backend +import Annex.Content +import Annex.Content.Direct +import Annex.Perms +import Annex.Link +import Annex.MetaData +import qualified Annex +import qualified Annex.Queue +import Config +import Utility.InodeCache +import Annex.ReplaceFile +import Utility.Tmp +import Utility.CopyFile +import Annex.InodeSentinal +import Annex.Version +#ifdef WITH_CLIBS +#ifndef __ANDROID__ +import Utility.Touch +#endif +#endif + +import Control.Exception (IOException) + +{- The file that's being ingested is locked down before a key is generated, + - to prevent it from being modified in between. This lock down is not + - perfect at best (and pretty weak at worst). For example, it does not + - guard against files that are already opened for write by another process. + - So a KeySource is returned. Its inodeCache can be used to detect any + - changes that might be made to the file after it was locked down. + - + - When possible, the file is hard linked to a temp directory. This guards + - against some changes, like deletion or overwrite of the file, and + - allows lsof checks to be done more efficiently when adding a lot of files. + - + - Lockdown can fail if a file gets deleted, and Nothing will be returned. + -} +lockDown :: FilePath -> Annex (Maybe KeySource) +lockDown = either + (\e -> warning (show e) >> return Nothing) + (return . Just) + <=< lockDown' + +lockDown' :: FilePath -> Annex (Either IOException KeySource) +lockDown' file = ifM crippledFileSystem + ( withTSDelta $ liftIO . tryIO . nohardlink + , tryIO $ do + tmp <- fromRepo gitAnnexTmpMiscDir + createAnnexDirectory tmp + go tmp + ) + where + {- In indirect mode, the write bit is removed from the file as part + - of lock down to guard against further writes, and because objects + - in the annex have their write bit disabled anyway. + - + - Freezing the content early also lets us fail early when + - someone else owns the file. + - + - This is not done in direct mode, because files there need to + - remain writable at all times. + -} + go tmp = do + unlessM isDirect $ + freezeContent file + withTSDelta $ \delta -> liftIO $ do + (tmpfile, h) <- openTempFile tmp $ + relatedTemplate $ takeFileName file + hClose h + nukeFile tmpfile + withhardlink delta tmpfile `catchIO` const (nohardlink delta) + nohardlink delta = do + cache <- genInodeCache file delta + return KeySource + { keyFilename = file + , contentLocation = file + , inodeCache = cache + } + withhardlink delta tmpfile = do + createLink file tmpfile + cache <- genInodeCache tmpfile delta + return KeySource + { keyFilename = file + , contentLocation = tmpfile + , inodeCache = cache + } + +{- Ingests a locked down file into the annex. + - + - In direct mode, leaves the file alone, and just updates bookkeeping + - information. + -} +ingest :: Maybe KeySource -> Annex (Maybe Key, Maybe InodeCache) +ingest Nothing = return (Nothing, Nothing) +ingest (Just source) = withTSDelta $ \delta -> do + backend <- chooseBackend $ keyFilename source + k <- genKey source backend + let src = contentLocation source + ms <- liftIO $ catchMaybeIO $ getFileStatus src + mcache <- maybe (pure Nothing) (liftIO . toInodeCache delta src) ms + case (mcache, inodeCache source) of + (_, Nothing) -> go k mcache ms + (Just newc, Just c) | compareStrong c newc -> go k mcache ms + _ -> failure "changed while it was being added" + where + go k mcache ms = ifM isDirect + ( godirect k mcache ms + , goindirect k mcache ms + ) + + goindirect (Just (key, _)) mcache ms = do + catchNonAsync (moveAnnex key $ contentLocation source) + (restoreFile (keyFilename source) key) + maybe noop (genMetaData key (keyFilename source)) ms + liftIO $ nukeFile $ keyFilename source + return (Just key, mcache) + goindirect _ _ _ = failure "failed to generate a key" + + godirect (Just (key, _)) (Just cache) ms = do + addInodeCache key cache + maybe noop (genMetaData key (keyFilename source)) ms + finishIngestDirect key source + return (Just key, Just cache) + godirect _ _ _ = failure "failed to generate a key" + + failure msg = do + warning $ keyFilename source ++ " " ++ msg + when (contentLocation source /= keyFilename source) $ + liftIO $ nukeFile $ contentLocation source + return (Nothing, Nothing) + +finishIngestDirect :: Key -> KeySource -> Annex () +finishIngestDirect key source = do + void $ addAssociatedFile key $ keyFilename source + when (contentLocation source /= keyFilename source) $ + liftIO $ nukeFile $ contentLocation source + + {- Copy to any other locations using the same key. -} + otherfs <- filter (/= keyFilename source) <$> associatedFiles key + forM_ otherfs $ + addContentWhenNotPresent key (keyFilename source) + +{- On error, put the file back so it doesn't seem to have vanished. + - This can be called before or after the symlink is in place. -} +restoreFile :: FilePath -> Key -> SomeException -> Annex a +restoreFile file key e = do + whenM (inAnnex key) $ do + liftIO $ nukeFile file + -- The key could be used by other files too, so leave the + -- content in the annex, and make a copy back to the file. + obj <- calcRepo $ gitAnnexLocation key + unlessM (liftIO $ copyFileExternal CopyTimeStamps obj file) $ + warning $ "Unable to restore content of " ++ file ++ "; it should be located in " ++ obj + thawContent file + throwM e + +{- Creates the symlink to the annexed content, returns the link target. -} +makeLink :: FilePath -> Key -> Maybe InodeCache -> Annex String +makeLink file key mcache = flip catchNonAsync (restoreFile file key) $ do + l <- calcRepo $ gitAnnexLink file key + replaceFile file $ makeAnnexLink l + + -- touch symlink to have same time as the original file, + -- as provided in the InodeCache + case mcache of +#if defined(WITH_CLIBS) && ! defined(__ANDROID__) + Just c -> liftIO $ touch file (TimeSpec $ inodeCacheToMtime c) False +#else + Just _ -> noop +#endif + Nothing -> noop + + return l + +{- Creates the symlink to the annexed content, and stages it in git. + - + - As long as the filesystem supports symlinks, we use + - git add, rather than directly staging the symlink to git. + - Using git add is best because it allows the queuing to work + - and is faster (staging the symlink runs hash-object commands each time). + - Also, using git add allows it to skip gitignored files, unless forced + - to include them. + -} +addLink :: FilePath -> Key -> Maybe InodeCache -> Annex () +addLink file key mcache = ifM (coreSymlinks <$> Annex.getGitConfig) + ( do + _ <- makeLink file key mcache + ps <- forceParams + Annex.Queue.addCommand "add" (ps++[Param "--"]) [file] + , do + l <- makeLink file key mcache + addAnnexLink l file + ) + +{- Parameters to pass to git add, forcing addition of ignored files. -} +forceParams :: Annex [CommandParam] +forceParams = ifM (Annex.getState Annex.force) + ( return [Param "-f"] + , return [] + ) diff --git a/Assistant/Threads/Committer.hs b/Assistant/Threads/Committer.hs index 891df8419b..5e8df56c82 100644 --- a/Assistant/Threads/Committer.hs +++ b/Assistant/Threads/Committer.hs @@ -21,13 +21,13 @@ import Logs.Transfer import Logs.Location import qualified Annex.Queue import qualified Git.LsFiles -import qualified Command.Add import Utility.ThreadScheduler import qualified Utility.Lsof as Lsof import qualified Utility.DirWatcher as DirWatcher import Types.KeySource import Config import Annex.Content +import Annex.Ingest import Annex.Link import Annex.CatFile import Annex.InodeSentinal @@ -314,7 +314,7 @@ handleAdds havelsof delayadd cs = returnWhen (null incomplete) $ do doadd = sanitycheck ks $ do (mkey, mcache) <- liftAnnex $ do showStart "add" $ keyFilename ks - Command.Add.ingest $ Just ks + ingest $ Just ks maybe (failedingest change) (done change mcache $ keyFilename ks) mkey add _ = return Nothing @@ -344,7 +344,7 @@ handleAdds havelsof delayadd cs = returnWhen (null incomplete) $ do fastadddirect :: Change -> Key -> Assistant (Maybe Change) fastadddirect change key = do let source = keySource change - liftAnnex $ Command.Add.finishIngestDirect key source + liftAnnex $ finishIngestDirect key source done change Nothing (keyFilename source) key fastaddunlocked :: Change -> Key -> Assistant (Maybe Change) @@ -377,7 +377,7 @@ handleAdds havelsof delayadd cs = returnWhen (null incomplete) $ do , do link <- ifM isDirect ( calcRepo $ gitAnnexLink file key - , Command.Add.link file key mcache + , makeLink file key mcache ) whenM (pure DirWatcher.eventsCoalesce <||> isDirect) $ stageSymlink file =<< hashSymlink link @@ -424,7 +424,7 @@ safeToAdd _ _ [] [] = return [] safeToAdd havelsof delayadd pending inprocess = do maybe noop (liftIO . threadDelaySeconds) delayadd liftAnnex $ do - keysources <- forM pending $ Command.Add.lockDown . changeFile + keysources <- forM pending $ lockDown . changeFile let inprocess' = inprocess ++ mapMaybe mkinprocess (zip pending keysources) openfiles <- if havelsof then S.fromList . map fst3 . filter openwrite <$> diff --git a/Command/Add.hs b/Command/Add.hs index ab4e3a9d17..b1b830cbcf 100644 --- a/Command/Add.hs +++ b/Command/Add.hs @@ -5,39 +5,23 @@ - Licensed under the GNU GPL version 3 or higher. -} -{-# LANGUAGE CPP #-} - module Command.Add where import Common.Annex import Command -import Types.KeySource -import Backend +import Annex.Ingest import Logs.Location import Annex.Content import Annex.Content.Direct -import Annex.Perms import Annex.Link -import Annex.MetaData import qualified Annex import qualified Annex.Queue -#ifdef WITH_CLIBS -#ifndef __ANDROID__ -import Utility.Touch -#endif -#endif import Config import Utility.InodeCache import Annex.FileMatcher -import Annex.ReplaceFile -import Utility.Tmp -import Utility.CopyFile -import Annex.InodeSentinal import Annex.Version import qualified Database.Keys -import Control.Exception (IOException) - cmd :: Command cmd = notBareRepo $ withGlobalOptions (jobsOption : fileMatchingOptions) $ command "add" SectionCommon "add files to annex" @@ -89,9 +73,6 @@ addFile file = do Annex.Queue.addCommand "add" (ps++[Param "--"]) [file] return True -{- The add subcommand annexes a file, generating a key for it using a - - backend, and then moving it into the annex directory and setting up - - the symlink pointing to its content. -} start :: FilePath -> CommandStart start file = ifAnnexed file addpresent add where @@ -131,188 +112,12 @@ start file = ifAnnexed file addpresent add void $ addAssociatedFile key file next $ next $ cleanup file key Nothing =<< inAnnex key -{- The file that's being added is locked down before a key is generated, - - to prevent it from being modified in between. This lock down is not - - perfect at best (and pretty weak at worst). For example, it does not - - guard against files that are already opened for write by another process. - - So a KeySource is returned. Its inodeCache can be used to detect any - - changes that might be made to the file after it was locked down. - - - - When possible, the file is hard linked to a temp directory. This guards - - against some changes, like deletion or overwrite of the file, and - - allows lsof checks to be done more efficiently when adding a lot of files. - - - - Lockdown can fail if a file gets deleted, and Nothing will be returned. - -} -lockDown :: FilePath -> Annex (Maybe KeySource) -lockDown = either - (\e -> warning (show e) >> return Nothing) - (return . Just) - <=< lockDown' - -lockDown' :: FilePath -> Annex (Either IOException KeySource) -lockDown' file = ifM crippledFileSystem - ( withTSDelta $ liftIO . tryIO . nohardlink - , tryIO $ do - tmp <- fromRepo gitAnnexTmpMiscDir - createAnnexDirectory tmp - go tmp - ) - where - {- In indirect mode, the write bit is removed from the file as part - - of lock down to guard against further writes, and because objects - - in the annex have their write bit disabled anyway. - - - - Freezing the content early also lets us fail early when - - someone else owns the file. - - - - This is not done in direct mode, because files there need to - - remain writable at all times. - -} - go tmp = do - unlessM isDirect $ - freezeContent file - withTSDelta $ \delta -> liftIO $ do - (tmpfile, h) <- openTempFile tmp $ - relatedTemplate $ takeFileName file - hClose h - nukeFile tmpfile - withhardlink delta tmpfile `catchIO` const (nohardlink delta) - nohardlink delta = do - cache <- genInodeCache file delta - return KeySource - { keyFilename = file - , contentLocation = file - , inodeCache = cache - } - withhardlink delta tmpfile = do - createLink file tmpfile - cache <- genInodeCache tmpfile delta - return KeySource - { keyFilename = file - , contentLocation = tmpfile - , inodeCache = cache - } - -{- Ingests a locked down file into the annex. - - - - In direct mode, leaves the file alone, and just updates bookkeeping - - information. - -} -ingest :: Maybe KeySource -> Annex (Maybe Key, Maybe InodeCache) -ingest Nothing = return (Nothing, Nothing) -ingest (Just source) = withTSDelta $ \delta -> do - backend <- chooseBackend $ keyFilename source - k <- genKey source backend - let src = contentLocation source - ms <- liftIO $ catchMaybeIO $ getFileStatus src - mcache <- maybe (pure Nothing) (liftIO . toInodeCache delta src) ms - case (mcache, inodeCache source) of - (_, Nothing) -> go k mcache ms - (Just newc, Just c) | compareStrong c newc -> go k mcache ms - _ -> failure "changed while it was being added" - where - go k mcache ms = ifM isDirect - ( godirect k mcache ms - , goindirect k mcache ms - ) - - goindirect (Just (key, _)) mcache ms = do - catchNonAsync (moveAnnex key $ contentLocation source) - (undo (keyFilename source) key) - maybe noop (genMetaData key (keyFilename source)) ms - liftIO $ nukeFile $ keyFilename source - return (Just key, mcache) - goindirect _ _ _ = failure "failed to generate a key" - - godirect (Just (key, _)) (Just cache) ms = do - addInodeCache key cache - maybe noop (genMetaData key (keyFilename source)) ms - finishIngestDirect key source - return (Just key, Just cache) - godirect _ _ _ = failure "failed to generate a key" - - failure msg = do - warning $ keyFilename source ++ " " ++ msg - when (contentLocation source /= keyFilename source) $ - liftIO $ nukeFile $ contentLocation source - return (Nothing, Nothing) - -finishIngestDirect :: Key -> KeySource -> Annex () -finishIngestDirect key source = do - void $ addAssociatedFile key $ keyFilename source - when (contentLocation source /= keyFilename source) $ - liftIO $ nukeFile $ contentLocation source - - {- Copy to any other locations using the same key. -} - otherfs <- filter (/= keyFilename source) <$> associatedFiles key - forM_ otherfs $ - addContentWhenNotPresent key (keyFilename source) - perform :: FilePath -> CommandPerform perform file = lockDown file >>= ingest >>= go where go (Just key, cache) = next $ cleanup file key cache True go (Nothing, _) = stop -{- On error, put the file back so it doesn't seem to have vanished. - - This can be called before or after the symlink is in place. -} -undo :: FilePath -> Key -> SomeException -> Annex a -undo file key e = do - whenM (inAnnex key) $ do - liftIO $ nukeFile file - -- The key could be used by other files too, so leave the - -- content in the annex, and make a copy back to the file. - obj <- calcRepo $ gitAnnexLocation key - unlessM (liftIO $ copyFileExternal CopyTimeStamps obj file) $ - warning $ "Unable to restore content of " ++ file ++ "; it should be located in " ++ obj - thawContent file - throwM e - -{- Creates the symlink to the annexed content, returns the link target. -} -link :: FilePath -> Key -> Maybe InodeCache -> Annex String -link file key mcache = flip catchNonAsync (undo file key) $ do - l <- calcRepo $ gitAnnexLink file key - replaceFile file $ makeAnnexLink l - - -- touch symlink to have same time as the original file, - -- as provided in the InodeCache - case mcache of -#if defined(WITH_CLIBS) && ! defined(__ANDROID__) - Just c -> liftIO $ touch file (TimeSpec $ inodeCacheToMtime c) False -#else - Just _ -> noop -#endif - Nothing -> noop - - return l - -{- Creates the symlink to the annexed content, and stages it in git. - - - - As long as the filesystem supports symlinks, we use - - git add, rather than directly staging the symlink to git. - - Using git add is best because it allows the queuing to work - - and is faster (staging the symlink runs hash-object commands each time). - - Also, using git add allows it to skip gitignored files, unless forced - - to include them. - -} -addLink :: FilePath -> Key -> Maybe InodeCache -> Annex () -addLink file key mcache = ifM (coreSymlinks <$> Annex.getGitConfig) - ( do - _ <- link file key mcache - ps <- forceParams - Annex.Queue.addCommand "add" (ps++[Param "--"]) [file] - , do - l <- link file key mcache - addAnnexLink l file - ) - -forceParams :: Annex [CommandParam] -forceParams = ifM (Annex.getState Annex.force) - ( return [Param "-f"] - , return [] - ) - cleanup :: FilePath -> Key -> Maybe InodeCache -> Bool -> CommandCleanup cleanup file key mcache hascontent = do ifM (isDirect <&&> pure hascontent) diff --git a/Command/AddUnused.hs b/Command/AddUnused.hs index 2b315eada4..57fd0cf388 100644 --- a/Command/AddUnused.hs +++ b/Command/AddUnused.hs @@ -10,7 +10,7 @@ module Command.AddUnused where import Common.Annex import Logs.Location import Command -import qualified Command.Add +import Annex.Ingest import Command.Unused (withUnusedMaps, UnusedMaps(..), startUnused) import Types.Key @@ -31,7 +31,7 @@ start = startUnused "addunused" perform perform :: Key -> CommandPerform perform key = next $ do logStatus key InfoPresent - Command.Add.addLink file key Nothing + addLink file key Nothing return True where file = "unused." ++ key2file key diff --git a/Command/AddUrl.hs b/Command/AddUrl.hs index de83d8c9b4..659274e49e 100644 --- a/Command/AddUrl.hs +++ b/Command/AddUrl.hs @@ -14,14 +14,15 @@ import Network.URI import Common.Annex import Command import Backend -import qualified Command.Add import qualified Annex import qualified Annex.Queue import qualified Annex.Url as Url import qualified Backend.URL import qualified Remote import qualified Types.Remote as Remote +import qualified Command.Add import Annex.Content +import Annex.Ingest import Annex.UUID import Logs.Web import Types.Key @@ -359,7 +360,7 @@ cleanup u url file key mtmp = case mtmp of when (isJust mtmp) $ logStatus key InfoPresent setUrlPresent u key url - Command.Add.addLink file key Nothing + addLink file key Nothing whenM isDirect $ do void $ addAssociatedFile key file {- For moveAnnex to work in direct mode, the symlink diff --git a/Command/Indirect.hs b/Command/Indirect.hs index f5234b4dc8..06897e292e 100644 --- a/Command/Indirect.hs +++ b/Command/Indirect.hs @@ -20,7 +20,7 @@ import Annex.Content import Annex.Content.Direct import Annex.CatFile import Annex.Init -import qualified Command.Add +import Annex.Ingest cmd :: Command cmd = notBareRepo $ noDaemonRunning $ @@ -90,7 +90,7 @@ perform = do Right _ -> do l <- calcRepo $ gitAnnexLink f k liftIO $ createSymbolicLink l f - Left e -> catchNonAsync (Command.Add.undo f k e) + Left e -> catchNonAsync (restoreFile f k e) warnlocked showEndOk diff --git a/Command/Lock.hs b/Command/Lock.hs index 741c18c150..1be6e9c761 100644 --- a/Command/Lock.hs +++ b/Command/Lock.hs @@ -19,7 +19,7 @@ import Annex.Perms import Annex.ReplaceFile import Utility.InodeCache import qualified Database.Keys -import qualified Command.Add +import Annex.Ingest import Logs.Location cmd :: Command @@ -60,7 +60,7 @@ startNew file key = ifM (isJust <$> isAnnexLink file) performNew :: FilePath -> Key -> Bool -> CommandPerform performNew file key filemodified = do lockdown =<< calcRepo (gitAnnexLocation key) - Command.Add.addLink file key + addLink file key =<< withTSDelta (liftIO . genInodeCache file) next $ cleanupNew file key where diff --git a/Command/ReKey.hs b/Command/ReKey.hs index fe13d4dd45..9fb8515c01 100644 --- a/Command/ReKey.hs +++ b/Command/ReKey.hs @@ -12,7 +12,7 @@ import Command import qualified Annex import Types.Key import Annex.Content -import qualified Command.Add +import Annex.Ingest import Logs.Web import Logs.Location import Utility.CopyFile @@ -70,6 +70,6 @@ cleanup file oldkey newkey = do -- Update symlink to use the new key. liftIO $ removeFile file - Command.Add.addLink file newkey Nothing + addLink file newkey Nothing logStatus newkey InfoPresent return True diff --git a/Types/KeySource.hs b/Types/KeySource.hs index 7c2fd13d56..25774588a2 100644 --- a/Types/KeySource.hs +++ b/Types/KeySource.hs @@ -9,7 +9,7 @@ module Types.KeySource where import Utility.InodeCache -{- When content is in the process of being added to the annex, +{- When content is in the process of being ingested into the annex, - and a Key generated from it, this data type is used. - - The contentLocation may be different from the filename @@ -19,7 +19,7 @@ import Utility.InodeCache - of a different Key. - - The inodeCache can be used to detect some types of modifications to - - files that may be made while they're in the process of being added. + - files that may be made while they're in the process of being ingested. -} data KeySource = KeySource { keyFilename :: FilePath From 439214094693bcaea0d2e460fba3f566e56027a0 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Tue, 22 Dec 2015 15:20:03 -0400 Subject: [PATCH 81/96] make linkAnnex detect when the file changes as it's being copied/linked in This fixes a race where the modified file ended up in annex/objects, and the InodeCache stored in the database was for the modified version, so git-annex didn't know it had gotten modified. The race could occur when the smudge filter was running; now it gets the InodeCache before generating the Key, which avoids the race. --- Annex/Content.hs | 35 ++++++++++++++++++++++++----------- Command/Smudge.hs | 12 ++++++++---- Command/Unlock.hs | 5 ++++- Upgrade/V5.hs | 4 +++- 4 files changed, 39 insertions(+), 17 deletions(-) diff --git a/Annex/Content.hs b/Annex/Content.hs index 4cd2b02597..c00863e310 100644 --- a/Annex/Content.hs +++ b/Annex/Content.hs @@ -521,30 +521,43 @@ populatePointerFile k obj f = go =<< isPointerFile f - prevent losing the content if the source file is deleted, but does not - guard against modifications. -} -linkAnnex :: Key -> FilePath -> Annex LinkAnnexResult -linkAnnex key src = do +linkAnnex :: Key -> FilePath -> Maybe InodeCache -> Annex LinkAnnexResult +linkAnnex key src srcic = do dest <- calcRepo (gitAnnexLocation key) - modifyContent dest $ linkAnnex' key src dest + modifyContent dest $ linkAnnex' key src srcic dest {- Hard links (or copies) src to dest, one of which should be the - annex object. Updates inode cache for src and for dest when it's - changed. -} -linkAnnex' :: Key -> FilePath -> FilePath -> Annex LinkAnnexResult -linkAnnex' key src dest = +linkAnnex' :: Key -> FilePath -> Maybe InodeCache -> FilePath -> Annex LinkAnnexResult +linkAnnex' _ _ Nothing _ = return LinkAnnexFailed +linkAnnex' key src (Just srcic) dest = ifM (liftIO $ doesFileExist dest) ( do - Database.Keys.storeInodeCaches key [src] + Database.Keys.addInodeCaches key [srcic] return LinkAnnexNoop , ifM (linkAnnex'' key src dest) ( do thawContent dest - Database.Keys.storeInodeCaches key [dest, src] - return LinkAnnexOk - , do - Database.Keys.storeInodeCaches key [src] - return LinkAnnexFailed + -- src could have changed while being copied + -- to dest + mcache <- withTSDelta (liftIO . genInodeCache src) + case mcache of + Just srcic' | compareStrong srcic srcic' -> do + destic <- withTSDelta (liftIO . genInodeCache dest) + Database.Keys.addInodeCaches key $ + catMaybes [destic, Just srcic] + return LinkAnnexOk + _ -> do + liftIO $ nukeFile dest + failed + , failed ) ) + where + failed = do + Database.Keys.addInodeCaches key [srcic] + return LinkAnnexFailed data LinkAnnexResult = LinkAnnexOk | LinkAnnexFailed | LinkAnnexNoop diff --git a/Command/Smudge.hs b/Command/Smudge.hs index e6541bc6d6..2876326b8b 100644 --- a/Command/Smudge.hs +++ b/Command/Smudge.hs @@ -14,6 +14,7 @@ import Annex.Link import Annex.MetaData import Annex.FileMatcher import Annex.InodeSentinal +import Utility.InodeCache import Types.KeySource import Backend import Logs.Location @@ -86,7 +87,7 @@ clean file = do -- If the file being cleaned was hard linked to the old key's annex object, -- modifying the file will have caused the object to have the wrong content. --- Clean up from that, making the +-- Clean up from that. cleanOldKey :: FilePath -> Key -> Annex () cleanOldKey modifiedfile key = do obj <- calcRepo (gitAnnexLocation key) @@ -99,7 +100,9 @@ cleanOldKey modifiedfile key = do case fs' of -- If linkAnnex fails, the file with the content -- is still present, so no need for any recovery. - (f:_) -> void $ linkAnnex key f + (f:_) -> do + ic <- withTSDelta (liftIO . genInodeCache f) + void $ linkAnnex key f ic _ -> lostcontent where lostcontent = logStatus key InfoMissing @@ -112,17 +115,18 @@ shouldAnnex file = do ingest :: FilePath -> Annex Key ingest file = do backend <- chooseBackend file + ic <- withTSDelta (liftIO . genInodeCache file) let source = KeySource { keyFilename = file , contentLocation = file - , inodeCache = Nothing + , inodeCache = ic } k <- fst . fromMaybe (error "failed to generate a key") <$> genKey source backend -- Hard link (or copy) file content to annex object -- to prevent it from being lost when git checks out -- a branch not containing this file. - r <- linkAnnex k file + r <- linkAnnex k file ic case r of LinkAnnexFailed -> error "Problem adding file to the annex" LinkAnnexOk -> logStatus k InfoPresent diff --git a/Command/Unlock.hs b/Command/Unlock.hs index 1cfd4a0b2b..b82f78096b 100644 --- a/Command/Unlock.hs +++ b/Command/Unlock.hs @@ -14,6 +14,8 @@ import Annex.CatFile import Annex.Version import Annex.Link import Annex.ReplaceFile +import Annex.InodeSentinal +import Utility.InodeCache import Utility.CopyFile cmd :: Command @@ -51,8 +53,9 @@ start file key = ifM (isJust <$> isAnnexLink file) performNew :: FilePath -> Key -> CommandPerform performNew dest key = do src <- calcRepo (gitAnnexLocation key) + srcic <- withTSDelta (liftIO . genInodeCache src) replaceFile dest $ \tmp -> do - r <- linkAnnex' key src tmp + r <- linkAnnex' key src srcic tmp case r of LinkAnnexOk -> return () _ -> error "linkAnnex failed" diff --git a/Upgrade/V5.hs b/Upgrade/V5.hs index 2073a0150f..f6d18df435 100644 --- a/Upgrade/V5.hs +++ b/Upgrade/V5.hs @@ -20,6 +20,7 @@ import qualified Git import qualified Git.LsFiles import qualified Git.Branch import Git.FileMode +import Utility.InodeCache upgrade :: Bool -> Annex Bool upgrade automatic = do @@ -88,7 +89,8 @@ upgradeDirectWorkTree = do -- not populated with it. Since the work tree file -- is recorded as an associated file, things will still -- work that way, it's just not ideal. - void $ linkAnnex k f + ic <- withTSDelta (liftIO . genInodeCache f) + void $ linkAnnex k f ic writepointer f k = liftIO $ do nukeFile f writeFile f (formatPointer k) From 4f60234690d08b617771147e3f0ebedd53c41c3d Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Tue, 22 Dec 2015 15:23:27 -0400 Subject: [PATCH 82/96] finish v6 support for assistant Seems to basically work now! --- Annex/Ingest.hs | 102 ++++++++++++++++++--------------- Assistant/Threads/Committer.hs | 67 +++++++++++----------- Assistant/Types/Changes.hs | 13 +++-- Command/Add.hs | 4 +- 4 files changed, 99 insertions(+), 87 deletions(-) diff --git a/Annex/Ingest.hs b/Annex/Ingest.hs index 0fd32a042f..707f71eff3 100644 --- a/Annex/Ingest.hs +++ b/Annex/Ingest.hs @@ -8,6 +8,7 @@ {-# LANGUAGE CPP #-} module Annex.Ingest ( + LockedDown(..), lockDown, ingest, finishIngestDirect, @@ -33,7 +34,6 @@ import Annex.ReplaceFile import Utility.Tmp import Utility.CopyFile import Annex.InodeSentinal -import Annex.Version #ifdef WITH_CLIBS #ifndef __ANDROID__ import Utility.Touch @@ -42,46 +42,42 @@ import Utility.Touch import Control.Exception (IOException) +data LockedDown = LockedDown + { lockingFile :: Bool + , keySource :: KeySource + } + deriving (Show) + {- The file that's being ingested is locked down before a key is generated, - to prevent it from being modified in between. This lock down is not - perfect at best (and pretty weak at worst). For example, it does not - guard against files that are already opened for write by another process. - - So a KeySource is returned. Its inodeCache can be used to detect any - - changes that might be made to the file after it was locked down. + - So, the InodeCache can be used to detect any changes that might be made + - to the file after it was locked down. - - When possible, the file is hard linked to a temp directory. This guards - against some changes, like deletion or overwrite of the file, and - allows lsof checks to be done more efficiently when adding a lot of files. - + - If the file is to be locked, lockingfile is True. Then the write + - bit is removed from the file as part of lock down to guard against + - further writes. + - - Lockdown can fail if a file gets deleted, and Nothing will be returned. -} -lockDown :: FilePath -> Annex (Maybe KeySource) -lockDown = either +lockDown :: Bool -> FilePath -> Annex (Maybe LockedDown) +lockDown lockingfile file = either (\e -> warning (show e) >> return Nothing) (return . Just) - <=< lockDown' + =<< lockDown' lockingfile file -lockDown' :: FilePath -> Annex (Either IOException KeySource) -lockDown' file = ifM crippledFileSystem +lockDown' :: Bool -> FilePath -> Annex (Either IOException LockedDown) +lockDown' lockingfile file = ifM crippledFileSystem ( withTSDelta $ liftIO . tryIO . nohardlink , tryIO $ do tmp <- fromRepo gitAnnexTmpMiscDir createAnnexDirectory tmp - go tmp - ) - where - {- In indirect mode, the write bit is removed from the file as part - - of lock down to guard against further writes, and because objects - - in the annex have their write bit disabled anyway. - - - - Freezing the content early also lets us fail early when - - someone else owns the file. - - - - This is not done in direct mode, because files there need to - - remain writable at all times. - -} - go tmp = do - unlessM isDirect $ + when lockingfile $ freezeContent file withTSDelta $ \delta -> liftIO $ do (tmpfile, h) <- openTempFile tmp $ @@ -89,9 +85,11 @@ lockDown' file = ifM crippledFileSystem hClose h nukeFile tmpfile withhardlink delta tmpfile `catchIO` const (nohardlink delta) + ) + where nohardlink delta = do cache <- genInodeCache file delta - return KeySource + return $ LockedDown lockingfile $ KeySource { keyFilename = file , contentLocation = file , inodeCache = cache @@ -99,7 +97,7 @@ lockDown' file = ifM crippledFileSystem withhardlink delta tmpfile = do createLink file tmpfile cache <- genInodeCache tmpfile delta - return KeySource + return $ LockedDown lockingfile $ KeySource { keyFilename = file , contentLocation = tmpfile , inodeCache = cache @@ -107,12 +105,13 @@ lockDown' file = ifM crippledFileSystem {- Ingests a locked down file into the annex. - - - In direct mode, leaves the file alone, and just updates bookkeeping - - information. + - The file may be added to the git repository as a locked or an unlocked + - file. When unlocked, the work tree file is left alone. When locked, + - the work tree file is deleted, in preparation for adding the symlink. -} -ingest :: Maybe KeySource -> Annex (Maybe Key, Maybe InodeCache) +ingest :: Maybe LockedDown -> Annex (Maybe Key, Maybe InodeCache) ingest Nothing = return (Nothing, Nothing) -ingest (Just source) = withTSDelta $ \delta -> do +ingest (Just (LockedDown lockingfile source)) = withTSDelta $ \delta -> do backend <- chooseBackend $ keyFilename source k <- genKey source backend let src = contentLocation source @@ -123,43 +122,56 @@ ingest (Just source) = withTSDelta $ \delta -> do (Just newc, Just c) | compareStrong c newc -> go k mcache ms _ -> failure "changed while it was being added" where - go k mcache ms = ifM isDirect - ( godirect k mcache ms - , goindirect k mcache ms - ) + go (Just (key, _)) mcache (Just s) + | lockingfile = golocked key mcache s + | otherwise = ifM isDirect + ( godirect key mcache s + , gounlocked key mcache s + ) + go _ _ _ = failure "failed to generate a key" - goindirect (Just (key, _)) mcache ms = do + golocked key mcache s = do catchNonAsync (moveAnnex key $ contentLocation source) (restoreFile (keyFilename source) key) - maybe noop (genMetaData key (keyFilename source)) ms liftIO $ nukeFile $ keyFilename source - return (Just key, mcache) - goindirect _ _ _ = failure "failed to generate a key" + success key mcache s - godirect (Just (key, _)) (Just cache) ms = do + gounlocked key (Just cache) s = do + r <- linkAnnex key (keyFilename source) (Just cache) + case r of + LinkAnnexFailed -> failure "failed to link to annex" + _ -> success key (Just cache) s + gounlocked _ _ _ = failure "failed statting file" + + godirect key (Just cache) s = do addInodeCache key cache - maybe noop (genMetaData key (keyFilename source)) ms finishIngestDirect key source - return (Just key, Just cache) - godirect _ _ _ = failure "failed to generate a key" + success key (Just cache) s + godirect _ _ _ = failure "failed statting file" + + success k mcache s = do + genMetaData k (keyFilename source) s + return (Just k, mcache) failure msg = do warning $ keyFilename source ++ " " ++ msg - when (contentLocation source /= keyFilename source) $ - liftIO $ nukeFile $ contentLocation source + cleanCruft source return (Nothing, Nothing) finishIngestDirect :: Key -> KeySource -> Annex () finishIngestDirect key source = do void $ addAssociatedFile key $ keyFilename source - when (contentLocation source /= keyFilename source) $ - liftIO $ nukeFile $ contentLocation source + cleanCruft source {- Copy to any other locations using the same key. -} otherfs <- filter (/= keyFilename source) <$> associatedFiles key forM_ otherfs $ addContentWhenNotPresent key (keyFilename source) +cleanCruft :: KeySource -> Annex () +cleanCruft source = when (contentLocation source /= keyFilename source) $ + liftIO $ nukeFile $ contentLocation source + {- On error, put the file back so it doesn't seem to have vanished. - This can be called before or after the symlink is in place. -} restoreFile :: FilePath -> Key -> SomeException -> Annex a diff --git a/Assistant/Threads/Committer.hs b/Assistant/Threads/Committer.hs index 5e8df56c82..3e00011f5a 100644 --- a/Assistant/Threads/Committer.hs +++ b/Assistant/Threads/Committer.hs @@ -266,10 +266,12 @@ handleAdds havelsof delayadd cs = returnWhen (null incomplete) $ do let (pending, inprocess) = partition isPendingAddChange incomplete direct <- liftAnnex isDirect unlocked <- liftAnnex versionSupportsUnlockedPointers + let lockingfiles = not (unlocked || direct) (pending', cleanup) <- if unlocked || direct then return (pending, noop) else findnew pending - (postponed, toadd) <- partitionEithers <$> safeToAdd havelsof delayadd pending' inprocess + (postponed, toadd) <- partitionEithers + <$> safeToAdd lockingfiles havelsof delayadd pending' inprocess cleanup unless (null postponed) $ @@ -278,9 +280,9 @@ handleAdds havelsof delayadd cs = returnWhen (null incomplete) $ do returnWhen (null toadd) $ do added <- addaction toadd $ catMaybes <$> - if unlocked || direct + if not lockingfiles then addunlocked direct toadd - else forM toadd add + else forM toadd (add lockingfiles) if DirWatcher.eventsCoalesce || null added || unlocked || direct then return $ added ++ otherchanges else do @@ -307,16 +309,17 @@ handleAdds havelsof delayadd cs = returnWhen (null incomplete) $ do | c = return otherchanges | otherwise = a - add :: Change -> Assistant (Maybe Change) - add change@(InProcessAddChange { keySource = ks }) = + add :: Bool -> Change -> Assistant (Maybe Change) + add lockingfile change@(InProcessAddChange { lockedDown = ld }) = catchDefaultIO Nothing <~> doadd where + ks = keySource ld doadd = sanitycheck ks $ do (mkey, mcache) <- liftAnnex $ do showStart "add" $ keyFilename ks - ingest $ Just ks + ingest $ Just $ LockedDown lockingfile ks maybe (failedingest change) (done change mcache $ keyFilename ks) mkey - add _ = return Nothing + add _ _ = return Nothing {- Avoid overhead of re-injesting a renamed unlocked file, by - examining the other Changes to see if a removed file has the @@ -329,29 +332,22 @@ handleAdds havelsof delayadd cs = returnWhen (null incomplete) $ do m <- liftAnnex $ removedKeysMap isdirect ct cs delta <- liftAnnex getTSDelta if M.null m - then forM toadd add + then forM toadd (add False) else forM toadd $ \c -> do mcache <- liftIO $ genInodeCache (changeFile c) delta case mcache of - Nothing -> add c + Nothing -> add False c Just cache -> case M.lookup (inodeCacheToKey ct cache) m of - Nothing -> add c - Just k -> if isdirect - then fastadddirect c k - else fastaddunlocked c k + Nothing -> add False c + Just k -> fastadd isdirect c k - fastadddirect :: Change -> Key -> Assistant (Maybe Change) - fastadddirect change key = do - let source = keySource change - liftAnnex $ finishIngestDirect key source - done change Nothing (keyFilename source) key - - fastaddunlocked :: Change -> Key -> Assistant (Maybe Change) - fastaddunlocked change key = do - let source = keySource change - liftAnnex $ do - Database.Keys.addAssociatedFile key (keyFilename source) + fastadd :: Bool -> Change -> Key -> Assistant (Maybe Change) + fastadd isdirect change key = do + let source = keySource $ lockedDown change + liftAnnex $ if isdirect + then finishIngestDirect key source + else Database.Keys.addAssociatedFile key (keyFilename source) done change Nothing (keyFilename source) key removedKeysMap :: Bool -> InodeComparisonType -> [Change] -> Annex (M.Map InodeCacheKey Key) @@ -419,16 +415,16 @@ handleAdds havelsof delayadd cs = returnWhen (null incomplete) $ do - - Check by running lsof on the repository. -} -safeToAdd :: Bool -> Maybe Seconds -> [Change] -> [Change] -> Assistant [Either Change Change] -safeToAdd _ _ [] [] = return [] -safeToAdd havelsof delayadd pending inprocess = do +safeToAdd :: Bool -> Bool -> Maybe Seconds -> [Change] -> [Change] -> Assistant [Either Change Change] +safeToAdd _ _ _ [] [] = return [] +safeToAdd lockingfiles havelsof delayadd pending inprocess = do maybe noop (liftIO . threadDelaySeconds) delayadd liftAnnex $ do - keysources <- forM pending $ lockDown . changeFile - let inprocess' = inprocess ++ mapMaybe mkinprocess (zip pending keysources) + lockeddown <- forM pending $ lockDown lockingfiles . changeFile + let inprocess' = inprocess ++ mapMaybe mkinprocess (zip pending lockeddown) openfiles <- if havelsof then S.fromList . map fst3 . filter openwrite <$> - findopenfiles (map keySource inprocess') + findopenfiles (map (keySource . lockedDown) inprocess') else pure S.empty let checked = map (check openfiles) inprocess' @@ -441,17 +437,18 @@ safeToAdd havelsof delayadd pending inprocess = do allRight $ rights checked else return checked where - check openfiles change@(InProcessAddChange { keySource = ks }) - | S.member (contentLocation ks) openfiles = Left change + check openfiles change@(InProcessAddChange { lockedDown = ld }) + | S.member (contentLocation (keySource ld)) openfiles = Left change check _ change = Right change - mkinprocess (c, Just ks) = Just InProcessAddChange + mkinprocess (c, Just ld) = Just InProcessAddChange { changeTime = changeTime c - , keySource = ks + , lockedDown = ld } mkinprocess (_, Nothing) = Nothing - canceladd (InProcessAddChange { keySource = ks }) = do + canceladd (InProcessAddChange { lockedDown = ld }) = do + let ks = keySource ld warning $ keyFilename ks ++ " still has writers, not adding" -- remove the hard link diff --git a/Assistant/Types/Changes.hs b/Assistant/Types/Changes.hs index 1d8b517754..8c2d02cab1 100644 --- a/Assistant/Types/Changes.hs +++ b/Assistant/Types/Changes.hs @@ -10,6 +10,7 @@ module Assistant.Types.Changes where import Types.KeySource import Types.Key import Utility.TList +import Annex.Ingest import Control.Concurrent.STM import Data.Time.Clock @@ -38,7 +39,7 @@ data Change } | InProcessAddChange { changeTime ::UTCTime - , keySource :: KeySource + , lockedDown :: LockedDown } deriving (Show) @@ -53,7 +54,7 @@ changeInfoKey _ = Nothing changeFile :: Change -> FilePath changeFile (Change _ f _) = f changeFile (PendingAddChange _ f) = f -changeFile (InProcessAddChange _ ks) = keyFilename ks +changeFile (InProcessAddChange _ ld) = keyFilename $ keySource ld isPendingAddChange :: Change -> Bool isPendingAddChange (PendingAddChange {}) = True @@ -64,14 +65,14 @@ isInProcessAddChange (InProcessAddChange {}) = True isInProcessAddChange _ = False retryChange :: Change -> Change -retryChange (InProcessAddChange time ks) = - PendingAddChange time (keyFilename ks) +retryChange c@(InProcessAddChange time _) = + PendingAddChange time $ changeFile c retryChange c = c finishedChange :: Change -> Key -> Change -finishedChange c@(InProcessAddChange { keySource = ks }) k = Change +finishedChange c@(InProcessAddChange {}) k = Change { changeTime = changeTime c - , _changeFile = keyFilename ks + , _changeFile = changeFile c , changeInfo = AddKeyChange k } finishedChange c _ = c diff --git a/Command/Add.hs b/Command/Add.hs index b1b830cbcf..8a7db0a912 100644 --- a/Command/Add.hs +++ b/Command/Add.hs @@ -113,7 +113,9 @@ start file = ifAnnexed file addpresent add next $ next $ cleanup file key Nothing =<< inAnnex key perform :: FilePath -> CommandPerform -perform file = lockDown file >>= ingest >>= go +perform file = do + lockingfile <- not <$> isDirect + lockDown lockingfile file >>= ingest >>= go where go (Just key, cache) = next $ cleanup file key cache True go (Nothing, _) = stop From cfaac52b88e157dd4e71626fe68af37015b9c9bd Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Tue, 22 Dec 2015 16:22:28 -0400 Subject: [PATCH 83/96] populate unlocked files with newly available content when ingesting This can happen when ingesting a new file in either locked or unlocked mode, when some unlocked files in the repo use the same key, and the content was not locally available before. --- Annex/Content.hs | 1 + Annex/Ingest.hs | 29 +++++++++++++++++++++++++---- Assistant/Threads/Committer.hs | 2 +- 3 files changed, 27 insertions(+), 5 deletions(-) diff --git a/Annex/Content.hs b/Annex/Content.hs index c00863e310..e501df072a 100644 --- a/Annex/Content.hs +++ b/Annex/Content.hs @@ -24,6 +24,7 @@ module Annex.Content ( withTmp, checkDiskSpace, moveAnnex, + populatePointerFile, linkAnnex, linkAnnex', LinkAnnexResult(..), diff --git a/Annex/Ingest.hs b/Annex/Ingest.hs index 707f71eff3..7f38e9beb7 100644 --- a/Annex/Ingest.hs +++ b/Annex/Ingest.hs @@ -12,6 +12,7 @@ module Annex.Ingest ( lockDown, ingest, finishIngestDirect, + finishIngestUnlocked, addLink, makeLink, restoreFile, @@ -28,6 +29,7 @@ import Annex.Link import Annex.MetaData import qualified Annex import qualified Annex.Queue +import qualified Database.Keys import Config import Utility.InodeCache import Annex.ReplaceFile @@ -59,9 +61,8 @@ data LockedDown = LockedDown - against some changes, like deletion or overwrite of the file, and - allows lsof checks to be done more efficiently when adding a lot of files. - - - If the file is to be locked, lockingfile is True. Then the write - - bit is removed from the file as part of lock down to guard against - - further writes. + - If lockingfile is True, the file is going to be added in locked mode. + - So, its write bit is removed as part of the lock down. - - Lockdown can fail if a file gets deleted, and Nothing will be returned. -} @@ -134,13 +135,20 @@ ingest (Just (LockedDown lockingfile source)) = withTSDelta $ \delta -> do catchNonAsync (moveAnnex key $ contentLocation source) (restoreFile (keyFilename source) key) liftIO $ nukeFile $ keyFilename source + populateAssociatedFiles key source success key mcache s gounlocked key (Just cache) s = do + -- Remove temp directory hard link first because + -- linkAnnex falls back to copying if a file + -- already has a hard link. + cleanCruft source r <- linkAnnex key (keyFilename source) (Just cache) case r of LinkAnnexFailed -> failure "failed to link to annex" - _ -> success key (Just cache) s + _ -> do + finishIngestUnlocked key source + success key (Just cache) s gounlocked _ _ _ = failure "failed statting file" godirect key (Just cache) s = do @@ -168,6 +176,19 @@ finishIngestDirect key source = do forM_ otherfs $ addContentWhenNotPresent key (keyFilename source) +finishIngestUnlocked :: Key -> KeySource -> Annex () +finishIngestUnlocked key source = do + Database.Keys.addAssociatedFile key (keyFilename source) + populateAssociatedFiles key source + +{- Copy to any other locations using the same key. -} +populateAssociatedFiles :: Key -> KeySource -> Annex () +populateAssociatedFiles key source = do + otherfs <- filter (/= keyFilename source) <$> Database.Keys.getAssociatedFiles key + obj <- calcRepo (gitAnnexLocation key) + forM_ otherfs $ + populatePointerFile key obj + cleanCruft :: KeySource -> Annex () cleanCruft source = when (contentLocation source /= keyFilename source) $ liftIO $ nukeFile $ contentLocation source diff --git a/Assistant/Threads/Committer.hs b/Assistant/Threads/Committer.hs index 3e00011f5a..c7633d590c 100644 --- a/Assistant/Threads/Committer.hs +++ b/Assistant/Threads/Committer.hs @@ -347,7 +347,7 @@ handleAdds havelsof delayadd cs = returnWhen (null incomplete) $ do let source = keySource $ lockedDown change liftAnnex $ if isdirect then finishIngestDirect key source - else Database.Keys.addAssociatedFile key (keyFilename source) + else finishIngestUnlocked key source done change Nothing (keyFilename source) key removedKeysMap :: Bool -> InodeComparisonType -> [Change] -> Annex (M.Map InodeCacheKey Key) From d8a8c77a8fb9f891e75ed37b1944c7b7a6b03d09 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Tue, 22 Dec 2015 16:55:49 -0400 Subject: [PATCH 84/96] move cleanOldKey into ingest --- Annex/Ingest.hs | 31 +++++++++++++++++++++++++++++++ Command/Smudge.hs | 30 +----------------------------- 2 files changed, 32 insertions(+), 29 deletions(-) diff --git a/Annex/Ingest.hs b/Annex/Ingest.hs index 7f38e9beb7..36930baf48 100644 --- a/Annex/Ingest.hs +++ b/Annex/Ingest.hs @@ -13,6 +13,7 @@ module Annex.Ingest ( ingest, finishIngestDirect, finishIngestUnlocked, + cleanOldKeys, addLink, makeLink, restoreFile, @@ -27,6 +28,7 @@ import Annex.Content.Direct import Annex.Perms import Annex.Link import Annex.MetaData +import Logs.Location import qualified Annex import qualified Annex.Queue import qualified Database.Keys @@ -143,6 +145,7 @@ ingest (Just (LockedDown lockingfile source)) = withTSDelta $ \delta -> do -- linkAnnex falls back to copying if a file -- already has a hard link. cleanCruft source + cleanOldKeys (keyFilename source) key r <- linkAnnex key (keyFilename source) (Just cache) case r of LinkAnnexFailed -> failure "failed to link to annex" @@ -193,6 +196,34 @@ cleanCruft :: KeySource -> Annex () cleanCruft source = when (contentLocation source /= keyFilename source) $ liftIO $ nukeFile $ contentLocation source +-- If a worktree file was was hard linked to an annex object before, +-- modifying the file would have caused the object to have the wrong +-- content. Clean up from that. +cleanOldKeys :: FilePath -> Key -> Annex () +cleanOldKeys file newkey = do + oldkeys <- filter (/= newkey) + <$> Database.Keys.getAssociatedKey file + mapM_ go oldkeys + where + go key = do + obj <- calcRepo (gitAnnexLocation key) + caches <- Database.Keys.getInodeCaches key + unlessM (sameInodeCache obj caches) $ do + unlinkAnnex key + fs <- filter (/= file) + <$> Database.Keys.getAssociatedFiles key + fs' <- filterM (`sameInodeCache` caches) fs + case fs' of + -- If linkAnnex fails, the associated + -- file with the content is still present, + -- so no need for any recovery. + (f:_) -> do + ic <- withTSDelta (liftIO . genInodeCache f) + void $ linkAnnex key f ic + _ -> lostcontent + where + lostcontent = logStatus key InfoMissing + {- On error, put the file back so it doesn't seem to have vanished. - This can be called before or after the symlink is in place. -} restoreFile :: FilePath -> Key -> SomeException -> Annex a diff --git a/Command/Smudge.hs b/Command/Smudge.hs index 2876326b8b..5666381b05 100644 --- a/Command/Smudge.hs +++ b/Command/Smudge.hs @@ -74,39 +74,11 @@ clean file = do if isJust (parseLinkOrPointer b) then liftIO $ B.hPut stdout b else ifM (shouldAnnex file) - ( do - k <- ingest file - oldkeys <- filter (/= k) - <$> Database.Keys.getAssociatedKey file - mapM_ (cleanOldKey file) oldkeys - Database.Keys.addAssociatedFile k file - liftIO $ emitPointer k + ( liftIO . emitPointer =<< ingest file , liftIO $ B.hPut stdout b ) stop --- If the file being cleaned was hard linked to the old key's annex object, --- modifying the file will have caused the object to have the wrong content. --- Clean up from that. -cleanOldKey :: FilePath -> Key -> Annex () -cleanOldKey modifiedfile key = do - obj <- calcRepo (gitAnnexLocation key) - caches <- Database.Keys.getInodeCaches key - unlessM (sameInodeCache obj caches) $ do - unlinkAnnex key - fs <- filter (/= modifiedfile) - <$> Database.Keys.getAssociatedFiles key - fs' <- filterM (`sameInodeCache` caches) fs - case fs' of - -- If linkAnnex fails, the file with the content - -- is still present, so no need for any recovery. - (f:_) -> do - ic <- withTSDelta (liftIO . genInodeCache f) - void $ linkAnnex key f ic - _ -> lostcontent - where - lostcontent = logStatus key InfoMissing - shouldAnnex :: FilePath -> Annex Bool shouldAnnex file = do matcher <- largeFilesMatcher From c4152654d29bfd38333f18a317644a4077e757ad Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Tue, 22 Dec 2015 17:52:39 -0400 Subject: [PATCH 85/96] combine PendingAddChanges for the same file into one In v6 unlocked mode, this fixes a problem that was making eg, echo > file cause the assistant to copy the file to the annex object, instead of hard linking it. That because 2 change events were seen (one for opening the file and one for closing) and processed together the file was then locked down twice. Which meant it had mutiple hard links, and so prevented linkAnnex from hard linking it. There might be scenarios where multiple events come in, but staggered such that a file gets locked down repeatedly, and it would still be copied to the annex object in that case. --- Assistant/Threads/Committer.hs | 3 ++- Assistant/Types/Changes.hs | 24 +++++++++++++++++++++++- 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/Assistant/Threads/Committer.hs b/Assistant/Threads/Committer.hs index c7633d590c..0bdbb03782 100644 --- a/Assistant/Threads/Committer.hs +++ b/Assistant/Threads/Committer.hs @@ -55,7 +55,8 @@ commitThread = namedThread "Committer" $ do =<< annexDelayAdd <$> Annex.getGitConfig msg <- liftAnnex Command.Sync.commitMsg waitChangeTime $ \(changes, time) -> do - readychanges <- handleAdds havelsof delayadd changes + readychanges <- handleAdds havelsof delayadd $ + simplifyChanges changes if shouldCommit False time (length readychanges) readychanges then do debug diff --git a/Assistant/Types/Changes.hs b/Assistant/Types/Changes.hs index 8c2d02cab1..70c40523a0 100644 --- a/Assistant/Types/Changes.hs +++ b/Assistant/Types/Changes.hs @@ -1,10 +1,12 @@ {- git-annex assistant change tracking - - - Copyright 2012-2013 Joey Hess + - Copyright 2012-2015 Joey Hess - - Licensed under the GNU GPL version 3 or higher. -} +{-# LANGUAGE BangPatterns #-} + module Assistant.Types.Changes where import Types.KeySource @@ -14,6 +16,7 @@ import Annex.Ingest import Control.Concurrent.STM import Data.Time.Clock +import qualified Data.Set as S {- An un-ordered pool of Changes that have been noticed and should be - staged and committed. Changes will typically be in order, but ordering @@ -76,3 +79,22 @@ finishedChange c@(InProcessAddChange {}) k = Change , changeInfo = AddKeyChange k } finishedChange c _ = c + +{- Combine PendingAddChanges that are for the same file. + - Multiple such often get noticed when eg, a file is opened and then + - closed in quick succession. -} +simplifyChanges :: [Change] -> [Change] +simplifyChanges [c] = [c] +simplifyChanges cl = go cl S.empty [] + where + go [] _ l = reverse l + go (c:cs) seen l + | isPendingAddChange c = + if S.member f seen + then go cs seen l + else + let !seen' = S.insert f seen + in go cs seen' (c:l) + | otherwise = go cs seen (c:l) + where + f = changeFile c From 0c03629173d26f1e159fda8b1ea5fc0a89ec9267 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Tue, 22 Dec 2015 18:03:47 -0400 Subject: [PATCH 86/96] clean up cruft in assistant fast rename code path --- Annex/Ingest.hs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/Annex/Ingest.hs b/Annex/Ingest.hs index 36930baf48..b2eb27616d 100644 --- a/Annex/Ingest.hs +++ b/Annex/Ingest.hs @@ -150,7 +150,7 @@ ingest (Just (LockedDown lockingfile source)) = withTSDelta $ \delta -> do case r of LinkAnnexFailed -> failure "failed to link to annex" _ -> do - finishIngestUnlocked key source + finishIngestUnlocked' key source success key (Just cache) s gounlocked _ _ _ = failure "failed statting file" @@ -181,6 +181,11 @@ finishIngestDirect key source = do finishIngestUnlocked :: Key -> KeySource -> Annex () finishIngestUnlocked key source = do + cleanCruft source + finishIngestUnlocked' key source + +finishIngestUnlocked' :: Key -> KeySource -> Annex () +finishIngestUnlocked' key source = do Database.Keys.addAssociatedFile key (keyFilename source) populateAssociatedFiles key source From a82b7d00445c7d6010539cc9e55786f6e6f7a07b Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Tue, 22 Dec 2015 18:04:43 -0400 Subject: [PATCH 87/96] update --- debian/changelog | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/debian/changelog b/debian/changelog index fb12926b6c..d1c649a514 100644 --- a/debian/changelog +++ b/debian/changelog @@ -15,7 +15,7 @@ git-annex (6.20151225) unstable; urgency=medium pointer file, and this change can be committed to the git repository. * add: In v6 mode, adds modified files to the annex. * assistant: In v6 mode, adds files in unlocked mode, so they can - continue to be modified. TODO + continue to be modified. -- Joey Hess Tue, 08 Dec 2015 11:14:03 -0400 From b3690c44994609527ae1ae885dfc31c0969da96c Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Tue, 22 Dec 2015 18:16:01 -0400 Subject: [PATCH 88/96] update --- doc/todo/smudge.mdwn | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/doc/todo/smudge.mdwn b/doc/todo/smudge.mdwn index fe938bfbed..2e8479e99f 100644 --- a/doc/todo/smudge.mdwn +++ b/doc/todo/smudge.mdwn @@ -323,9 +323,6 @@ files to be unlocked, while the indirect upgrades don't touch the files. * Still a few test suite failues for v6 with locked files. * Test suite should make pass for v6 with unlocked files. -* When the webapp creates a repo, it forces it into direct mode. But that - will fail when annex.version=6. Long-term, the assistant should make v6 - repos, but short-term, the assistant should make v5 repos in direct mode. * Reconcile staged changes into the associated files database, whenever the database is queried. This is needed to handle eg: git add largefile @@ -360,14 +357,23 @@ files to be unlocked, while the indirect upgrades don't touch the files. when pushing changes committed in such a repo. Ideally, should avoid committing implicit unlocks, or should prevent such commits leaking out in pushes. -* Dropping a smudged file causes git status to show it as modified, - because the timestamp has changed. Getting a smudged file can also - cause this. Upgrading a direct mode repo also leaves files in this state. +* Dropping a smudged file causes git status (and git annex status) + to show it as modified, because the timestamp has changed. + Getting a smudged file can also cause this. + Upgrading a direct mode repo also leaves files in this state. User can use `git add` to clear it up, but better to avoid this, by updating stat info in the index. (May need to use libgit2 to do this, cannot find any plumbing except git-update-index, which is very inneficient for smudged files.) +* Audit code for all uses of isDirect. These places almost always need + adjusting to support v6, if they haven't already. + +* Eventually (but not yet), make v6 the default for new repositories. + Note that the assistant forces repos into direct mode; that will need to + be changed then. +* Later still, remove support for direct mode, and enable automatic + v5 to v6 upgrades. ---- From 6d38f54db4cda291fd5196b0a771d964e0ab46f5 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 23 Dec 2015 14:59:58 -0400 Subject: [PATCH 89/96] split out Database.Queue from Database.Handle Fsck can use the queue for efficiency since it is write-heavy, and only reads a value before writing it. But, the queue is not suited to the Keys database. --- Database/Fsck.hs | 11 +-- Database/Handle.hs | 164 +++++++++++++++-------------------------- Database/Keys.hs | 14 ++-- Database/Keys/Types.hs | 4 +- Database/Queue.hs | 104 ++++++++++++++++++++++++++ 5 files changed, 177 insertions(+), 120 deletions(-) create mode 100644 Database/Queue.hs diff --git a/Database/Fsck.hs b/Database/Fsck.hs index b0e56f6c0d..d176690a6c 100644 --- a/Database/Fsck.hs +++ b/Database/Fsck.hs @@ -21,7 +21,7 @@ module Database.Fsck ( ) where import Database.Types -import qualified Database.Handle as H +import qualified Database.Queue as H import Locations import Utility.PosixFiles import Utility.Exception @@ -37,7 +37,7 @@ import Database.Persist.TH import Database.Esqueleto hiding (Key) import Data.Time.Clock -data FsckHandle = FsckHandle H.DbHandle UUID +data FsckHandle = FsckHandle H.DbQueue UUID {- Each key stored in the database has already been fscked as part - of the latest incremental fsck pass. -} @@ -77,7 +77,7 @@ openDb u = do void $ tryIO $ removeDirectoryRecursive dbdir rename tmpdbdir dbdir lockFileCached =<< fromRepo (gitAnnexFsckDbLock u) - h <- liftIO $ H.openDb db "fscked" + h <- liftIO $ H.openDbQueue db "fscked" -- work around https://github.com/yesodweb/persistent/issues/474 liftIO setConsoleEncoding @@ -86,7 +86,7 @@ openDb u = do closeDb :: FsckHandle -> Annex () closeDb (FsckHandle h u) = do - liftIO $ H.closeDb h + liftIO $ H.closeDbQueue h unlockFile =<< fromRepo (gitAnnexFsckDbLock u) addDb :: FsckHandle -> Key -> IO () @@ -102,8 +102,9 @@ addDb (FsckHandle h _) k = H.queueDb h checkcommit $ now <- getCurrentTime return $ diffUTCTime lastcommittime now > 300 +{- Doesn't know about keys that were just added with addDb. -} inDb :: FsckHandle -> Key -> IO Bool -inDb (FsckHandle h _) = H.queryDb h . inDb' . toSKey +inDb (FsckHandle h _) = H.queryDbQueue h . inDb' . toSKey inDb' :: SKey -> SqlPersistM Bool inDb' sk = do diff --git a/Database/Handle.hs b/Database/Handle.hs index 67f7592657..a45fad22e2 100644 --- a/Database/Handle.hs +++ b/Database/Handle.hs @@ -11,16 +11,14 @@ module Database.Handle ( DbHandle, initDb, openDb, + TableName, queryDb, closeDb, - Size, - queueDb, - flushQueueDb, commitDb, + commitDb', ) where import Utility.Exception -import Utility.Monad import Database.Persist.Sqlite import qualified Database.Sqlite as Sqlite @@ -33,18 +31,17 @@ import qualified Data.Text as T import Control.Monad.Trans.Resource (runResourceT) import Control.Monad.Logger (runNoLoggingT) import Data.List -import Data.Time.Clock import System.IO {- A DbHandle is a reference to a worker thread that communicates with - the database. It has a MVar which Jobs are submitted to. -} -data DbHandle = DbHandle (Async ()) (MVar Job) (MVar DbQueue) +data DbHandle = DbHandle (Async ()) (MVar Job) {- Ensures that the database is initialized. Pass the migration action for - the database. - - - The database is put into WAL mode, to prevent readers from blocking - - writers, and prevent a writer from blocking readers. + - The database is initialized using WAL mode, to prevent readers + - from blocking writers, and prevent a writer from blocking readers. -} initDb :: FilePath -> SqlPersistM () -> IO () initDb f migration = do @@ -60,22 +57,71 @@ enableWAL db = do void $ Sqlite.finalize stmt Sqlite.close conn +{- Name of a table that should exist once the database is initialized. -} +type TableName = String + {- Opens the database, but does not perform any migrations. Only use - if the database is known to exist and have the right tables. -} openDb :: FilePath -> TableName -> IO DbHandle openDb db tablename = do jobs <- newEmptyMVar worker <- async (workerThread (T.pack db) tablename jobs) - q <- newMVar =<< emptyDbQueue - return $ DbHandle worker jobs q + return $ DbHandle worker jobs + +closeDb :: DbHandle -> IO () +closeDb (DbHandle worker jobs) = do + putMVar jobs CloseJob + wait worker + +{- Makes a query using the DbHandle. This should not be used to make + - changes to the database! + - + - Note that the action is not run by the calling thread, but by a + - worker thread. Exceptions are propigated to the calling thread. + - + - Only one action can be run at a time against a given DbHandle. + - If called concurrently in the same process, this will block until + - it is able to run. + -} +queryDb :: DbHandle -> SqlPersistM a -> IO a +queryDb (DbHandle _ jobs) a = do + res <- newEmptyMVar + putMVar jobs $ QueryJob $ + liftIO . putMVar res =<< tryNonAsync a + (either throwIO return =<< takeMVar res) + `catchNonAsync` (const $ error "sqlite query crashed") + +{- Writes a change to the database. + - + - If a database is opened multiple times and there's a concurrent writer, + - the write could fail. Retries repeatedly for up to 10 seconds, + - which should avoid all but the most exceptional problems. + -} +commitDb :: DbHandle -> SqlPersistM () -> IO () +commitDb h wa = robustly Nothing 100 (commitDb' h wa) + where + robustly :: Maybe SomeException -> Int -> IO (Either SomeException ()) -> IO () + robustly e 0 _ = error $ "failed to commit changes to sqlite database: " ++ show e + robustly _ n a = do + r <- a + case r of + Right _ -> return () + Left e -> do + threadDelay 100000 -- 1/10th second + robustly (Just e) (n-1) a + +commitDb' :: DbHandle -> SqlPersistM () -> IO (Either SomeException ()) +commitDb' (DbHandle _ jobs) a = do + res <- newEmptyMVar + putMVar jobs $ ChangeJob $ \runner -> + liftIO $ putMVar res =<< tryNonAsync (runner a) + takeMVar res data Job = QueryJob (SqlPersistM ()) | ChangeJob ((SqlPersistM () -> IO ()) -> IO ()) | CloseJob -type TableName = String - workerThread :: T.Text -> TableName -> MVar Job -> IO () workerThread db tablename jobs = catchNonAsync (run loop) showerr where @@ -121,97 +167,3 @@ workerThread db tablename jobs = catchNonAsync (run loop) showerr -- This should succeed for any table. nullselect = T.pack $ "SELECT null from " ++ tablename ++ " limit 1" - -{- Makes a query using the DbHandle. This should not be used to make - - changes to the database! - - - - Note that the action is not run by the calling thread, but by a - - worker thread. Exceptions are propigated to the calling thread. - - - - Only one action can be run at a time against a given DbHandle. - - If called concurrently in the same process, this will block until - - it is able to run. - -} -queryDb :: DbHandle -> SqlPersistM a -> IO a -queryDb (DbHandle _ jobs _) a = do - res <- newEmptyMVar - putMVar jobs $ QueryJob $ - liftIO . putMVar res =<< tryNonAsync a - (either throwIO return =<< takeMVar res) - `catchNonAsync` (const $ error "sqlite query crashed") - -closeDb :: DbHandle -> IO () -closeDb h@(DbHandle worker jobs _) = do - putMVar jobs CloseJob - wait worker - flushQueueDb h - -type Size = Int - -type LastCommitTime = UTCTime - -{- A queue of actions to perform, with a count of the number of actions - - queued, and a last commit time. -} -data DbQueue = DbQueue Size LastCommitTime (SqlPersistM ()) - -emptyDbQueue :: IO DbQueue -emptyDbQueue = do - now <- getCurrentTime - return $ DbQueue 0 now (return ()) - -{- Queues a change to be made to the database. It will be buffered - - to be committed later, unless the commitchecker action returns true. - - - - (Be sure to call closeDb or flushQueueDb to ensure the change - - gets committed.) - - - - Transactions built up by queueDb are sent to sqlite all at once. - - If sqlite fails due to another change being made concurrently by another - - process, the transaction is put back in the queue. This solves - - the sqlite multiple writer problem. - -} -queueDb - :: DbHandle - -> (Size -> LastCommitTime -> IO Bool) - -> SqlPersistM () - -> IO () -queueDb h@(DbHandle _ _ qvar) commitchecker a = do - DbQueue sz lastcommittime qa <- takeMVar qvar - let !sz' = sz + 1 - let qa' = qa >> a - let enqueue = putMVar qvar - ifM (commitchecker sz' lastcommittime) - ( do - r <- commitDb h qa' - case r of - Left _ -> enqueue $ DbQueue sz' lastcommittime qa' - Right _ -> do - now <- getCurrentTime - enqueue $ DbQueue 0 now (return ()) - , enqueue $ DbQueue sz' lastcommittime qa' - ) - -{- If flushing the queue fails, this could be because there is another - - writer to the database. Retry repeatedly for up to 10 seconds. -} -flushQueueDb :: DbHandle -> IO () -flushQueueDb h@(DbHandle _ _ qvar) = do - DbQueue sz _ qa <- takeMVar qvar - when (sz > 0) $ - robustly Nothing 100 (commitDb h qa) - where - robustly :: Maybe SomeException -> Int -> IO (Either SomeException ()) -> IO () - robustly e 0 _ = error $ "failed to commit changes to sqlite database: " ++ show e - robustly _ n a = do - r <- a - case r of - Right _ -> return () - Left e -> do - threadDelay 100000 -- 1/10th second - robustly (Just e) (n-1) a - -commitDb :: DbHandle -> SqlPersistM () -> IO (Either SomeException ()) -commitDb (DbHandle _ jobs _) a = do - res <- newEmptyMVar - putMVar jobs $ ChangeJob $ \runner -> - liftIO $ putMVar res =<< tryNonAsync (runner a) - takeMVar res diff --git a/Database/Keys.hs b/Database/Keys.hs index a0c5b1a04c..425f1d54ba 100644 --- a/Database/Keys.hs +++ b/Database/Keys.hs @@ -28,7 +28,7 @@ module Database.Keys ( import Database.Types import Database.Keys.Types -import qualified Database.Handle as H +import qualified Database.Queue as H import Locations import Common hiding (delete) import Annex @@ -72,7 +72,7 @@ openDb = withExclusiveLock gitAnnexKeysDbLock $ do runMigrationSilent migrateKeysDb setAnnexDirPerm dbdir setAnnexFilePerm db - h <- liftIO $ H.openDb db "content" + h <- liftIO $ H.openDbQueue db "content" -- work around https://github.com/yesodweb/persistent/issues/474 liftIO setConsoleEncoding @@ -80,9 +80,9 @@ openDb = withExclusiveLock gitAnnexKeysDbLock $ do return $ DbHandle h closeDb :: DbHandle -> IO () -closeDb (DbHandle h) = H.closeDb h +closeDb (DbHandle h) = H.closeDbQueue h -withDbHandle :: (H.DbHandle -> IO a) -> Annex a +withDbHandle :: (H.DbQueue -> IO a) -> Annex a withDbHandle a = bracket openDb (liftIO . closeDb) (\(DbHandle h) -> liftIO (a h)) addAssociatedFile :: Key -> FilePath -> Annex () @@ -98,7 +98,7 @@ addAssociatedFile k f = withDbHandle $ \h -> H.queueDb h (\_ _ -> pure True) $ d {- Note that the files returned were once associated with the key, but - some of them may not be any longer. -} getAssociatedFiles :: Key -> Annex [FilePath] -getAssociatedFiles k = withDbHandle $ \h -> H.queryDb h $ +getAssociatedFiles k = withDbHandle $ \h -> H.queryDbQueue h $ getAssociatedFiles' $ toSKey k getAssociatedFiles' :: SKey -> SqlPersistM [FilePath] @@ -111,7 +111,7 @@ getAssociatedFiles' sk = do {- Gets any keys that are on record as having a particular associated file. - (Should be one or none but the database doesn't enforce that.) -} getAssociatedKey :: FilePath -> Annex [Key] -getAssociatedKey f = withDbHandle $ \h -> H.queryDb h $ +getAssociatedKey f = withDbHandle $ \h -> H.queryDbQueue h $ getAssociatedKey' f getAssociatedKey' :: FilePath -> SqlPersistM [Key] @@ -140,7 +140,7 @@ addInodeCaches k is = withDbHandle $ \h -> H.queueDb h (\_ _ -> pure True) $ {- A key may have multiple InodeCaches; one for the annex object, and one - for each pointer file that is a copy of it. -} getInodeCaches :: Key -> Annex [InodeCache] -getInodeCaches k = withDbHandle $ \h -> H.queryDb h $ do +getInodeCaches k = withDbHandle $ \h -> H.queryDbQueue h $ do l <- select $ from $ \r -> do where_ (r ^. ContentKey ==. val sk) return (r ^. ContentCache) diff --git a/Database/Keys/Types.hs b/Database/Keys/Types.hs index a627b3ca5b..3fabafcf25 100644 --- a/Database/Keys/Types.hs +++ b/Database/Keys/Types.hs @@ -9,6 +9,6 @@ module Database.Keys.Types ( DbHandle(..) ) where -import qualified Database.Handle as H +import qualified Database.Queue as H -newtype DbHandle = DbHandle H.DbHandle +newtype DbHandle = DbHandle H.DbQueue diff --git a/Database/Queue.hs b/Database/Queue.hs new file mode 100644 index 0000000000..1498547573 --- /dev/null +++ b/Database/Queue.hs @@ -0,0 +1,104 @@ +{- Persistent sqlite database queues + - + - Copyright 2015 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +{-# LANGUAGE BangPatterns #-} + +module Database.Queue ( + DbQueue, + initDb, + openDbQueue, + queryDbQueue, + closeDbQueue, + QueueSize, + queueDb, +) where + +import Utility.Monad +import Database.Handle + +import Database.Persist.Sqlite +import Control.Monad +import Control.Concurrent +import Data.Time.Clock + +{- A DbQueue wraps a DbHandle, adding a queue of writes to perform. + - + - This is efficient when there are frequent writes, but + - reads will not immediately have access to queued writes. -} +data DbQueue = DQ DbHandle (MVar Queue) + +{- Opens the database queue, but does not perform any migrations. Only use + - if the database is known to exist and have the right tables; ie after + - running initDb. -} +openDbQueue :: FilePath -> TableName -> IO DbQueue +openDbQueue db tablename = DQ + <$> openDb db tablename + <*> (newMVar =<< emptyQueue) + +{- Must be called to ensure queued changes get written to the database. -} +closeDbQueue :: DbQueue -> IO () +closeDbQueue h@(DQ hdl _) = do + flushDbQueue h + closeDb hdl + +{- Makes a queury using the DbQueue. This should not be used to make + - changes to the database! + - + - Queries will not return changes that have been recently queued, + - so use with care. + -} +queryDbQueue :: DbQueue -> SqlPersistM a -> IO a +queryDbQueue (DQ hdl _) = queryDb hdl + +{- A queue of actions to perform, with a count of the number of actions + - queued, and a last commit time. -} +data Queue = Queue QueueSize LastCommitTime (SqlPersistM ()) + +type QueueSize = Int + +type LastCommitTime = UTCTime + +emptyQueue :: IO Queue +emptyQueue = do + now <- getCurrentTime + return $ Queue 0 now (return ()) + +flushDbQueue :: DbQueue -> IO () +flushDbQueue (DQ hdl qvar) = do + Queue sz _ qa <- takeMVar qvar + when (sz > 0) $ + commitDb hdl qa + +{- Queues a change to be made to the database. It will be queued + - to be committed later, unless the commitchecker action returns true, + - in which case any previously queued changes are also committed. + - + - Transactions built up by queueDb are sent to sqlite all at once. + - If sqlite fails due to another change being made concurrently by another + - process, the transaction is put back in the queue. This avoids + - the sqlite multiple writer problem. + -} +queueDb + :: DbQueue + -> (QueueSize -> LastCommitTime -> IO Bool) + -> SqlPersistM () + -> IO () +queueDb (DQ hdl qvar) commitchecker a = do + Queue sz lastcommittime qa <- takeMVar qvar + let !sz' = sz + 1 + let qa' = qa >> a + let enqueue = putMVar qvar + ifM (commitchecker sz' lastcommittime) + ( do + r <- commitDb' hdl qa' + case r of + Left _ -> enqueue $ Queue sz' lastcommittime qa' + Right _ -> do + now <- getCurrentTime + enqueue $ Queue 0 now (return ()) + , enqueue $ Queue sz' lastcommittime qa' + ) From d43ac8056b4a1178a5d32f66d949c00727c4c8db Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 23 Dec 2015 16:11:36 -0400 Subject: [PATCH 90/96] auto-close database connections when MVar is GCed --- Database/Handle.hs | 41 ++++++++++++++++++++++++++--------------- 1 file changed, 26 insertions(+), 15 deletions(-) diff --git a/Database/Handle.hs b/Database/Handle.hs index a45fad22e2..8790b3218f 100644 --- a/Database/Handle.hs +++ b/Database/Handle.hs @@ -26,7 +26,7 @@ import Control.Monad import Control.Monad.IO.Class (liftIO) import Control.Concurrent import Control.Concurrent.Async -import Control.Exception (throwIO) +import Control.Exception (throwIO, BlockedIndefinitelyOnMVar(..)) import qualified Data.Text as T import Control.Monad.Trans.Resource (runResourceT) import Control.Monad.Logger (runNoLoggingT) @@ -68,6 +68,8 @@ openDb db tablename = do worker <- async (workerThread (T.pack db) tablename jobs) return $ DbHandle worker jobs +{- This is optional; when the DbHandle gets garbage collected it will + - auto-close. -} closeDb :: DbHandle -> IO () closeDb (DbHandle worker jobs) = do putMVar jobs CloseJob @@ -123,29 +125,38 @@ data Job | CloseJob workerThread :: T.Text -> TableName -> MVar Job -> IO () -workerThread db tablename jobs = catchNonAsync (run loop) showerr +workerThread db tablename jobs = + catchNonAsync (runSqliteRobustly tablename db loop) showerr where - showerr e = liftIO $ hPutStrLn stderr $ + showerr e = hPutStrLn stderr $ "sqlite worker thread crashed: " ++ show e + getjob :: IO (Either BlockedIndefinitelyOnMVar Job) + getjob = try $ takeMVar jobs + loop = do - job <- liftIO $ takeMVar jobs + job <- liftIO getjob case job of - QueryJob a -> a >> loop + -- Exception is thrown when the MVar is garbage + -- collected, which means the whole DbHandle + -- is not used any longer. Shutdown cleanly. + Left BlockedIndefinitelyOnMVar -> return () + Right CloseJob -> return () + Right (QueryJob a) -> a >> loop -- change is run in a separate database connection -- since sqlite only supports a single writer at a -- time, and it may crash the database connection - ChangeJob a -> liftIO (a run) >> loop - CloseJob -> return () + Right (ChangeJob a) -> liftIO (a (runSqliteRobustly tablename db)) >> loop - -- like runSqlite, but calls settle on the raw sql Connection. - run a = do - conn <- Sqlite.open db - settle conn - runResourceT $ runNoLoggingT $ - withSqlConn (wrapConnection conn) $ - runSqlConn a - +-- like runSqlite, but calls settle on the raw sql Connection. +runSqliteRobustly :: TableName -> T.Text -> (SqlPersistM a) -> IO a +runSqliteRobustly tablename db a = do + conn <- Sqlite.open db + settle conn + runResourceT $ runNoLoggingT $ + withSqlConn (wrapConnection conn) $ + runSqlConn a + where -- Work around a bug in sqlite: New database connections can -- sometimes take a while to become usable; select statements will -- fail with ErrorBusy for some time. So, loop until a select From 959b060e26392b3dd5c99722f3f05bddead86793 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 23 Dec 2015 16:36:08 -0400 Subject: [PATCH 91/96] allow flushDbQueue to be run repeatedly --- Database/Queue.hs | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/Database/Queue.hs b/Database/Queue.hs index 1498547573..11cc23b2d9 100644 --- a/Database/Queue.hs +++ b/Database/Queue.hs @@ -13,6 +13,7 @@ module Database.Queue ( openDbQueue, queryDbQueue, closeDbQueue, + flushDbQueue, QueueSize, queueDb, ) where @@ -39,14 +40,25 @@ openDbQueue db tablename = DQ <$> openDb db tablename <*> (newMVar =<< emptyQueue) -{- Must be called to ensure queued changes get written to the database. -} +{- This or flushDbQueue must be called, eg at program exit to ensure + - queued changes get written to the database. -} closeDbQueue :: DbQueue -> IO () closeDbQueue h@(DQ hdl _) = do flushDbQueue h closeDb hdl -{- Makes a queury using the DbQueue. This should not be used to make - - changes to the database! +{- Blocks until all queued changes have been written to the database. -} +flushDbQueue :: DbQueue -> IO () +flushDbQueue (DQ hdl qvar) = do + q@(Queue sz _ qa) <- takeMVar qvar + if sz > 0 + then do + commitDb hdl qa + putMVar qvar =<< emptyQueue + else putMVar qvar q + +{- Makes a query using the DbQueue's database connection. + - This should not be used to make changes to the database! - - Queries will not return changes that have been recently queued, - so use with care. @@ -67,12 +79,6 @@ emptyQueue = do now <- getCurrentTime return $ Queue 0 now (return ()) -flushDbQueue :: DbQueue -> IO () -flushDbQueue (DQ hdl qvar) = do - Queue sz _ qa <- takeMVar qvar - when (sz > 0) $ - commitDb hdl qa - {- Queues a change to be made to the database. It will be queued - to be committed later, unless the commitchecker action returns true, - in which case any previously queued changes are also committed. @@ -97,8 +103,6 @@ queueDb (DQ hdl qvar) commitchecker a = do r <- commitDb' hdl qa' case r of Left _ -> enqueue $ Queue sz' lastcommittime qa' - Right _ -> do - now <- getCurrentTime - enqueue $ Queue 0 now (return ()) + Right _ -> enqueue =<< emptyQueue , enqueue $ Queue sz' lastcommittime qa' ) From 4224fae71f0833d75eebed8521553adaae07ae23 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 23 Dec 2015 18:34:51 -0400 Subject: [PATCH 92/96] optimise read and write for Keys database (untested) Writes are optimised by queueing up multiple writes when possible. The queue is flushed after the Annex monad action finishes. That makes it happen on program termination, and also whenever a nested Annex monad action finishes. Reads are optimised by checking once (per AnnexState) if the database exists. If the database doesn't exist yet, all reads return mempty. Reads also cause queued writes to be flushed, so reads will always be consistent with writes (as long as they're made inside the same Annex monad). A future optimisation path would be to determine when that's not necessary, which is probably most of the time, and avoid flushing unncessarily. Design notes for this commit: - separate reads from writes - reuse a handle which is left open until program exit or until the MVar goes out of scope (and autoclosed then) - writes are queued - queue is flushed periodically - immediate queue flush before any read - auto-flush queue when database handle is garbage collected - flush queue on exit from Annex monad (Note that this may happen repeatedly for a single database connection; or a connection may be reused for multiple Annex monad actions, possibly even concurrent ones.) - if database does not exist (or is empty) the handle is not opened by reads; reads instead return empty results - writes open the handle if it was not open previously --- Annex.hs | 16 ++-- Database/Fsck.hs | 4 - Database/Handle.hs | 5 + Database/Keys.hs | 173 +++++++++++++++++++++++++--------- Database/Keys/Handle.hs | 55 +++++++++++ Database/Keys/Types.hs | 14 --- Database/Queue.hs | 1 - Messages.hs | 8 -- Utility/FileSystemEncoding.hs | 8 ++ doc/todo/smudge.mdwn | 20 ++-- 10 files changed, 213 insertions(+), 91 deletions(-) create mode 100644 Database/Keys/Handle.hs delete mode 100644 Database/Keys/Types.hs diff --git a/Annex.hs b/Annex.hs index c9a4ef6a05..a8368f81ee 100644 --- a/Annex.hs +++ b/Annex.hs @@ -60,6 +60,7 @@ import Types.NumCopies import Types.LockCache import Types.DesktopNotify import Types.CleanupActions +import qualified Database.Keys.Handle as Keys #ifdef WITH_QUVI import Utility.Quvi (QuviVersion) #endif @@ -134,6 +135,7 @@ data AnnexState = AnnexState , desktopnotify :: DesktopNotify , workers :: [Either AnnexState (Async AnnexState)] , concurrentjobs :: Maybe Int + , keysdbhandle :: Maybe Keys.DbHandle } newState :: GitConfig -> Git.Repo -> AnnexState @@ -179,6 +181,7 @@ newState c r = AnnexState , desktopnotify = mempty , workers = [] , concurrentjobs = Nothing + , keysdbhandle = Nothing } {- Makes an Annex state object for the specified git repo. @@ -193,25 +196,26 @@ new r = do {- Performs an action in the Annex monad from a starting state, - returning a new state. -} run :: AnnexState -> Annex a -> IO (a, AnnexState) -run s a = do - mvar <- newMVar s +run s a = flip run' a =<< newMVar s + +run' :: MVar AnnexState -> Annex a -> IO (a, AnnexState) +run' mvar a = do r <- runReaderT (runAnnex a) mvar s' <- takeMVar mvar + maybe noop Keys.flushDbQueue (keysdbhandle s') return (r, s') {- Performs an action in the Annex monad from a starting state, - and throws away the new state. -} eval :: AnnexState -> Annex a -> IO a -eval s a = do - mvar <- newMVar s - runReaderT (runAnnex a) mvar +eval s a = fst <$> run s a {- Makes a runner action, that allows diving into IO and from inside - the IO action, running an Annex action. -} makeRunner :: Annex (Annex a -> IO a) makeRunner = do mvar <- ask - return $ \a -> runReaderT (runAnnex a) mvar + return $ \a -> fst <$> run' mvar a getState :: (AnnexState -> v) -> Annex v getState selector = do diff --git a/Database/Fsck.hs b/Database/Fsck.hs index d176690a6c..da6e6263a8 100644 --- a/Database/Fsck.hs +++ b/Database/Fsck.hs @@ -78,10 +78,6 @@ openDb u = do rename tmpdbdir dbdir lockFileCached =<< fromRepo (gitAnnexFsckDbLock u) h <- liftIO $ H.openDbQueue db "fscked" - - -- work around https://github.com/yesodweb/persistent/issues/474 - liftIO setConsoleEncoding - return $ FsckHandle h u closeDb :: FsckHandle -> Annex () diff --git a/Database/Handle.hs b/Database/Handle.hs index 8790b3218f..748feaa976 100644 --- a/Database/Handle.hs +++ b/Database/Handle.hs @@ -19,6 +19,7 @@ module Database.Handle ( ) where import Utility.Exception +import Utility.FileSystemEncoding import Database.Persist.Sqlite import qualified Database.Sqlite as Sqlite @@ -66,6 +67,10 @@ openDb :: FilePath -> TableName -> IO DbHandle openDb db tablename = do jobs <- newEmptyMVar worker <- async (workerThread (T.pack db) tablename jobs) + + -- work around https://github.com/yesodweb/persistent/issues/474 + liftIO setConsoleEncoding + return $ DbHandle worker jobs {- This is optional; when the DbHandle gets garbage collected it will diff --git a/Database/Keys.hs b/Database/Keys.hs index 425f1d54ba..c51a163c47 100644 --- a/Database/Keys.hs +++ b/Database/Keys.hs @@ -12,8 +12,6 @@ module Database.Keys ( DbHandle, - openDb, - closeDb, addAssociatedFile, getAssociatedFiles, getAssociatedKey, @@ -27,7 +25,7 @@ module Database.Keys ( ) where import Database.Types -import Database.Keys.Types +import Database.Keys.Handle import qualified Database.Queue as H import Locations import Common hiding (delete) @@ -35,12 +33,12 @@ import Annex import Types.Key import Annex.Perms import Annex.LockFile -import Messages import Utility.InodeCache import Annex.InodeSentinal import Database.Persist.TH import Database.Esqueleto hiding (Key) +import Data.Time.Clock share [mkPersist sqlSettings, mkMigrate "migrateKeysDb"] [persistLowerCase| Associated @@ -53,7 +51,86 @@ Content KeyCacheIndex key cache |] -{- Opens the database, creating it if it doesn't exist yet. +newtype ReadHandle = ReadHandle H.DbQueue + +type Reader v = ReadHandle -> Annex v + +{- Runs an action that reads from the database. + - + - If the database doesn't already exist, it's not created; mempty is + - returned instead. This way, when the keys database is not in use, + - there's minimal overhead in checking it. + - + - If the database is already open, any writes are flushed to it, to ensure + - consistency. + - + - Any queued writes will be flushed before the read. + -} +runReader :: Monoid v => Reader v -> Annex v +runReader a = do + h <- getDbHandle + withDbState h go + where + go DbEmpty = return (mempty, DbEmpty) + go st@(DbOpen qh) = do + liftIO $ H.flushDbQueue qh + v <- a (ReadHandle qh) + return (v, st) + go DbClosed = do + st' <- openDb False DbClosed + v <- case st' of + (DbOpen qh) -> a (ReadHandle qh) + _ -> return mempty + return (v, st') + +readDb :: SqlPersistM a -> ReadHandle -> Annex a +readDb a (ReadHandle h) = liftIO $ H.queryDbQueue h a + +newtype WriteHandle = WriteHandle H.DbQueue + +type Writer = WriteHandle -> Annex () + +{- Runs an action that writes to the database. Typically this is used to + - queue changes, which will be flushed at a later point. + - + - The database is created if it doesn't exist yet. -} +runWriter :: Writer -> Annex () +runWriter a = do + h <- getDbHandle + withDbState h go + where + go st@(DbOpen qh) = do + v <- a (WriteHandle qh) + return (v, st) + go st = do + st' <- openDb True st + v <- case st' of + DbOpen qh -> a (WriteHandle qh) + _ -> error "internal" + return (v, st) + +queueDb :: SqlPersistM () -> WriteHandle -> Annex () +queueDb a (WriteHandle h) = liftIO $ H.queueDb h checkcommit a + where + -- commit queue after 1000 changes or 5 minutes, whichever comes first + checkcommit sz lastcommittime + | sz > 1000 = return True + | otherwise = do + now <- getCurrentTime + return $ diffUTCTime lastcommittime now > 300 + +{- Gets the handle cached in Annex state; creates a new one if it's not yet + - available, but doesn't open the database. -} +getDbHandle :: Annex DbHandle +getDbHandle = go =<< getState keysdbhandle + where + go (Just h) = pure h + go Nothing = do + h <- liftIO newDbHandle + changeState $ \s -> s { keysdbhandle = Just h } + return h + +{- Opens the database, perhaps creating it if it doesn't exist yet. - - Multiple readers and writers can have the database open at the same - time. Database.Handle deals with the concurrency issues. @@ -61,32 +138,32 @@ Content - the database doesn't exist yet, one caller wins the lock and - can create it undisturbed. -} -openDb :: Annex DbHandle -openDb = withExclusiveLock gitAnnexKeysDbLock $ do +openDb :: Bool -> DbState -> Annex DbState +openDb _ st@(DbOpen _) = return st +openDb False DbEmpty = return DbEmpty +openDb createdb _ = withExclusiveLock gitAnnexKeysDbLock $ do dbdir <- fromRepo gitAnnexKeysDb let db = dbdir "db" - unlessM (liftIO $ doesFileExist db) $ do - liftIO $ do - createDirectoryIfMissing True dbdir - H.initDb db $ void $ - runMigrationSilent migrateKeysDb - setAnnexDirPerm dbdir - setAnnexFilePerm db - h <- liftIO $ H.openDbQueue db "content" - - -- work around https://github.com/yesodweb/persistent/issues/474 - liftIO setConsoleEncoding - - return $ DbHandle h - -closeDb :: DbHandle -> IO () -closeDb (DbHandle h) = H.closeDbQueue h - -withDbHandle :: (H.DbQueue -> IO a) -> Annex a -withDbHandle a = bracket openDb (liftIO . closeDb) (\(DbHandle h) -> liftIO (a h)) + dbexists <- liftIO $ doesFileExist db + case (dbexists, createdb) of + (True, _) -> open db + (False, True) -> do + liftIO $ do + createDirectoryIfMissing True dbdir + H.initDb db $ void $ + runMigrationSilent migrateKeysDb + setAnnexDirPerm dbdir + setAnnexFilePerm db + open db + (False, False) -> return DbEmpty + where + open db = liftIO $ DbOpen <$> H.openDbQueue db "content" addAssociatedFile :: Key -> FilePath -> Annex () -addAssociatedFile k f = withDbHandle $ \h -> H.queueDb h (\_ _ -> pure True) $ do +addAssociatedFile k f = runWriter $ addAssociatedFile' k f + +addAssociatedFile' :: Key -> FilePath -> Writer +addAssociatedFile' k f = queueDb $ do -- If the same file was associated with a different key before, -- remove that. delete $ from $ \r -> do @@ -98,11 +175,10 @@ addAssociatedFile k f = withDbHandle $ \h -> H.queueDb h (\_ _ -> pure True) $ d {- Note that the files returned were once associated with the key, but - some of them may not be any longer. -} getAssociatedFiles :: Key -> Annex [FilePath] -getAssociatedFiles k = withDbHandle $ \h -> H.queryDbQueue h $ - getAssociatedFiles' $ toSKey k +getAssociatedFiles = runReader . getAssociatedFiles' . toSKey -getAssociatedFiles' :: SKey -> SqlPersistM [FilePath] -getAssociatedFiles' sk = do +getAssociatedFiles' :: SKey -> Reader [FilePath] +getAssociatedFiles' sk = readDb $ do l <- select $ from $ \r -> do where_ (r ^. AssociatedKey ==. val sk) return (r ^. AssociatedFile) @@ -111,22 +187,22 @@ getAssociatedFiles' sk = do {- Gets any keys that are on record as having a particular associated file. - (Should be one or none but the database doesn't enforce that.) -} getAssociatedKey :: FilePath -> Annex [Key] -getAssociatedKey f = withDbHandle $ \h -> H.queryDbQueue h $ - getAssociatedKey' f +getAssociatedKey = runReader . getAssociatedKey' -getAssociatedKey' :: FilePath -> SqlPersistM [Key] -getAssociatedKey' f = do +getAssociatedKey' :: FilePath -> Reader [Key] +getAssociatedKey' f = readDb $ do l <- select $ from $ \r -> do where_ (r ^. AssociatedFile ==. val f) return (r ^. AssociatedKey) return $ map (fromSKey . unValue) l removeAssociatedFile :: Key -> FilePath -> Annex () -removeAssociatedFile k f = withDbHandle $ \h -> H.queueDb h (\_ _ -> pure True) $ +removeAssociatedFile k = runWriter . removeAssociatedFile' (toSKey k) + +removeAssociatedFile' :: SKey -> FilePath -> Writer +removeAssociatedFile' sk f = queueDb $ delete $ from $ \r -> do where_ (r ^. AssociatedKey ==. val sk &&. r ^. AssociatedFile ==. val f) - where - sk = toSKey k {- Stats the files, and stores their InodeCaches. -} storeInodeCaches :: Key -> [FilePath] -> Annex () @@ -134,23 +210,28 @@ storeInodeCaches k fs = withTSDelta $ \d -> addInodeCaches k . catMaybes =<< liftIO (mapM (`genInodeCache` d) fs) addInodeCaches :: Key -> [InodeCache] -> Annex () -addInodeCaches k is = withDbHandle $ \h -> H.queueDb h (\_ _ -> pure True) $ - forM_ is $ \i -> insertUnique $ Content (toSKey k) (toSInodeCache i) +addInodeCaches k is = runWriter $ addInodeCaches' (toSKey k) is + +addInodeCaches' :: SKey -> [InodeCache] -> Writer +addInodeCaches' sk is = queueDb $ + forM_ is $ \i -> insertUnique $ Content sk (toSInodeCache i) {- A key may have multiple InodeCaches; one for the annex object, and one - for each pointer file that is a copy of it. -} getInodeCaches :: Key -> Annex [InodeCache] -getInodeCaches k = withDbHandle $ \h -> H.queryDbQueue h $ do +getInodeCaches = runReader . getInodeCaches' . toSKey + +getInodeCaches' :: SKey -> Reader [InodeCache] +getInodeCaches' sk = readDb $ do l <- select $ from $ \r -> do where_ (r ^. ContentKey ==. val sk) return (r ^. ContentCache) return $ map (fromSInodeCache . unValue) l - where - sk = toSKey k removeInodeCaches :: Key -> Annex () -removeInodeCaches k = withDbHandle $ \h -> H.queueDb h (\_ _ -> pure True) $ +removeInodeCaches = runWriter . removeInodeCaches' . toSKey + +removeInodeCaches' :: SKey -> Writer +removeInodeCaches' sk = queueDb $ delete $ from $ \r -> do where_ (r ^. ContentKey ==. val sk) - where - sk = toSKey k diff --git a/Database/Keys/Handle.hs b/Database/Keys/Handle.hs new file mode 100644 index 0000000000..5a5912b0ba --- /dev/null +++ b/Database/Keys/Handle.hs @@ -0,0 +1,55 @@ +{- Handle for the Keys database. + - + - Copyright 2015 Joey Hess + -: + - Licensed under the GNU GPL version 3 or higher. + -} + +module Database.Keys.Handle ( + DbHandle, + newDbHandle, + DbState(..), + withDbState, + flushDbQueue, +) where + +import qualified Database.Queue as H +import Utility.Exception + +import Control.Concurrent +import Control.Monad.IO.Class (liftIO, MonadIO) + +-- The MVar is always left full except when actions are run +-- that access the database. +newtype DbHandle = DbHandle (MVar DbState) + +-- The database can be closed or open, but it also may have been +-- tried to open (for read) and didn't exist yet. +data DbState = DbClosed | DbOpen H.DbQueue | DbEmpty + +newDbHandle :: IO DbHandle +newDbHandle = DbHandle <$> newMVar DbClosed + +-- Runs an action on the state of the handle, which can change its state. +-- The MVar is empty while the action runs, which blocks other users +-- of the handle from running. +withDbState + :: (MonadIO m, MonadCatch m) + => DbHandle + -> (DbState + -> m (v, DbState)) + -> m v +withDbState (DbHandle mvar) a = do + st <- liftIO $ takeMVar mvar + go st `onException` (liftIO $ putMVar mvar st) + where + go st = do + (v, st') <- a st + liftIO $ putMVar mvar st' + return v + +flushDbQueue :: DbHandle -> IO () +flushDbQueue (DbHandle mvar) = go =<< readMVar mvar + where + go (DbOpen qh) = H.flushDbQueue qh + go _ = return () diff --git a/Database/Keys/Types.hs b/Database/Keys/Types.hs deleted file mode 100644 index 3fabafcf25..0000000000 --- a/Database/Keys/Types.hs +++ /dev/null @@ -1,14 +0,0 @@ -{- Sqlite database of information about Keys, data types. - - - - Copyright 2015 Joey Hess - -: - - Licensed under the GNU GPL version 3 or higher. - -} - -module Database.Keys.Types ( - DbHandle(..) -) where - -import qualified Database.Queue as H - -newtype DbHandle = DbHandle H.DbQueue diff --git a/Database/Queue.hs b/Database/Queue.hs index 11cc23b2d9..99fbacb9b6 100644 --- a/Database/Queue.hs +++ b/Database/Queue.hs @@ -22,7 +22,6 @@ import Utility.Monad import Database.Handle import Database.Persist.Sqlite -import Control.Monad import Control.Concurrent import Data.Time.Clock diff --git a/Messages.hs b/Messages.hs index a49f20711b..b62e6d2a7b 100644 --- a/Messages.hs +++ b/Messages.hs @@ -31,7 +31,6 @@ module Messages ( showHeader, showRaw, setupConsole, - setConsoleEncoding, enableDebugOutput, disableDebugOutput, debugEnabled, @@ -183,13 +182,6 @@ setupConsole = do updateGlobalLogger rootLoggerName (setLevel NOTICE . setHandlers [s]) setConsoleEncoding -{- This avoids ghc's output layer crashing on invalid encoded characters in - - filenames when printing them out. -} -setConsoleEncoding :: IO () -setConsoleEncoding = do - fileEncoding stdout - fileEncoding stderr - {- Log formatter with precision into fractions of a second. -} preciseLogFormatter :: LogFormatter a preciseLogFormatter = tfLogFormatter "%F %X%Q" "[$time] $msg" diff --git a/Utility/FileSystemEncoding.hs b/Utility/FileSystemEncoding.hs index 67341d371d..eab98337a8 100644 --- a/Utility/FileSystemEncoding.hs +++ b/Utility/FileSystemEncoding.hs @@ -19,6 +19,7 @@ module Utility.FileSystemEncoding ( encodeW8NUL, decodeW8NUL, truncateFilePath, + setConsoleEncoding, ) where import qualified GHC.Foreign as GHC @@ -164,3 +165,10 @@ truncateFilePath n = reverse . go [] n . L8.fromString else go (c:coll) (cnt - x') (L8.drop 1 bs) _ -> coll #endif + +{- This avoids ghc's output layer crashing on invalid encoded characters in + - filenames when printing them out. -} +setConsoleEncoding :: IO () +setConsoleEncoding = do + fileEncoding stdout + fileEncoding stderr diff --git a/doc/todo/smudge.mdwn b/doc/todo/smudge.mdwn index 2e8479e99f..a62e19f68d 100644 --- a/doc/todo/smudge.mdwn +++ b/doc/todo/smudge.mdwn @@ -331,18 +331,6 @@ files to be unlocked, while the indirect upgrades don't touch the files. # fails to drop content from associated file othername, # because it doesn't know it has that name # git commit clears up this mess -* A new connection to the Keys database is opened each time. - It would be more efficient to reuse a connection. - However, that needs a way to close the connection, which was a problem. - See 38a23928e9d45b56d6836a4eac703862d63cf93c for details. -* See if the cases where the Keys database is not used can be - optimised. Eg, if the Keys database doesn't exist at all, - we know smudge/clean are not used, so queries don't - need to open the database or do reconciliation, but can simply return none. - Also, no need for Backend.lookupFile to catKeyFile in this case - (when not in direct mode). - However, beware over-optimisation breaking the assistant or perhaps other - long-lived processes. * Interaction with shared clones. Should avoid hard linking from/to a object in a shared clone if either repository has the object unlocked. (And should avoid unlocking an object if it's hard linked to a shared clone, @@ -368,6 +356,14 @@ files to be unlocked, while the indirect upgrades don't touch the files. smudged files.) * Audit code for all uses of isDirect. These places almost always need adjusting to support v6, if they haven't already. +* Optimisation: See if the database schema can be improved to speed things + up. Are there enough indexes? getAssociatedKey in particular does a + reverse lookup and might benefit from an index. +* Optimisation: Reads from the Keys database avoid doing anything if the + database doesn't exist. This makes v5 repos, or v6 with all locked files + faster. However, if a v6 repo unlocks and then re-locks a file, its + database will exist, and so this optimisation will no longer apply. + Could try to detect when the database is empty, and remove it or avoid reads. * Eventually (but not yet), make v6 the default for new repositories. Note that the assistant forces repos into direct mode; that will need to From f839d407e3dd09307600dc947d0145443be5e23d Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 23 Dec 2015 19:38:18 -0400 Subject: [PATCH 93/96] flush keys db queue even on exception Also fixed a bug in makeRunner; run' leaves the mvar empty so have to refill it. --- Annex.hs | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/Annex.hs b/Annex.hs index a8368f81ee..4f26c497c7 100644 --- a/Annex.hs +++ b/Annex.hs @@ -201,9 +201,12 @@ run s a = flip run' a =<< newMVar s run' :: MVar AnnexState -> Annex a -> IO (a, AnnexState) run' mvar a = do r <- runReaderT (runAnnex a) mvar + `onException` (flush =<< readMVar mvar) s' <- takeMVar mvar - maybe noop Keys.flushDbQueue (keysdbhandle s') + flush s' return (r, s') + where + flush = maybe noop Keys.flushDbQueue . keysdbhandle {- Performs an action in the Annex monad from a starting state, - and throws away the new state. -} @@ -215,7 +218,10 @@ eval s a = fst <$> run s a makeRunner :: Annex (Annex a -> IO a) makeRunner = do mvar <- ask - return $ \a -> fst <$> run' mvar a + return $ \a -> do + (r, s) <- run' mvar a + putMVar mvar s + return r getState :: (AnnexState -> v) -> Annex v getState selector = do From c21567dfd347167f9ecd8c7dc564a38d2814b977 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Thu, 24 Dec 2015 13:06:03 -0400 Subject: [PATCH 94/96] typo --- Database/Keys.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Database/Keys.hs b/Database/Keys.hs index c51a163c47..f5a28c7040 100644 --- a/Database/Keys.hs +++ b/Database/Keys.hs @@ -107,7 +107,7 @@ runWriter a = do v <- case st' of DbOpen qh -> a (WriteHandle qh) _ -> error "internal" - return (v, st) + return (v, st') queueDb :: SqlPersistM () -> WriteHandle -> Annex () queueDb a (WriteHandle h) = liftIO $ H.queueDb h checkcommit a From 9d3474ef1b402e50a71af82fbff5359796378fc0 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Thu, 24 Dec 2015 13:07:42 -0400 Subject: [PATCH 95/96] unused import --- Database/Fsck.hs | 1 - 1 file changed, 1 deletion(-) diff --git a/Database/Fsck.hs b/Database/Fsck.hs index da6e6263a8..e7ece34ed2 100644 --- a/Database/Fsck.hs +++ b/Database/Fsck.hs @@ -31,7 +31,6 @@ import Types.Key import Types.UUID import Annex.Perms import Annex.LockFile -import Messages import Database.Persist.TH import Database.Esqueleto hiding (Key) From 7c02f070b10eeaca26bfc3a4f554bbccada0997b Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Thu, 24 Dec 2015 13:15:26 -0400 Subject: [PATCH 96/96] lost some bookkeeping info I forgot to convert this to use Annex.Ingest, todo later. --- Command/Smudge.hs | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/Command/Smudge.hs b/Command/Smudge.hs index 5666381b05..bde440f7ee 100644 --- a/Command/Smudge.hs +++ b/Command/Smudge.hs @@ -14,6 +14,7 @@ import Annex.Link import Annex.MetaData import Annex.FileMatcher import Annex.InodeSentinal +import Annex.Ingest import Utility.InodeCache import Types.KeySource import Backend @@ -74,7 +75,7 @@ clean file = do if isJust (parseLinkOrPointer b) then liftIO $ B.hPut stdout b else ifM (shouldAnnex file) - ( liftIO . emitPointer =<< ingest file + ( liftIO . emitPointer =<< ingestLocal file , liftIO $ B.hPut stdout b ) stop @@ -84,8 +85,9 @@ shouldAnnex file = do matcher <- largeFilesMatcher checkFileMatcher matcher file -ingest :: FilePath -> Annex Key -ingest file = do +-- TODO: Use main ingest code instead? +ingestLocal :: FilePath -> Annex Key +ingestLocal file = do backend <- chooseBackend file ic <- withTSDelta (liftIO . genInodeCache file) let source = KeySource @@ -105,6 +107,8 @@ ingest file = do LinkAnnexNoop -> noop genMetaData k file =<< liftIO (getFileStatus file) + cleanOldKeys file k + Database.Keys.addAssociatedFile k file return k emitPointer :: Key -> IO ()