From d3cee987caf20b309334b37bd1b89e8b9115cf0a Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Tue, 5 Jun 2012 19:51:03 -0400 Subject: [PATCH 1/4] separate source of content from the filename associated with the key when generating a key This already made migrate's code a lot simpler. --- Backend.hs | 18 ++++++++++-------- Backend/SHA.hs | 13 +++++++------ Backend/URL.hs | 10 +++++----- Backend/WORM.hs | 18 +++++++++--------- Command/Add.hs | 7 ++++--- Command/AddUrl.hs | 7 ++++--- Command/Migrate.hs | 26 +++++++------------------- Types/Backend.hs | 13 ++++++++++--- doc/design/assistant/inotify.mdwn | 4 +++- 9 files changed, 59 insertions(+), 57 deletions(-) diff --git a/Backend.hs b/Backend.hs index fa32669449..bde1aad78e 100644 --- a/Backend.hs +++ b/Backend.hs @@ -6,6 +6,7 @@ -} module Backend ( + B.KeySource(..), list, orderedList, genKey, @@ -51,18 +52,19 @@ orderedList = do parseBackendList s = map lookupBackendName $ words s {- Generates a key for a file, trying each backend in turn until one - - accepts it. -} -genKey :: FilePath -> Maybe Backend -> Annex (Maybe (Key, Backend)) -genKey file trybackend = do + - accepts it. + -} +genKey :: B.KeySource -> Maybe Backend -> Annex (Maybe (Key, Backend)) +genKey source trybackend = do bs <- orderedList let bs' = maybe bs (: bs) trybackend - genKey' bs' file -genKey' :: [Backend] -> FilePath -> Annex (Maybe (Key, Backend)) + genKey' bs' source +genKey' :: [Backend] -> B.KeySource -> Annex (Maybe (Key, Backend)) genKey' [] _ = return Nothing -genKey' (b:bs) file = do - r <- B.getKey b file +genKey' (b:bs) source = do + r <- B.getKey b source case r of - Nothing -> genKey' bs file + Nothing -> genKey' bs source Just k -> return $ Just (makesane k, b) where -- keyNames should not contain newline characters. diff --git a/Backend/SHA.hs b/Backend/SHA.hs index c2a6cf9761..df613bbcdd 100644 --- a/Backend/SHA.hs +++ b/Backend/SHA.hs @@ -69,9 +69,10 @@ shaN size file = do command = fromJust $ shaCommand size {- A key is a checksum of its contents. -} -keyValue :: SHASize -> FilePath -> Annex (Maybe Key) -keyValue size file = do - s <- shaN size file +keyValue :: SHASize -> KeySource -> Annex (Maybe Key) +keyValue size source = do + let file = contentLocation source + s <- shaN size file stat <- liftIO $ getFileStatus file return $ Just $ stubKey { keyName = s @@ -80,14 +81,14 @@ keyValue size file = do } {- Extension preserving keys. -} -keyValueE :: SHASize -> FilePath -> Annex (Maybe Key) -keyValueE size file = keyValue size file >>= maybe (return Nothing) addE +keyValueE :: SHASize -> KeySource -> Annex (Maybe Key) +keyValueE size source = keyValue size source >>= maybe (return Nothing) addE where addE k = return $ Just $ k { keyName = keyName k ++ extension , keyBackendName = shaNameE size } - naiveextension = takeExtension file + naiveextension = takeExtension $ keyFilename source extension -- long or newline containing extensions are -- probably not really an extension diff --git a/Backend/URL.hs b/Backend/URL.hs index b98974cb45..cc9112a362 100644 --- a/Backend/URL.hs +++ b/Backend/URL.hs @@ -20,11 +20,11 @@ backends :: [Backend] backends = [backend] backend :: Backend -backend = Backend { - name = "URL", - getKey = const (return Nothing), - fsckKey = Nothing -} +backend = Backend + { name = "URL" + , getKey = const $ return Nothing + , fsckKey = Nothing + } fromUrl :: String -> Maybe Integer -> Key fromUrl url size = stubKey diff --git a/Backend/WORM.hs b/Backend/WORM.hs index c022fd413b..630000fa2b 100644 --- a/Backend/WORM.hs +++ b/Backend/WORM.hs @@ -15,11 +15,11 @@ backends :: [Backend] backends = [backend] backend :: Backend -backend = Backend { - name = "WORM", - getKey = keyValue, - fsckKey = Nothing -} +backend = Backend + { name = "WORM" + , getKey = keyValue + , fsckKey = Nothing + } {- The key includes the file size, modification time, and the - basename of the filename. @@ -28,11 +28,11 @@ backend = Backend { - while also allowing a file to be moved around while retaining the - same key. -} -keyValue :: FilePath -> Annex (Maybe Key) -keyValue file = do - stat <- liftIO $ getFileStatus file +keyValue :: KeySource -> Annex (Maybe Key) +keyValue source = do + stat <- liftIO $ getFileStatus $ contentLocation source return $ Just Key { - keyName = takeFileName file, + keyName = takeFileName $ keyFilename source, keyBackendName = name backend, keySize = Just $ fromIntegral $ fileSize stat, keyMtime = Just $ modificationTime stat diff --git a/Command/Add.hs b/Command/Add.hs index ef839b2a30..7029a9c167 100644 --- a/Command/Add.hs +++ b/Command/Add.hs @@ -12,7 +12,7 @@ import Annex.Exception import Command import qualified Annex import qualified Annex.Queue -import qualified Backend +import Backend import Logs.Location import Annex.Content import Utility.Touch @@ -46,8 +46,9 @@ start file = notBareRepo $ ifAnnexed file fixup add perform :: FilePath -> CommandPerform perform file = do - backend <- Backend.chooseBackend file - Backend.genKey file backend >>= go + let source = KeySource { keyFilename = file, contentLocation = file} + backend <- chooseBackend file + genKey source backend >>= go where go Nothing = stop go (Just (key, _)) = do diff --git a/Command/AddUrl.hs b/Command/AddUrl.hs index 089606e85d..87b24149d8 100644 --- a/Command/AddUrl.hs +++ b/Command/AddUrl.hs @@ -11,7 +11,7 @@ import Network.URI import Common.Annex import Command -import qualified Backend +import Backend import qualified Command.Add import qualified Annex import qualified Backend.URL @@ -72,8 +72,9 @@ download url file = do tmp <- fromRepo $ gitAnnexTmpLocation dummykey liftIO $ createDirectoryIfMissing True (parentDir tmp) stopUnless (downloadUrl [url] tmp) $ do - backend <- Backend.chooseBackend file - k <- Backend.genKey tmp backend + backend <- chooseBackend file + let source = KeySource { keyFilename = file, contentLocation = file} + k <- genKey source backend case k of Nothing -> stop Just (key, _) -> do diff --git a/Command/Migrate.hs b/Command/Migrate.hs index 6e28c4b52e..29e664ce23 100644 --- a/Command/Migrate.hs +++ b/Command/Migrate.hs @@ -9,7 +9,7 @@ module Command.Migrate where import Common.Annex import Command -import qualified Backend +import Backend import qualified Types.Key import Annex.Content import qualified Command.ReKey @@ -23,14 +23,14 @@ seek = [withFilesInGit $ whenAnnexed start] start :: FilePath -> (Key, Backend) -> CommandStart start file (key, oldbackend) = do exists <- inAnnex key - newbackend <- choosebackend =<< Backend.chooseBackend file + newbackend <- choosebackend =<< chooseBackend file if (newbackend /= oldbackend || upgradableKey key) && exists then do showStart "migrate" file next $ perform file key newbackend else stop where - choosebackend Nothing = Prelude.head <$> Backend.orderedList + choosebackend Nothing = Prelude.head <$> orderedList choosebackend (Just backend) = return backend {- Checks if a key is upgradable to a newer representation. -} @@ -40,25 +40,13 @@ upgradableKey key = isNothing $ Types.Key.keySize key {- Store the old backend's key in the new backend - The old backend's key is not dropped from it, because there may - - be other files still pointing at that key. - - - - Use the same filename as the file for the temp file name, to support - - backends that allow the filename to influence the keys they - - generate. - -} + - be other files still pointing at that key. -} perform :: FilePath -> Key -> Backend -> CommandPerform perform file oldkey newbackend = maybe stop go =<< genkey where go newkey = stopUnless (Command.ReKey.linkKey oldkey newkey) $ next $ Command.ReKey.cleanup file oldkey newkey genkey = do - src <- inRepo $ gitAnnexLocation oldkey - tmp <- fromRepo gitAnnexTmpDir - let tmpfile = tmp takeFileName file - cleantmp tmpfile - liftIO $ createLink src tmpfile - newkey <- liftM fst <$> - Backend.genKey tmpfile (Just newbackend) - cleantmp tmpfile - return newkey - cleantmp t = liftIO $ whenM (doesFileExist t) $ removeFile t + content <- inRepo $ gitAnnexLocation oldkey + let source = KeySource { keyFilename = file, contentLocation = content } + liftM fst <$> genKey source (Just newbackend) diff --git a/Types/Backend.hs b/Types/Backend.hs index d52cec5471..5abb0896dc 100644 --- a/Types/Backend.hs +++ b/Types/Backend.hs @@ -2,7 +2,7 @@ - - Most things should not need this, using Types instead - - - Copyright 2010 Joey Hess + - Copyright 2010,2012 Joey Hess - - Licensed under the GNU GPL version 3 or higher. -} @@ -11,11 +11,18 @@ module Types.Backend where import Types.Key +{- The source used to generate a key. The location of the content + - may be different from the filename associated with the key. -} +data KeySource = KeySource + { keyFilename :: FilePath + , contentLocation :: FilePath + } + data BackendA a = Backend { -- name of this backend name :: String, - -- converts a filename to a key - getKey :: FilePath -> a (Maybe Key), + -- gets the key to use for a given content + getKey :: KeySource -> a (Maybe Key), -- called during fsck to check a key, if the backend has its own checks fsckKey :: Maybe (Key -> FilePath -> a Bool) } diff --git a/doc/design/assistant/inotify.mdwn b/doc/design/assistant/inotify.mdwn index 3263c476da..ca63a1c823 100644 --- a/doc/design/assistant/inotify.mdwn +++ b/doc/design/assistant/inotify.mdwn @@ -61,7 +61,9 @@ Many races need to be dealt with by this code. Here are some of them. **Currently unfixed**; The new content will be moved to the annex under the old checksum, and fsck will later catch this inconsistency. - Possible fix: Move content someplace before doing checksumming. + Possible fix: Move content someplace before doing checksumming. Perhaps + using a hard link and removing the write bit to prevent modification + while checksumming. * File is added and then replaced with another file before the annex add makes its symlink. From f8d422fe24e425676a928959a2489f277c3026d3 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Tue, 5 Jun 2012 19:54:44 -0400 Subject: [PATCH 2/4] update test suite --- test.hs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test.hs b/test.hs index 9a0fce873e..1a7c382c01 100644 --- a/test.hs +++ b/test.hs @@ -171,7 +171,8 @@ test_reinject :: Test test_reinject = "git-annex reinject/fromkey" ~: TestCase $ intmpclonerepo $ do git_annex "drop" ["--force", sha1annexedfile] @? "drop failed" writeFile tmp $ content sha1annexedfile - r <- annexeval $ Types.Backend.getKey backendSHA1 tmp + r <- annexeval $ Types.Backend.getKey backendSHA1 $ + Types.Backend.KeySource { Types.Backend.keyFilename = tmp, Types.Backend.contentLocation = tmp } let key = show $ fromJust r git_annex "reinject" [tmp, sha1annexedfile] @? "reinject failed" git_annex "fromkey" [key, sha1annexedfiledup] @? "fromkey failed" From 5809f33f8b3c2aa3cb8207bc775339c533a914ab Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Tue, 5 Jun 2012 20:25:32 -0400 Subject: [PATCH 3/4] use createAnnexDirectory when setting up tmp dir --- Command/Fsck.hs | 3 ++- Remote/Rsync.hs | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/Command/Fsck.hs b/Command/Fsck.hs index 38b1bbbacd..ae21acf8af 100644 --- a/Command/Fsck.hs +++ b/Command/Fsck.hs @@ -16,6 +16,7 @@ import qualified Types.Backend import qualified Types.Key import qualified Backend import Annex.Content +import Annex.Perms import Logs.Location import Logs.Trust import Annex.UUID @@ -83,8 +84,8 @@ performRemote key file backend numcopies remote = withtmp a = do pid <- liftIO getProcessID t <- fromRepo gitAnnexTmpDir + createAnnexDirectory t let tmp = t "fsck" ++ show pid ++ "." ++ keyFile key - liftIO $ createDirectoryIfMissing True t let cleanup = liftIO $ catchIO (removeFile tmp) (const noop) cleanup cleanup `after` a tmp diff --git a/Remote/Rsync.hs b/Remote/Rsync.hs index 60cbf4595f..df4e0a44f2 100644 --- a/Remote/Rsync.hs +++ b/Remote/Rsync.hs @@ -19,6 +19,7 @@ import Remote.Helper.Special import Remote.Helper.Encryptable import Crypto import Utility.RsyncFile +import Annex.Perms type RsyncUrl = String @@ -176,6 +177,7 @@ withRsyncScratchDir :: (FilePath -> Annex Bool) -> Annex Bool withRsyncScratchDir a = do pid <- liftIO getProcessID t <- fromRepo gitAnnexTmpDir + createAnnexDirectory t let tmp = t "rsynctmp" show pid nuke tmp liftIO $ createDirectoryIfMissing True tmp From c981ccc0773a02ca60eb6456f04de14cd758ee7b Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Tue, 5 Jun 2012 20:28:34 -0400 Subject: [PATCH 4/4] add: Prevent (most) modifications from being made to a file while it is being added to the annex. Anything that tries to open the file for write, or delete the file, or replace it with something else, will not affect the add. Only if a process has the file open for write before add starts can it still change it while (or after) it's added to the annex. (fsck will catch this later of course) --- Command/Add.hs | 29 +++++++++++++++++++++++------ debian/changelog | 7 +++++++ 2 files changed, 30 insertions(+), 6 deletions(-) diff --git a/Command/Add.hs b/Command/Add.hs index 7029a9c167..2c671eea29 100644 --- a/Command/Add.hs +++ b/Command/Add.hs @@ -15,7 +15,9 @@ import qualified Annex.Queue import Backend import Logs.Location import Annex.Content +import Annex.Perms import Utility.Touch +import Utility.FileMode def :: [Command] def = [command "add" paramPaths seek "add files to annex"] @@ -44,23 +46,38 @@ start file = notBareRepo $ ifAnnexed file fixup add liftIO $ removeFile file next $ next $ cleanup file key =<< inAnnex key +{- The file that's being added is locked down before a key is generated, + - to prevent it from being modified in between. It's hard linked into a + - temporary location, and its writable bits are removed. It could still be + - written to by a process that already has it open for writing. -} perform :: FilePath -> CommandPerform perform file = do - let source = KeySource { keyFilename = file, contentLocation = file} + liftIO $ preventWrite file + tmp <- fromRepo gitAnnexTmpDir + createAnnexDirectory tmp + pid <- liftIO getProcessID + let tmpfile = tmp "add" ++ show pid ++ "." ++ takeFileName file + nuke tmpfile + liftIO $ createLink file tmpfile + let source = KeySource { keyFilename = file, contentLocation = tmpfile } backend <- chooseBackend file - genKey source backend >>= go + genKey source backend >>= go tmpfile where - go Nothing = stop - go (Just (key, _)) = do - handle (undo file key) $ moveAnnex key file + go _ Nothing = stop + go tmpfile (Just (key, _)) = do + handle (undo file key) $ moveAnnex key tmpfile + nuke file next $ cleanup file key True +nuke :: FilePath -> Annex () +nuke file = liftIO $ whenM (doesFileExist file) $ removeFile file + {- On error, put the file back so it doesn't seem to have vanished. - This can be called before or after the symlink is in place. -} undo :: FilePath -> Key -> IOException -> Annex a undo file key e = do whenM (inAnnex key) $ do - liftIO $ whenM (doesFileExist file) $ removeFile file + nuke file handle tryharder $ fromAnnex key file logStatus key InfoMissing throw e diff --git a/debian/changelog b/debian/changelog index fd4f7b98bc..9a010327df 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,10 @@ +git-annex (3.20120606) UNRELEASED; urgency=low + + * add: Prevent (most) modifications from being made to a file while it + is being added to the annex. + + -- Joey Hess Tue, 05 Jun 2012 20:25:51 -0400 + git-annex (3.20120605) unstable; urgency=low * sync: Show a nicer message if a user tries to sync to a special remote.