diff --git a/Annex/Ingest.hs b/Annex/Ingest.hs new file mode 100644 index 0000000000..0fd32a042f --- /dev/null +++ b/Annex/Ingest.hs @@ -0,0 +1,220 @@ +{- git-annex content ingestion + - + - Copyright 2010-2015 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +{-# LANGUAGE CPP #-} + +module Annex.Ingest ( + lockDown, + ingest, + finishIngestDirect, + addLink, + makeLink, + restoreFile, + forceParams, +) where + +import Common.Annex +import Types.KeySource +import Backend +import Annex.Content +import Annex.Content.Direct +import Annex.Perms +import Annex.Link +import Annex.MetaData +import qualified Annex +import qualified Annex.Queue +import Config +import Utility.InodeCache +import Annex.ReplaceFile +import Utility.Tmp +import Utility.CopyFile +import Annex.InodeSentinal +import Annex.Version +#ifdef WITH_CLIBS +#ifndef __ANDROID__ +import Utility.Touch +#endif +#endif + +import Control.Exception (IOException) + +{- The file that's being ingested is locked down before a key is generated, + - to prevent it from being modified in between. This lock down is not + - perfect at best (and pretty weak at worst). For example, it does not + - guard against files that are already opened for write by another process. + - So a KeySource is returned. Its inodeCache can be used to detect any + - changes that might be made to the file after it was locked down. + - + - When possible, the file is hard linked to a temp directory. This guards + - against some changes, like deletion or overwrite of the file, and + - allows lsof checks to be done more efficiently when adding a lot of files. + - + - Lockdown can fail if a file gets deleted, and Nothing will be returned. + -} +lockDown :: FilePath -> Annex (Maybe KeySource) +lockDown = either + (\e -> warning (show e) >> return Nothing) + (return . Just) + <=< lockDown' + +lockDown' :: FilePath -> Annex (Either IOException KeySource) +lockDown' file = ifM crippledFileSystem + ( withTSDelta $ liftIO . tryIO . nohardlink + , tryIO $ do + tmp <- fromRepo gitAnnexTmpMiscDir + createAnnexDirectory tmp + go tmp + ) + where + {- In indirect mode, the write bit is removed from the file as part + - of lock down to guard against further writes, and because objects + - in the annex have their write bit disabled anyway. + - + - Freezing the content early also lets us fail early when + - someone else owns the file. + - + - This is not done in direct mode, because files there need to + - remain writable at all times. + -} + go tmp = do + unlessM isDirect $ + freezeContent file + withTSDelta $ \delta -> liftIO $ do + (tmpfile, h) <- openTempFile tmp $ + relatedTemplate $ takeFileName file + hClose h + nukeFile tmpfile + withhardlink delta tmpfile `catchIO` const (nohardlink delta) + nohardlink delta = do + cache <- genInodeCache file delta + return KeySource + { keyFilename = file + , contentLocation = file + , inodeCache = cache + } + withhardlink delta tmpfile = do + createLink file tmpfile + cache <- genInodeCache tmpfile delta + return KeySource + { keyFilename = file + , contentLocation = tmpfile + , inodeCache = cache + } + +{- Ingests a locked down file into the annex. + - + - In direct mode, leaves the file alone, and just updates bookkeeping + - information. + -} +ingest :: Maybe KeySource -> Annex (Maybe Key, Maybe InodeCache) +ingest Nothing = return (Nothing, Nothing) +ingest (Just source) = withTSDelta $ \delta -> do + backend <- chooseBackend $ keyFilename source + k <- genKey source backend + let src = contentLocation source + ms <- liftIO $ catchMaybeIO $ getFileStatus src + mcache <- maybe (pure Nothing) (liftIO . toInodeCache delta src) ms + case (mcache, inodeCache source) of + (_, Nothing) -> go k mcache ms + (Just newc, Just c) | compareStrong c newc -> go k mcache ms + _ -> failure "changed while it was being added" + where + go k mcache ms = ifM isDirect + ( godirect k mcache ms + , goindirect k mcache ms + ) + + goindirect (Just (key, _)) mcache ms = do + catchNonAsync (moveAnnex key $ contentLocation source) + (restoreFile (keyFilename source) key) + maybe noop (genMetaData key (keyFilename source)) ms + liftIO $ nukeFile $ keyFilename source + return (Just key, mcache) + goindirect _ _ _ = failure "failed to generate a key" + + godirect (Just (key, _)) (Just cache) ms = do + addInodeCache key cache + maybe noop (genMetaData key (keyFilename source)) ms + finishIngestDirect key source + return (Just key, Just cache) + godirect _ _ _ = failure "failed to generate a key" + + failure msg = do + warning $ keyFilename source ++ " " ++ msg + when (contentLocation source /= keyFilename source) $ + liftIO $ nukeFile $ contentLocation source + return (Nothing, Nothing) + +finishIngestDirect :: Key -> KeySource -> Annex () +finishIngestDirect key source = do + void $ addAssociatedFile key $ keyFilename source + when (contentLocation source /= keyFilename source) $ + liftIO $ nukeFile $ contentLocation source + + {- Copy to any other locations using the same key. -} + otherfs <- filter (/= keyFilename source) <$> associatedFiles key + forM_ otherfs $ + addContentWhenNotPresent key (keyFilename source) + +{- On error, put the file back so it doesn't seem to have vanished. + - This can be called before or after the symlink is in place. -} +restoreFile :: FilePath -> Key -> SomeException -> Annex a +restoreFile file key e = do + whenM (inAnnex key) $ do + liftIO $ nukeFile file + -- The key could be used by other files too, so leave the + -- content in the annex, and make a copy back to the file. + obj <- calcRepo $ gitAnnexLocation key + unlessM (liftIO $ copyFileExternal CopyTimeStamps obj file) $ + warning $ "Unable to restore content of " ++ file ++ "; it should be located in " ++ obj + thawContent file + throwM e + +{- Creates the symlink to the annexed content, returns the link target. -} +makeLink :: FilePath -> Key -> Maybe InodeCache -> Annex String +makeLink file key mcache = flip catchNonAsync (restoreFile file key) $ do + l <- calcRepo $ gitAnnexLink file key + replaceFile file $ makeAnnexLink l + + -- touch symlink to have same time as the original file, + -- as provided in the InodeCache + case mcache of +#if defined(WITH_CLIBS) && ! defined(__ANDROID__) + Just c -> liftIO $ touch file (TimeSpec $ inodeCacheToMtime c) False +#else + Just _ -> noop +#endif + Nothing -> noop + + return l + +{- Creates the symlink to the annexed content, and stages it in git. + - + - As long as the filesystem supports symlinks, we use + - git add, rather than directly staging the symlink to git. + - Using git add is best because it allows the queuing to work + - and is faster (staging the symlink runs hash-object commands each time). + - Also, using git add allows it to skip gitignored files, unless forced + - to include them. + -} +addLink :: FilePath -> Key -> Maybe InodeCache -> Annex () +addLink file key mcache = ifM (coreSymlinks <$> Annex.getGitConfig) + ( do + _ <- makeLink file key mcache + ps <- forceParams + Annex.Queue.addCommand "add" (ps++[Param "--"]) [file] + , do + l <- makeLink file key mcache + addAnnexLink l file + ) + +{- Parameters to pass to git add, forcing addition of ignored files. -} +forceParams :: Annex [CommandParam] +forceParams = ifM (Annex.getState Annex.force) + ( return [Param "-f"] + , return [] + ) diff --git a/Assistant/Threads/Committer.hs b/Assistant/Threads/Committer.hs index 891df8419b..5e8df56c82 100644 --- a/Assistant/Threads/Committer.hs +++ b/Assistant/Threads/Committer.hs @@ -21,13 +21,13 @@ import Logs.Transfer import Logs.Location import qualified Annex.Queue import qualified Git.LsFiles -import qualified Command.Add import Utility.ThreadScheduler import qualified Utility.Lsof as Lsof import qualified Utility.DirWatcher as DirWatcher import Types.KeySource import Config import Annex.Content +import Annex.Ingest import Annex.Link import Annex.CatFile import Annex.InodeSentinal @@ -314,7 +314,7 @@ handleAdds havelsof delayadd cs = returnWhen (null incomplete) $ do doadd = sanitycheck ks $ do (mkey, mcache) <- liftAnnex $ do showStart "add" $ keyFilename ks - Command.Add.ingest $ Just ks + ingest $ Just ks maybe (failedingest change) (done change mcache $ keyFilename ks) mkey add _ = return Nothing @@ -344,7 +344,7 @@ handleAdds havelsof delayadd cs = returnWhen (null incomplete) $ do fastadddirect :: Change -> Key -> Assistant (Maybe Change) fastadddirect change key = do let source = keySource change - liftAnnex $ Command.Add.finishIngestDirect key source + liftAnnex $ finishIngestDirect key source done change Nothing (keyFilename source) key fastaddunlocked :: Change -> Key -> Assistant (Maybe Change) @@ -377,7 +377,7 @@ handleAdds havelsof delayadd cs = returnWhen (null incomplete) $ do , do link <- ifM isDirect ( calcRepo $ gitAnnexLink file key - , Command.Add.link file key mcache + , makeLink file key mcache ) whenM (pure DirWatcher.eventsCoalesce <||> isDirect) $ stageSymlink file =<< hashSymlink link @@ -424,7 +424,7 @@ safeToAdd _ _ [] [] = return [] safeToAdd havelsof delayadd pending inprocess = do maybe noop (liftIO . threadDelaySeconds) delayadd liftAnnex $ do - keysources <- forM pending $ Command.Add.lockDown . changeFile + keysources <- forM pending $ lockDown . changeFile let inprocess' = inprocess ++ mapMaybe mkinprocess (zip pending keysources) openfiles <- if havelsof then S.fromList . map fst3 . filter openwrite <$> diff --git a/Command/Add.hs b/Command/Add.hs index ab4e3a9d17..b1b830cbcf 100644 --- a/Command/Add.hs +++ b/Command/Add.hs @@ -5,39 +5,23 @@ - Licensed under the GNU GPL version 3 or higher. -} -{-# LANGUAGE CPP #-} - module Command.Add where import Common.Annex import Command -import Types.KeySource -import Backend +import Annex.Ingest import Logs.Location import Annex.Content import Annex.Content.Direct -import Annex.Perms import Annex.Link -import Annex.MetaData import qualified Annex import qualified Annex.Queue -#ifdef WITH_CLIBS -#ifndef __ANDROID__ -import Utility.Touch -#endif -#endif import Config import Utility.InodeCache import Annex.FileMatcher -import Annex.ReplaceFile -import Utility.Tmp -import Utility.CopyFile -import Annex.InodeSentinal import Annex.Version import qualified Database.Keys -import Control.Exception (IOException) - cmd :: Command cmd = notBareRepo $ withGlobalOptions (jobsOption : fileMatchingOptions) $ command "add" SectionCommon "add files to annex" @@ -89,9 +73,6 @@ addFile file = do Annex.Queue.addCommand "add" (ps++[Param "--"]) [file] return True -{- The add subcommand annexes a file, generating a key for it using a - - backend, and then moving it into the annex directory and setting up - - the symlink pointing to its content. -} start :: FilePath -> CommandStart start file = ifAnnexed file addpresent add where @@ -131,188 +112,12 @@ start file = ifAnnexed file addpresent add void $ addAssociatedFile key file next $ next $ cleanup file key Nothing =<< inAnnex key -{- The file that's being added is locked down before a key is generated, - - to prevent it from being modified in between. This lock down is not - - perfect at best (and pretty weak at worst). For example, it does not - - guard against files that are already opened for write by another process. - - So a KeySource is returned. Its inodeCache can be used to detect any - - changes that might be made to the file after it was locked down. - - - - When possible, the file is hard linked to a temp directory. This guards - - against some changes, like deletion or overwrite of the file, and - - allows lsof checks to be done more efficiently when adding a lot of files. - - - - Lockdown can fail if a file gets deleted, and Nothing will be returned. - -} -lockDown :: FilePath -> Annex (Maybe KeySource) -lockDown = either - (\e -> warning (show e) >> return Nothing) - (return . Just) - <=< lockDown' - -lockDown' :: FilePath -> Annex (Either IOException KeySource) -lockDown' file = ifM crippledFileSystem - ( withTSDelta $ liftIO . tryIO . nohardlink - , tryIO $ do - tmp <- fromRepo gitAnnexTmpMiscDir - createAnnexDirectory tmp - go tmp - ) - where - {- In indirect mode, the write bit is removed from the file as part - - of lock down to guard against further writes, and because objects - - in the annex have their write bit disabled anyway. - - - - Freezing the content early also lets us fail early when - - someone else owns the file. - - - - This is not done in direct mode, because files there need to - - remain writable at all times. - -} - go tmp = do - unlessM isDirect $ - freezeContent file - withTSDelta $ \delta -> liftIO $ do - (tmpfile, h) <- openTempFile tmp $ - relatedTemplate $ takeFileName file - hClose h - nukeFile tmpfile - withhardlink delta tmpfile `catchIO` const (nohardlink delta) - nohardlink delta = do - cache <- genInodeCache file delta - return KeySource - { keyFilename = file - , contentLocation = file - , inodeCache = cache - } - withhardlink delta tmpfile = do - createLink file tmpfile - cache <- genInodeCache tmpfile delta - return KeySource - { keyFilename = file - , contentLocation = tmpfile - , inodeCache = cache - } - -{- Ingests a locked down file into the annex. - - - - In direct mode, leaves the file alone, and just updates bookkeeping - - information. - -} -ingest :: Maybe KeySource -> Annex (Maybe Key, Maybe InodeCache) -ingest Nothing = return (Nothing, Nothing) -ingest (Just source) = withTSDelta $ \delta -> do - backend <- chooseBackend $ keyFilename source - k <- genKey source backend - let src = contentLocation source - ms <- liftIO $ catchMaybeIO $ getFileStatus src - mcache <- maybe (pure Nothing) (liftIO . toInodeCache delta src) ms - case (mcache, inodeCache source) of - (_, Nothing) -> go k mcache ms - (Just newc, Just c) | compareStrong c newc -> go k mcache ms - _ -> failure "changed while it was being added" - where - go k mcache ms = ifM isDirect - ( godirect k mcache ms - , goindirect k mcache ms - ) - - goindirect (Just (key, _)) mcache ms = do - catchNonAsync (moveAnnex key $ contentLocation source) - (undo (keyFilename source) key) - maybe noop (genMetaData key (keyFilename source)) ms - liftIO $ nukeFile $ keyFilename source - return (Just key, mcache) - goindirect _ _ _ = failure "failed to generate a key" - - godirect (Just (key, _)) (Just cache) ms = do - addInodeCache key cache - maybe noop (genMetaData key (keyFilename source)) ms - finishIngestDirect key source - return (Just key, Just cache) - godirect _ _ _ = failure "failed to generate a key" - - failure msg = do - warning $ keyFilename source ++ " " ++ msg - when (contentLocation source /= keyFilename source) $ - liftIO $ nukeFile $ contentLocation source - return (Nothing, Nothing) - -finishIngestDirect :: Key -> KeySource -> Annex () -finishIngestDirect key source = do - void $ addAssociatedFile key $ keyFilename source - when (contentLocation source /= keyFilename source) $ - liftIO $ nukeFile $ contentLocation source - - {- Copy to any other locations using the same key. -} - otherfs <- filter (/= keyFilename source) <$> associatedFiles key - forM_ otherfs $ - addContentWhenNotPresent key (keyFilename source) - perform :: FilePath -> CommandPerform perform file = lockDown file >>= ingest >>= go where go (Just key, cache) = next $ cleanup file key cache True go (Nothing, _) = stop -{- On error, put the file back so it doesn't seem to have vanished. - - This can be called before or after the symlink is in place. -} -undo :: FilePath -> Key -> SomeException -> Annex a -undo file key e = do - whenM (inAnnex key) $ do - liftIO $ nukeFile file - -- The key could be used by other files too, so leave the - -- content in the annex, and make a copy back to the file. - obj <- calcRepo $ gitAnnexLocation key - unlessM (liftIO $ copyFileExternal CopyTimeStamps obj file) $ - warning $ "Unable to restore content of " ++ file ++ "; it should be located in " ++ obj - thawContent file - throwM e - -{- Creates the symlink to the annexed content, returns the link target. -} -link :: FilePath -> Key -> Maybe InodeCache -> Annex String -link file key mcache = flip catchNonAsync (undo file key) $ do - l <- calcRepo $ gitAnnexLink file key - replaceFile file $ makeAnnexLink l - - -- touch symlink to have same time as the original file, - -- as provided in the InodeCache - case mcache of -#if defined(WITH_CLIBS) && ! defined(__ANDROID__) - Just c -> liftIO $ touch file (TimeSpec $ inodeCacheToMtime c) False -#else - Just _ -> noop -#endif - Nothing -> noop - - return l - -{- Creates the symlink to the annexed content, and stages it in git. - - - - As long as the filesystem supports symlinks, we use - - git add, rather than directly staging the symlink to git. - - Using git add is best because it allows the queuing to work - - and is faster (staging the symlink runs hash-object commands each time). - - Also, using git add allows it to skip gitignored files, unless forced - - to include them. - -} -addLink :: FilePath -> Key -> Maybe InodeCache -> Annex () -addLink file key mcache = ifM (coreSymlinks <$> Annex.getGitConfig) - ( do - _ <- link file key mcache - ps <- forceParams - Annex.Queue.addCommand "add" (ps++[Param "--"]) [file] - , do - l <- link file key mcache - addAnnexLink l file - ) - -forceParams :: Annex [CommandParam] -forceParams = ifM (Annex.getState Annex.force) - ( return [Param "-f"] - , return [] - ) - cleanup :: FilePath -> Key -> Maybe InodeCache -> Bool -> CommandCleanup cleanup file key mcache hascontent = do ifM (isDirect <&&> pure hascontent) diff --git a/Command/AddUnused.hs b/Command/AddUnused.hs index 2b315eada4..57fd0cf388 100644 --- a/Command/AddUnused.hs +++ b/Command/AddUnused.hs @@ -10,7 +10,7 @@ module Command.AddUnused where import Common.Annex import Logs.Location import Command -import qualified Command.Add +import Annex.Ingest import Command.Unused (withUnusedMaps, UnusedMaps(..), startUnused) import Types.Key @@ -31,7 +31,7 @@ start = startUnused "addunused" perform perform :: Key -> CommandPerform perform key = next $ do logStatus key InfoPresent - Command.Add.addLink file key Nothing + addLink file key Nothing return True where file = "unused." ++ key2file key diff --git a/Command/AddUrl.hs b/Command/AddUrl.hs index de83d8c9b4..659274e49e 100644 --- a/Command/AddUrl.hs +++ b/Command/AddUrl.hs @@ -14,14 +14,15 @@ import Network.URI import Common.Annex import Command import Backend -import qualified Command.Add import qualified Annex import qualified Annex.Queue import qualified Annex.Url as Url import qualified Backend.URL import qualified Remote import qualified Types.Remote as Remote +import qualified Command.Add import Annex.Content +import Annex.Ingest import Annex.UUID import Logs.Web import Types.Key @@ -359,7 +360,7 @@ cleanup u url file key mtmp = case mtmp of when (isJust mtmp) $ logStatus key InfoPresent setUrlPresent u key url - Command.Add.addLink file key Nothing + addLink file key Nothing whenM isDirect $ do void $ addAssociatedFile key file {- For moveAnnex to work in direct mode, the symlink diff --git a/Command/Indirect.hs b/Command/Indirect.hs index f5234b4dc8..06897e292e 100644 --- a/Command/Indirect.hs +++ b/Command/Indirect.hs @@ -20,7 +20,7 @@ import Annex.Content import Annex.Content.Direct import Annex.CatFile import Annex.Init -import qualified Command.Add +import Annex.Ingest cmd :: Command cmd = notBareRepo $ noDaemonRunning $ @@ -90,7 +90,7 @@ perform = do Right _ -> do l <- calcRepo $ gitAnnexLink f k liftIO $ createSymbolicLink l f - Left e -> catchNonAsync (Command.Add.undo f k e) + Left e -> catchNonAsync (restoreFile f k e) warnlocked showEndOk diff --git a/Command/Lock.hs b/Command/Lock.hs index 741c18c150..1be6e9c761 100644 --- a/Command/Lock.hs +++ b/Command/Lock.hs @@ -19,7 +19,7 @@ import Annex.Perms import Annex.ReplaceFile import Utility.InodeCache import qualified Database.Keys -import qualified Command.Add +import Annex.Ingest import Logs.Location cmd :: Command @@ -60,7 +60,7 @@ startNew file key = ifM (isJust <$> isAnnexLink file) performNew :: FilePath -> Key -> Bool -> CommandPerform performNew file key filemodified = do lockdown =<< calcRepo (gitAnnexLocation key) - Command.Add.addLink file key + addLink file key =<< withTSDelta (liftIO . genInodeCache file) next $ cleanupNew file key where diff --git a/Command/ReKey.hs b/Command/ReKey.hs index fe13d4dd45..9fb8515c01 100644 --- a/Command/ReKey.hs +++ b/Command/ReKey.hs @@ -12,7 +12,7 @@ import Command import qualified Annex import Types.Key import Annex.Content -import qualified Command.Add +import Annex.Ingest import Logs.Web import Logs.Location import Utility.CopyFile @@ -70,6 +70,6 @@ cleanup file oldkey newkey = do -- Update symlink to use the new key. liftIO $ removeFile file - Command.Add.addLink file newkey Nothing + addLink file newkey Nothing logStatus newkey InfoPresent return True diff --git a/Types/KeySource.hs b/Types/KeySource.hs index 7c2fd13d56..25774588a2 100644 --- a/Types/KeySource.hs +++ b/Types/KeySource.hs @@ -9,7 +9,7 @@ module Types.KeySource where import Utility.InodeCache -{- When content is in the process of being added to the annex, +{- When content is in the process of being ingested into the annex, - and a Key generated from it, this data type is used. - - The contentLocation may be different from the filename @@ -19,7 +19,7 @@ import Utility.InodeCache - of a different Key. - - The inodeCache can be used to detect some types of modifications to - - files that may be made while they're in the process of being added. + - files that may be made while they're in the process of being ingested. -} data KeySource = KeySource { keyFilename :: FilePath