2015-12-22 17:23:33 +00:00
|
|
|
{- git-annex content ingestion
 -
 - Copyright 2010-2015 Joey Hess <id@joeyh.name>
 -
 - Licensed under the GNU GPL version 3 or higher.
 -}
|
|
|
|
|
|
|
|
{-# LANGUAGE CPP #-}
|
|
|
|
|
|
|
|
module Annex.Ingest
    ( LockedDown(..)
    , LockDownConfig(..)
    , lockDown
    , ingest
    , finishIngestDirect
    , finishIngestUnlocked
    , cleanOldKeys
    , addLink
    , makeLink
    , restoreFile
    , forceParams
    ) where
|
|
|
|
|
2016-01-20 20:36:33 +00:00
|
|
|
import Annex.Common
|
2015-12-22 17:23:33 +00:00
|
|
|
import Types.KeySource
|
|
|
|
import Backend
|
|
|
|
import Annex.Content
|
|
|
|
import Annex.Content.Direct
|
|
|
|
import Annex.Perms
|
|
|
|
import Annex.Link
|
|
|
|
import Annex.MetaData
|
2015-12-22 20:55:49 +00:00
|
|
|
import Logs.Location
|
2015-12-22 17:23:33 +00:00
|
|
|
import qualified Annex
|
|
|
|
import qualified Annex.Queue
|
2015-12-22 20:22:28 +00:00
|
|
|
import qualified Database.Keys
|
2015-12-22 17:23:33 +00:00
|
|
|
import Config
|
|
|
|
import Utility.InodeCache
|
|
|
|
import Annex.ReplaceFile
|
|
|
|
import Utility.Tmp
|
|
|
|
import Utility.CopyFile
|
2016-01-05 21:22:19 +00:00
|
|
|
import Git.FilePath
|
2015-12-22 17:23:33 +00:00
|
|
|
import Annex.InodeSentinal
|
|
|
|
#ifdef WITH_CLIBS
|
|
|
|
#ifndef __ANDROID__
|
|
|
|
import Utility.Touch
|
|
|
|
#endif
|
|
|
|
#endif
|
|
|
|
|
|
|
|
import Control.Exception (IOException)
|
|
|
|
|
2015-12-22 19:23:27 +00:00
|
|
|
{- A file that has been locked down: the configuration that was used to
 - lock it down, paired with the KeySource describing the file. -}
data LockedDown = LockedDown
    { lockDownConfig :: LockDownConfig
    , keySource :: KeySource
    }
    deriving (Show)
|
|
|
|
|
2016-01-07 21:39:59 +00:00
|
|
|
{- Settings controlling how a file is locked down. -}
data LockDownConfig = LockDownConfig
    { lockingFile :: Bool
    -- ^ write bit removed during lock down
    , hardlinkFileTmp :: Bool
    -- ^ hard link to temp directory
    }
    deriving (Show)
|
|
|
|
|
2015-12-22 17:23:33 +00:00
|
|
|
{- Locks down a file that is about to be ingested, before a key is
 - generated for it, so that it is not modified in between.
 -
 - The lock down is imperfect at best (and pretty weak at worst). For
 - example, it does not guard against files that are already opened for
 - write by another process. So, the InodeCache can be used to detect any
 - changes that might be made to the file after it was locked down.
 -
 - When possible, the file is hard linked to a temp directory. This guards
 - against some changes, like deletion or overwrite of the file, and
 - allows lsof checks to be done more efficiently when adding a lot of files.
 -
 - Lockdown can fail if a file gets deleted. In that case the exception
 - is displayed as a warning, and Nothing is returned.
 -}
lockDown :: LockDownConfig -> FilePath -> Annex (Maybe LockedDown)
lockDown cfg file = do
    result <- lockDown' cfg file
    case result of
        Left e -> do
            warning (show e)
            return Nothing
        Right ld -> return (Just ld)
|
2015-12-22 17:23:33 +00:00
|
|
|
|
2016-01-07 21:39:59 +00:00
|
|
|
{- Does the work of lockDown, returning any IOException that occurred
 - rather than warning about it.
 -
 - The file is used in place when the config does not ask for a hard link
 - or the filesystem is crippled. Otherwise, a hard link to the file is
 - made in the annex's misc temp directory; if making the hard link fails
 - with an IO error, this falls back to using the file in place.
 -}
lockDown' :: LockDownConfig -> FilePath -> Annex (Either IOException LockedDown)
lockDown' cfg file = do
    useinplace <- pure (not (hardlinkFileTmp cfg)) <||> crippledFileSystem
    if useinplace
        then withTSDelta $ liftIO . tryIO . inplace
        else tryIO viatmpdir
  where
    viatmpdir = do
        tmp <- fromRepo gitAnnexTmpMiscDir
        createAnnexDirectory tmp
        when (lockingFile cfg) $
            freezeContent file
        withTSDelta $ \delta -> liftIO $ do
            -- openTempFile is only used to pick an unused name in
            -- the temp directory; the file it creates is removed
            -- again so the hard link can be made at that name.
            (tmpfile, h) <- openTempFile tmp $
                relatedTemplate $ takeFileName file
            hClose h
            nukeFile tmpfile
            hardlinked delta tmpfile `catchIO` const (inplace delta)

    -- The content stays where it is.
    inplace delta = lockedAt file <$> genInodeCache file delta

    -- The content is accessed via a hard link.
    hardlinked delta tmpfile = do
        createLink file tmpfile
        lockedAt tmpfile <$> genInodeCache tmpfile delta

    lockedAt loc cache = LockedDown cfg $ KeySource
        { keyFilename = file
        , contentLocation = loc
        , inodeCache = cache
        }
|
|
|
|
|
|
|
|
{- Ingests a locked down file into the annex.
 -
 - The file may be added to the git repository as a locked or an unlocked
 - file. When unlocked, the work tree file is left alone. When locked,
 - the work tree file is deleted, in preparation for adding the symlink.
 -
 - On success, returns the key, and the inode cache of the content when
 - one was obtained. On failure, a warning naming the file is displayed,
 - any temp directory hard link is cleaned up, and (Nothing, Nothing)
 - is returned.
 -}
ingest :: Maybe LockedDown -> Annex (Maybe Key, Maybe InodeCache)
ingest Nothing = return (Nothing, Nothing)
ingest (Just (LockedDown cfg source)) = withTSDelta $ \delta -> do
    backend <- chooseBackend $ keyFilename source
    k <- genKey source backend
    let src = contentLocation source
    ms <- liftIO $ catchMaybeIO $ getFileStatus src
    mcache <- maybe (pure Nothing) (liftIO . toInodeCache delta src) ms
    -- When an inode cache was recorded at lock down, the content must
    -- still match it; otherwise it was modified in the meantime.
    case (mcache, inodeCache source) of
        (_, Nothing) -> go k mcache ms
        (Just newc, Just c) | compareStrong c newc -> go k mcache ms
        _ -> failure "changed while it was being added"
  where
    go (Just (key, _)) mcache (Just s)
        | lockingFile cfg = golocked key mcache s
        | otherwise = ifM isDirect
            ( godirect key mcache s
            , gounlocked key mcache s
            )
    -- A key was generated, but the content could not be statted.
    -- Report that, rather than blaming key generation.
    go (Just _) _ Nothing = failure "failed statting file"
    go Nothing _ _ = failure "failed to generate a key"

    golocked key mcache s = do
        catchNonAsync (moveAnnex key $ contentLocation source)
            (restoreFile (keyFilename source) key)
        liftIO $ nukeFile $ keyFilename source
        populateAssociatedFiles key source
        success key mcache s

    gounlocked key (Just cache) s = do
        -- Remove temp directory hard link first because
        -- linkToAnnex falls back to copying if a file
        -- already has a hard link.
        cleanCruft source
        cleanOldKeys (keyFilename source) key
        r <- linkToAnnex key (keyFilename source) (Just cache)
        case r of
            LinkAnnexFailed -> failure "failed to link to annex"
            _ -> do
                finishIngestUnlocked' key source
                success key (Just cache) s
    gounlocked _ _ _ = failure "failed statting file"

    godirect key (Just cache) s = do
        addInodeCache key cache
        finishIngestDirect key source
        success key (Just cache) s
    godirect _ _ _ = failure "failed statting file"

    success k mcache s = do
        genMetaData k (keyFilename source) s
        return (Just k, mcache)

    failure msg = do
        warning $ keyFilename source ++ " " ++ msg
        cleanCruft source
        return (Nothing, Nothing)
|
|
|
|
|
|
|
|
{- Finishes ingesting a file in direct mode: records the file as an
 - associated file of the key, cleans up any temp directory hard link,
 - and then handles the other files associated with the same key. -}
finishIngestDirect :: Key -> KeySource -> Annex ()
finishIngestDirect key source = do
    void $ addAssociatedFile key file
    cleanCruft source

    {- Copy to any other locations using the same key. -}
    others <- filter (/= file) <$> associatedFiles key
    mapM_ (addContentWhenNotPresent key file) others
  where
    file = keyFilename source
|
|
|
|
|
2015-12-22 20:22:28 +00:00
|
|
|
{- Finishes ingesting an unlocked file: cleans up any temp directory
 - hard link, then does everything finishIngestUnlocked' does. -}
finishIngestUnlocked :: Key -> KeySource -> Annex ()
finishIngestUnlocked key source =
    cleanCruft source >> finishIngestUnlocked' key source
|
|
|
|
|
|
|
|
{- Records the ingested file in the keys database as an associated file
 - of the key, and then populates the key's other associated files. -}
finishIngestUnlocked' :: Key -> KeySource -> Annex ()
finishIngestUnlocked' key source = do
    topf <- inRepo (toTopFilePath (keyFilename source))
    Database.Keys.addAssociatedFile key topf
    populateAssociatedFiles key source
|
|
|
|
|
|
|
|
{- Populates the pointer files of every other file associated with the
 - key (all of them except the file that was just ingested) from the
 - key's annex object. -}
populateAssociatedFiles :: Key -> KeySource -> Annex ()
populateAssociatedFiles key source = do
    obj <- calcRepo (gitAnnexLocation key)
    g <- Annex.gitRepo
    topf <- inRepo (toTopFilePath (keyFilename source))
    let ingestedf = fromTopFilePath topf g
    afs <- Database.Keys.getAssociatedFiles key
    let others =
            [ f
            | af <- afs
            , let f = fromTopFilePath af g
            , f /= ingestedf
            ]
    mapM_ (populatePointerFile key obj) others
|
|
|
|
|
2015-12-22 19:23:27 +00:00
|
|
|
{- Removes the content location of a KeySource when it differs from the
 - file being added (as is the case when lock down hard linked the file
 - to a temp directory). Does nothing when they are the same. -}
cleanCruft :: KeySource -> Annex ()
cleanCruft source
    | contentLocation source == keyFilename source = return ()
    | otherwise = liftIO $ nukeFile $ contentLocation source
|
|
|
|
|
2015-12-22 20:55:49 +00:00
|
|
|
-- If a worktree file was hard linked to an annex object before,
-- modifying the file would have caused the object to have the wrong
-- content. Clean up from that.
--
-- For each key other than newkey that the keys database associates with
-- the file, the annex object is checked against the key's recorded inode
-- caches. When it does not match, the object is unlinked, and is then
-- re-populated from another associated file that does match, if there
-- is one; otherwise the key's content is logged as missing.
cleanOldKeys :: FilePath -> Key -> Annex ()
cleanOldKeys file newkey = do
    g <- Annex.gitRepo
    -- Convert the file to a TopFilePath only once; it is needed both
    -- to query the database and to recognise the ingested file below.
    topf <- inRepo (toTopFilePath file)
    let ingestedf = fromTopFilePath topf g
    oldkeys <- filter (/= newkey)
        <$> Database.Keys.getAssociatedKey topf
    forM_ oldkeys $ \key -> do
        obj <- calcRepo (gitAnnexLocation key)
        caches <- Database.Keys.getInodeCaches key
        unlessM (sameInodeCache obj caches) $ do
            unlinkAnnex key
            fs <- filter (/= ingestedf)
                . map (`fromTopFilePath` g)
                <$> Database.Keys.getAssociatedFiles key
            fs' <- filterM (`sameInodeCache` caches) fs
            case fs' of
                -- If linkToAnnex fails, the associated
                -- file with the content is still present,
                -- so no need for any recovery.
                (f:_) -> do
                    ic <- withTSDelta (liftIO . genInodeCache f)
                    void $ linkToAnnex key f ic
                _ -> logStatus key InfoMissing
|
2015-12-22 20:55:49 +00:00
|
|
|
|
2015-12-22 17:23:33 +00:00
|
|
|
{- Error handler that puts the file back, so it doesn't seem to have
 - vanished, and then rethrows the exception.
 -
 - This can be called before or after the symlink is in place. Nothing
 - is put back unless the key's content is in the annex. -}
restoreFile :: FilePath -> Key -> SomeException -> Annex a
restoreFile file key e = whenM (inAnnex key) putback >> throwM e
  where
    putback = do
        liftIO $ nukeFile file
        -- The key could be used by other files too, so leave the
        -- content in the annex, and make a copy back to the file.
        obj <- calcRepo $ gitAnnexLocation key
        unlessM (liftIO $ copyFileExternal CopyTimeStamps obj file) $
            warning $ "Unable to restore content of " ++ file ++ "; it should be located in " ++ obj
        thawContent file
|
|
|
|
|
|
|
|
{- Replaces the file with a symlink to the annexed content of the key,
 - and returns the link target.
 -
 - If a non-async exception is thrown along the way, restoreFile is used
 - to put the file back before the exception propagates. -}
makeLink :: FilePath -> Key -> Maybe InodeCache -> Annex String
makeLink file key mcache = create `catchNonAsync` restoreFile file key
  where
    create = do
        l <- calcRepo $ gitAnnexLink file key
        replaceFile file $ makeAnnexLink l
        -- touch symlink to have same time as the original file,
        -- as provided in the InodeCache
        maybe noop settime mcache
        return l

#if defined(WITH_CLIBS) && ! defined(__ANDROID__)
    settime c = liftIO $ touch file (TimeSpec $ inodeCacheToMtime c) False
#else
    -- Built without support for touching the symlink.
    settime _ = noop
#endif
|
|
|
|
|
|
|
|
{- Creates the symlink to the annexed content, and stages it in git.
 -
 - When git's core.symlinks setting is enabled, the symlink is staged
 - by queuing a git add, rather than by staging it directly.
 - Using git add is best because it allows the queuing to work
 - and is faster (staging the symlink runs hash-object commands each time).
 - Also, using git add allows it to skip gitignored files, unless forced
 - to include them.
 -}
addLink :: FilePath -> Key -> Maybe InodeCache -> Annex ()
addLink file key mcache = do
    usegitadd <- coreSymlinks <$> Annex.getGitConfig
    l <- makeLink file key mcache
    if usegitadd
        then do
            ps <- forceParams
            Annex.Queue.addCommand "add" (ps ++ [Param "--"]) [file]
        else addAnnexLink l file
|
|
|
|
|
|
|
|
{- Parameters to pass to git add: "-f", forcing addition of ignored
 - files, when the force flag is set in the Annex state, and nothing
 - otherwise. -}
forceParams :: Annex [CommandParam]
forceParams = do
    forced <- Annex.getState Annex.force
    return [Param "-f" | forced]
|