use InodeCache when dropping a key to see if a pointer file can be safely reset
The Keys database can hold multiple inode caches for a given key. One for the annex object, and one for each pointer file, which may not be hard linked to it. Inode caches for a key are recorded when its content is added to the annex, but only if it has known pointer files. This is to avoid the overhead of maintaining the database when not needed. When the smudge filter outputs a file's content, the inode cache is not updated, because git's smudge interface doesn't let us write the file. So, dropping will fall back to doing an expensive verification then. Ideally, git's interface would be improved, and then the inode cache could be updated then too.
This commit is contained in:
parent
5e8c628d2e
commit
ce73a96e4e
6 changed files with 62 additions and 46 deletions
|
@ -17,6 +17,7 @@ import System.Posix.Signals
|
||||||
import Common.Annex
|
import Common.Annex
|
||||||
import qualified Annex
|
import qualified Annex
|
||||||
import Annex.Content
|
import Annex.Content
|
||||||
|
import qualified Database.Keys
|
||||||
|
|
||||||
{- Actions to perform each time ran. -}
|
{- Actions to perform each time ran. -}
|
||||||
startup :: Annex ()
|
startup :: Annex ()
|
||||||
|
@ -32,4 +33,5 @@ shutdown :: Bool -> Annex ()
|
||||||
shutdown nocommit = do
|
shutdown nocommit = do
|
||||||
saveState nocommit
|
saveState nocommit
|
||||||
sequence_ =<< M.elems <$> Annex.getState Annex.cleanup
|
sequence_ =<< M.elems <$> Annex.getState Annex.cleanup
|
||||||
|
Database.Keys.shutdown
|
||||||
liftIO reapZombies -- zombies from long-running git processes
|
liftIO reapZombies -- zombies from long-running git processes
|
||||||
|
|
|
@ -451,7 +451,9 @@ moveAnnex key src = withObjectLoc key storeobject storedirect
|
||||||
fs <- Database.Keys.getAssociatedFiles key
|
fs <- Database.Keys.getAssociatedFiles key
|
||||||
if null fs
|
if null fs
|
||||||
then freezeContent dest
|
then freezeContent dest
|
||||||
else mapM_ (populatePointerFile key dest) fs
|
else do
|
||||||
|
mapM_ (populatePointerFile key dest) fs
|
||||||
|
Database.Keys.storeInodeCaches key (dest:fs)
|
||||||
)
|
)
|
||||||
storeindirect = storeobject =<< calcRepo (gitAnnexLocation key)
|
storeindirect = storeobject =<< calcRepo (gitAnnexLocation key)
|
||||||
|
|
||||||
|
@ -505,7 +507,9 @@ linkAnnex key src = do
|
||||||
( return LinkAnnexNoop
|
( return LinkAnnexNoop
|
||||||
, modifyContent dest $
|
, modifyContent dest $
|
||||||
ifM (liftIO $ createLinkOrCopy src dest)
|
ifM (liftIO $ createLinkOrCopy src dest)
|
||||||
( return LinkAnnexOk
|
( do
|
||||||
|
Database.Keys.storeInodeCaches key [dest, src]
|
||||||
|
return LinkAnnexOk
|
||||||
, return LinkAnnexFailed
|
, return LinkAnnexFailed
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
@ -601,6 +605,7 @@ removeAnnex (ContentRemovalLock key) = withObjectLoc key remove removedirect
|
||||||
removeInodeCache key
|
removeInodeCache key
|
||||||
mapM_ (void . tryIO . resetPointerFile key)
|
mapM_ (void . tryIO . resetPointerFile key)
|
||||||
=<< Database.Keys.getAssociatedFiles key
|
=<< Database.Keys.getAssociatedFiles key
|
||||||
|
Database.Keys.removeInodeCaches key
|
||||||
removedirect fs = do
|
removedirect fs = do
|
||||||
cache <- recordedInodeCache key
|
cache <- recordedInodeCache key
|
||||||
removeInodeCache key
|
removeInodeCache key
|
||||||
|
@ -613,8 +618,7 @@ removeAnnex (ContentRemovalLock key) = withObjectLoc key remove removedirect
|
||||||
{- To safely reset a pointer file, it has to be the unmodified content of
|
{- To safely reset a pointer file, it has to be the unmodified content of
|
||||||
- the key. The expensive way to tell is to do a verification of its content.
|
- the key. The expensive way to tell is to do a verification of its content.
|
||||||
- The cheaper way is to see if the InodeCache for the key matches the
|
- The cheaper way is to see if the InodeCache for the key matches the
|
||||||
- file.
|
- file. -}
|
||||||
-}
|
|
||||||
resetPointerFile :: Key -> FilePath -> Annex ()
|
resetPointerFile :: Key -> FilePath -> Annex ()
|
||||||
resetPointerFile key f = go =<< geti
|
resetPointerFile key f = go =<< geti
|
||||||
where
|
where
|
||||||
|
@ -624,10 +628,14 @@ resetPointerFile key f = go =<< geti
|
||||||
secureErase f
|
secureErase f
|
||||||
liftIO $ nukeFile f
|
liftIO $ nukeFile f
|
||||||
liftIO $ writeFile f (formatPointer key)
|
liftIO $ writeFile f (formatPointer key)
|
||||||
, noop
|
-- Can't delete the pointer file.
|
||||||
|
-- If it was a hard link to the annex object,
|
||||||
|
-- that object might have been frozen as part of the
|
||||||
|
-- removal process, so thaw it.
|
||||||
|
, thawContent f
|
||||||
)
|
)
|
||||||
cheapcheck fc = maybe (return False) (compareInodeCaches fc)
|
cheapcheck fc = anyM (compareInodeCaches fc)
|
||||||
=<< Database.Keys.getInodeCache key
|
=<< Database.Keys.getInodeCaches key
|
||||||
expensivecheck fc = ifM (verifyKeyContent AlwaysVerify Types.Remote.UnVerified key f)
|
expensivecheck fc = ifM (verifyKeyContent AlwaysVerify Types.Remote.UnVerified key f)
|
||||||
-- The file could have been modified while it was
|
-- The file could have been modified while it was
|
||||||
-- being verified. Detect that.
|
-- being verified. Detect that.
|
||||||
|
|
|
@ -48,7 +48,7 @@ smudge file = do
|
||||||
case parseLinkOrPointer b of
|
case parseLinkOrPointer b of
|
||||||
Nothing -> liftIO $ B.putStr b
|
Nothing -> liftIO $ B.putStr b
|
||||||
Just k -> do
|
Just k -> do
|
||||||
updateAssociatedFiles k file
|
Database.Keys.addAssociatedFile k file
|
||||||
content <- calcRepo (gitAnnexLocation k)
|
content <- calcRepo (gitAnnexLocation k)
|
||||||
liftIO $ B.hPut stdout . fromMaybe b
|
liftIO $ B.hPut stdout . fromMaybe b
|
||||||
=<< catchMaybeIO (B.readFile content)
|
=<< catchMaybeIO (B.readFile content)
|
||||||
|
@ -65,7 +65,7 @@ clean file = do
|
||||||
else ifM (shouldAnnex file)
|
else ifM (shouldAnnex file)
|
||||||
( do
|
( do
|
||||||
k <- ingest file
|
k <- ingest file
|
||||||
updateAssociatedFiles k file
|
Database.Keys.addAssociatedFile k file
|
||||||
liftIO $ emitPointer k
|
liftIO $ emitPointer k
|
||||||
, liftIO $ B.hPut stdout b
|
, liftIO $ B.hPut stdout b
|
||||||
)
|
)
|
||||||
|
@ -100,8 +100,3 @@ ingest file = do
|
||||||
|
|
||||||
emitPointer :: Key -> IO ()
|
emitPointer :: Key -> IO ()
|
||||||
emitPointer = putStrLn . formatPointer
|
emitPointer = putStrLn . formatPointer
|
||||||
|
|
||||||
updateAssociatedFiles :: Key -> FilePath -> Annex ()
|
|
||||||
updateAssociatedFiles k f = do
|
|
||||||
Database.Keys.addAssociatedFile k f
|
|
||||||
Database.Keys.flushDb
|
|
||||||
|
|
|
@ -13,15 +13,17 @@
|
||||||
module Database.Keys (
|
module Database.Keys (
|
||||||
DbHandle,
|
DbHandle,
|
||||||
openDb,
|
openDb,
|
||||||
flushDb,
|
|
||||||
closeDb,
|
closeDb,
|
||||||
|
shutdown,
|
||||||
addAssociatedFile,
|
addAssociatedFile,
|
||||||
getAssociatedFiles,
|
getAssociatedFiles,
|
||||||
removeAssociatedFile,
|
removeAssociatedFile,
|
||||||
setInodeCache,
|
storeInodeCaches,
|
||||||
getInodeCache,
|
addInodeCaches,
|
||||||
|
getInodeCaches,
|
||||||
|
removeInodeCaches,
|
||||||
AssociatedId,
|
AssociatedId,
|
||||||
DataId,
|
ContentId,
|
||||||
) where
|
) where
|
||||||
|
|
||||||
import Database.Types
|
import Database.Types
|
||||||
|
@ -35,6 +37,7 @@ import Annex.Perms
|
||||||
import Annex.LockFile
|
import Annex.LockFile
|
||||||
import Messages
|
import Messages
|
||||||
import Utility.InodeCache
|
import Utility.InodeCache
|
||||||
|
import Annex.InodeSentinal
|
||||||
|
|
||||||
import Database.Persist.TH
|
import Database.Persist.TH
|
||||||
import Database.Esqueleto hiding (Key)
|
import Database.Esqueleto hiding (Key)
|
||||||
|
@ -44,10 +47,10 @@ Associated
|
||||||
key SKey
|
key SKey
|
||||||
file FilePath
|
file FilePath
|
||||||
KeyFileIndex key file
|
KeyFileIndex key file
|
||||||
Data
|
Content
|
||||||
key SKey
|
key SKey
|
||||||
inodeCache SInodeCache
|
cache SInodeCache
|
||||||
KeyIndex key
|
KeyCacheIndex key cache
|
||||||
|]
|
|]
|
||||||
|
|
||||||
{- Opens the database, creating it if it doesn't exist yet. -}
|
{- Opens the database, creating it if it doesn't exist yet. -}
|
||||||
|
@ -62,7 +65,7 @@ openDb = withExclusiveLock gitAnnexKeysDbLock $ do
|
||||||
runMigrationSilent migrateKeysDb
|
runMigrationSilent migrateKeysDb
|
||||||
setAnnexDirPerm dbdir
|
setAnnexDirPerm dbdir
|
||||||
setAnnexFilePerm db
|
setAnnexFilePerm db
|
||||||
h <- liftIO $ H.openDb db "data"
|
h <- liftIO $ H.openDb db "content"
|
||||||
|
|
||||||
-- work around https://github.com/yesodweb/persistent/issues/474
|
-- work around https://github.com/yesodweb/persistent/issues/474
|
||||||
liftIO setConsoleEncoding
|
liftIO setConsoleEncoding
|
||||||
|
@ -85,9 +88,12 @@ dbHandle = maybe startup return =<< Annex.getState Annex.keysdbhandle
|
||||||
Annex.changeState $ \s -> s { Annex.keysdbhandle = Just h }
|
Annex.changeState $ \s -> s { Annex.keysdbhandle = Just h }
|
||||||
return h
|
return h
|
||||||
|
|
||||||
{- Flushes any changes made to the database. -}
|
shutdown :: Annex ()
|
||||||
flushDb :: Annex ()
|
shutdown = maybe noop go =<< Annex.getState Annex.keysdbhandle
|
||||||
flushDb = withDbHandle H.flushQueueDb
|
where
|
||||||
|
go h = do
|
||||||
|
Annex.changeState $ \s -> s { Annex.keysdbhandle = Nothing }
|
||||||
|
liftIO $ closeDb h
|
||||||
|
|
||||||
addAssociatedFile :: Key -> FilePath -> Annex ()
|
addAssociatedFile :: Key -> FilePath -> Annex ()
|
||||||
addAssociatedFile k f = withDbHandle $ \h -> H.queueDb h (\_ _ -> pure True) $ do
|
addAssociatedFile k f = withDbHandle $ \h -> H.queueDb h (\_ _ -> pure True) $ do
|
||||||
|
@ -113,21 +119,35 @@ getAssociatedFiles' sk = do
|
||||||
return $ map unValue l
|
return $ map unValue l
|
||||||
|
|
||||||
removeAssociatedFile :: Key -> FilePath -> Annex ()
|
removeAssociatedFile :: Key -> FilePath -> Annex ()
|
||||||
removeAssociatedFile k f = withDbHandle $ \h -> H.queueDb h (\_ _ -> pure True) $
|
removeAssociatedFile k f = withDbHandle $ \h -> H.queueDb h (\_ _ -> pure True) $
|
||||||
delete $ from $ \r -> do
|
delete $ from $ \r -> do
|
||||||
where_ (r ^. AssociatedKey ==. val sk &&. r ^. AssociatedFile ==. val f)
|
where_ (r ^. AssociatedKey ==. val sk &&. r ^. AssociatedFile ==. val f)
|
||||||
where
|
where
|
||||||
sk = toSKey k
|
sk = toSKey k
|
||||||
|
|
||||||
setInodeCache :: Key -> InodeCache -> Annex ()
|
{- Stats the files, and stores their InodeCaches. -}
|
||||||
setInodeCache k i = withDbHandle $ \h -> H.queueDb h (\_ _ -> pure True) $
|
storeInodeCaches :: Key -> [FilePath] -> Annex ()
|
||||||
void $ upsert (Data (toSKey k) (toSInodeCache i)) []
|
storeInodeCaches k fs = withTSDelta $ \d ->
|
||||||
|
addInodeCaches k . catMaybes =<< liftIO (mapM (`genInodeCache` d) fs)
|
||||||
|
|
||||||
getInodeCache :: Key -> Annex (Maybe (InodeCache))
|
addInodeCaches :: Key -> [InodeCache] -> Annex ()
|
||||||
getInodeCache k = withDbHandle $ \h -> H.queryDb h $ do
|
addInodeCaches k is = withDbHandle $ \h -> H.queueDb h (\_ _ -> pure True) $
|
||||||
|
forM_ is $ \i -> insertUnique $ Content (toSKey k) (toSInodeCache i)
|
||||||
|
|
||||||
|
{- A key may have multiple InodeCaches; one for the annex object, and one
|
||||||
|
- for each pointer file that is a copy of it. -}
|
||||||
|
getInodeCaches :: Key -> Annex [InodeCache]
|
||||||
|
getInodeCaches k = withDbHandle $ \h -> H.queryDb h $ do
|
||||||
l <- select $ from $ \r -> do
|
l <- select $ from $ \r -> do
|
||||||
where_ (r ^. DataKey ==. val sk)
|
where_ (r ^. ContentKey ==. val sk)
|
||||||
return (r ^. DataInodeCache)
|
return (r ^. ContentCache)
|
||||||
return $ headMaybe $ map (fromSInodeCache . unValue) l
|
return $ map (fromSInodeCache . unValue) l
|
||||||
|
where
|
||||||
|
sk = toSKey k
|
||||||
|
|
||||||
|
removeInodeCaches :: Key -> Annex ()
|
||||||
|
removeInodeCaches k = withDbHandle $ \h -> H.queueDb h (\_ _ -> pure True) $
|
||||||
|
delete $ from $ \r -> do
|
||||||
|
where_ (r ^. ContentKey ==. val sk)
|
||||||
where
|
where
|
||||||
sk = toSKey k
|
sk = toSKey k
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
{- Caching a file's inode, size, and modification time
|
{- Caching a file's inode, size, and modification time
|
||||||
- to see when it's changed.
|
- to see when it's changed.
|
||||||
-
|
-
|
||||||
- Copyright 2013, 2014 Joey Hess <id@joeyh.name>
|
- Copyright 2013-2014 Joey Hess <id@joeyh.name>
|
||||||
-
|
-
|
||||||
- License: BSD-2-clause
|
- License: BSD-2-clause
|
||||||
-}
|
-}
|
||||||
|
|
|
@ -328,6 +328,8 @@ files to be unlocked, while the indirect upgrades don't touch the files.
|
||||||
* inAnnex check should fail in the case where an annexed object is unlocked
|
* inAnnex check should fail in the case where an annexed object is unlocked
|
||||||
and has had its content changed. Could use an InodeCache for
|
and has had its content changed. Could use an InodeCache for
|
||||||
such objects. This parallels how inAnnex checks work for direct mode.
|
such objects. This parallels how inAnnex checks work for direct mode.
|
||||||
|
* Also, Annex.Content.prepSendAnnex should check the InodeCache for
|
||||||
|
changes.
|
||||||
* Reconcile staged changes into the associated files database, whenever
|
* Reconcile staged changes into the associated files database, whenever
|
||||||
the database is queried.
|
the database is queried.
|
||||||
* See if the cases where the associated files database is not used can be
|
* See if the cases where the associated files database is not used can be
|
||||||
|
@ -338,17 +340,6 @@ files to be unlocked, while the indirect upgrades don't touch the files.
|
||||||
(when not in direct mode).
|
(when not in direct mode).
|
||||||
However, beware over-optimisation breaking the assistant or perhaps other
|
However, beware over-optimisation breaking the assistant or perhaps other
|
||||||
long-lived processes.
|
long-lived processes.
|
||||||
* Update pointer files when dropping the content of a key.
|
|
||||||
- Check the associated files database to find associated files for the key.
|
|
||||||
- Verify that worktree files are not modified from the annexed object.
|
|
||||||
How? InodeCache could be maintained, but the smudge filer interface
|
|
||||||
wouldn't let it be updated when smudging a file. May need to take
|
|
||||||
an expensive path:
|
|
||||||
1. stat object file
|
|
||||||
2. stat worktree file
|
|
||||||
3. if same stat, ok else hash worktree file
|
|
||||||
4. stat worktree file again after checking hash; make sure it's
|
|
||||||
unchanged from earlier stat
|
|
||||||
* Convert `git annex unlock` to stage a pointer file, and hard link to the
|
* Convert `git annex unlock` to stage a pointer file, and hard link to the
|
||||||
annexed object (or write pointer file if annexed object not present).
|
annexed object (or write pointer file if annexed object not present).
|
||||||
- Also needs to thaw annex object file
|
- Also needs to thaw annex object file
|
||||||
|
|
Loading…
Reference in a new issue