add inode cache to the db

Renamed the db to keys, since it is various info about a Keys.

Dropping a key will update its pointer files, as long as their content can
be verified to be unmodified. This falls back to checksum verification, but
I want it to use an InodeCache of the key, for speed. But, I have not made
anything populate that cache yet.
This commit is contained in:
Joey Hess 2015-12-09 17:00:37 -04:00
parent 3311c48631
commit 5e8c628d2e
Failed to extract signature
9 changed files with 117 additions and 53 deletions

View file

@ -60,7 +60,7 @@ import Types.NumCopies
import Types.LockCache
import Types.DesktopNotify
import Types.CleanupActions
import qualified Database.AssociatedFiles.Types
import qualified Database.Keys.Types
#ifdef WITH_QUVI
import Utility.Quvi (QuviVersion)
#endif
@ -135,7 +135,7 @@ data AnnexState = AnnexState
, desktopnotify :: DesktopNotify
, workers :: [Either AnnexState (Async AnnexState)]
, concurrentjobs :: Maybe Int
, associatedfilesdbhandle :: Maybe Database.AssociatedFiles.Types.DbHandle
, keysdbhandle :: Maybe Database.Keys.Types.DbHandle
}
newState :: GitConfig -> Git.Repo -> AnnexState
@ -181,7 +181,7 @@ newState c r = AnnexState
, desktopnotify = mempty
, workers = []
, concurrentjobs = Nothing
, associatedfilesdbhandle = Nothing
, keysdbhandle = Nothing
}
{- Makes an Annex state object for the specified git repo.

View file

@ -73,7 +73,8 @@ import qualified Backend
import Types.NumCopies
import Annex.UUID
import Annex.InodeSentinal
import qualified Database.AssociatedFiles as AssociatedFiles
import Utility.InodeCache
import qualified Database.Keys
{- Checks if a given key's content is currently present. -}
inAnnex :: Key -> Annex Bool
@ -447,10 +448,10 @@ moveAnnex key src = withObjectLoc key storeobject storedirect
( alreadyhave
, modifyContent dest $ do
liftIO $ moveFile src dest
fs <- AssociatedFiles.getDb key
fs <- Database.Keys.getAssociatedFiles key
if null fs
then freezeContent dest
else mapM_ (populateAssociatedFile key dest) fs
else mapM_ (populatePointerFile key dest) fs
)
storeindirect = storeobject =<< calcRepo (gitAnnexLocation key)
@ -480,8 +481,8 @@ moveAnnex key src = withObjectLoc key storeobject storedirect
alreadyhave = liftIO $ removeFile src
populateAssociatedFile :: Key -> FilePath -> FilePath -> Annex ()
populateAssociatedFile k obj f = go =<< isPointerFile f
populatePointerFile :: Key -> FilePath -> FilePath -> Annex ()
populatePointerFile k obj f = go =<< isPointerFile f
where
go (Just k') | k == k' = liftIO $ do
nukeFile f
@ -598,6 +599,8 @@ removeAnnex (ContentRemovalLock key) = withObjectLoc key remove removedirect
secureErase file
liftIO $ nukeFile file
removeInodeCache key
mapM_ (void . tryIO . resetPointerFile key)
=<< Database.Keys.getAssociatedFiles key
removedirect fs = do
cache <- recordedInodeCache key
removeInodeCache key
@ -607,6 +610,32 @@ removeAnnex (ContentRemovalLock key) = withObjectLoc key remove removedirect
secureErase f
replaceFile f $ makeAnnexLink l
{- To safely reset a pointer file, it has to be the unmodified content of
- the key. The expensive way to tell is to do a verification of its content.
- The cheaper way is to see if the InodeCache for the key matches the
- file.
-}
resetPointerFile :: Key -> FilePath -> Annex ()
resetPointerFile key f = go =<< geti
where
go Nothing = noop
go (Just fc) = ifM (cheapcheck fc <||> expensivecheck fc)
( do
secureErase f
liftIO $ nukeFile f
liftIO $ writeFile f (formatPointer key)
, noop
)
cheapcheck fc = maybe (return False) (compareInodeCaches fc)
=<< Database.Keys.getInodeCache key
expensivecheck fc = ifM (verifyKeyContent AlwaysVerify Types.Remote.UnVerified key f)
-- The file could have been modified while it was
-- being verified. Detect that.
( geti >>= maybe (return False) (compareInodeCaches fc)
, return False
)
geti = withTSDelta (liftIO . genInodeCache f)
{- Runs the secure erase command if set, otherwise does nothing.
- File may or may not be deleted at the end; caller is responsible for
- making sure it's deleted. -}

View file

@ -180,15 +180,6 @@ sameFileStatus key f status = do
([], Nothing) -> return True
_ -> return False
{- If the inodes have changed, only the size and mtime are compared. -}
compareInodeCaches :: InodeCache -> InodeCache -> Annex Bool
compareInodeCaches x y
| compareStrong x y = return True
| otherwise = ifM inodesChanged
( return $ compareWeak x y
, return False
)
elemInodeCaches :: InodeCache -> [InodeCache] -> Annex Bool
elemInodeCaches _ [] = return False
elemInodeCaches c (l:ls) = ifM (compareInodeCaches c l)

View file

@ -14,6 +14,15 @@ import qualified Annex
import Utility.InodeCache
import Annex.Perms
{- If the inodes have changed, only the size and mtime are compared. -}
compareInodeCaches :: InodeCache -> InodeCache -> Annex Bool
compareInodeCaches x y
| compareStrong x y = return True
| otherwise = ifM inodesChanged
( return $ compareWeak x y
, return False
)
{- Some filesystems get new inodes each time they are mounted.
- In order to work on such a filesystem, a sentinal file is used to detect
- when the inodes have changed.

View file

@ -16,7 +16,7 @@ import Annex.FileMatcher
import Types.KeySource
import Backend
import Logs.Location
import qualified Database.AssociatedFiles as AssociatedFiles
import qualified Database.Keys
import qualified Data.ByteString.Lazy as B
@ -103,5 +103,5 @@ emitPointer = putStrLn . formatPointer
updateAssociatedFiles :: Key -> FilePath -> Annex ()
updateAssociatedFiles k f = do
AssociatedFiles.addDb k f
AssociatedFiles.flushDb
Database.Keys.addAssociatedFile k f
Database.Keys.flushDb

View file

@ -1,4 +1,4 @@
{- Sqlite database used for tracking a key's associated files.
{- Sqlite database of information about Keys
-
- Copyright 2015 Joey Hess <id@joeyh.name>
-:
@ -10,19 +10,22 @@
{-# LANGUAGE MultiParamTypeClasses, GeneralizedNewtypeDeriving #-}
{-# LANGUAGE RankNTypes #-}
module Database.AssociatedFiles (
module Database.Keys (
DbHandle,
openDb,
flushDb,
closeDb,
addDb,
getDb,
removeDb,
addAssociatedFile,
getAssociatedFiles,
removeAssociatedFile,
setInodeCache,
getInodeCache,
AssociatedId,
DataId,
) where
import Database.Types
import Database.AssociatedFiles.Types
import Database.Keys.Types
import qualified Database.Handle as H
import Locations
import Common hiding (delete)
@ -31,30 +34,35 @@ import Types.Key
import Annex.Perms
import Annex.LockFile
import Messages
import Utility.InodeCache
import Database.Persist.TH
import Database.Esqueleto hiding (Key)
share [mkPersist sqlSettings, mkMigrate "migrateAssociated"] [persistLowerCase|
share [mkPersist sqlSettings, mkMigrate "migrateKeysDb"] [persistLowerCase|
Associated
key SKey
file FilePath
KeyFileIndex key file
Data
key SKey
inodeCache SInodeCache
KeyIndex key
|]
{- Opens the database, creating it if it doesn't exist yet. -}
openDb :: Annex DbHandle
openDb = withExclusiveLock gitAnnexAssociatedFilesDbLock $ do
dbdir <- fromRepo gitAnnexAssociatedFilesDb
openDb = withExclusiveLock gitAnnexKeysDbLock $ do
dbdir <- fromRepo gitAnnexKeysDb
let db = dbdir </> "db"
unlessM (liftIO $ doesFileExist db) $ do
liftIO $ do
createDirectoryIfMissing True dbdir
H.initDb db $ void $
runMigrationSilent migrateAssociated
runMigrationSilent migrateKeysDb
setAnnexDirPerm dbdir
setAnnexFilePerm db
h <- liftIO $ H.openDb db "associated"
h <- liftIO $ H.openDb db "data"
-- work around https://github.com/yesodweb/persistent/issues/474
liftIO setConsoleEncoding
@ -70,19 +78,19 @@ withDbHandle a = do
liftIO $ a h
dbHandle :: Annex DbHandle
dbHandle = maybe startup return =<< Annex.getState Annex.associatedfilesdbhandle
dbHandle = maybe startup return =<< Annex.getState Annex.keysdbhandle
where
startup = do
h <- openDb
Annex.changeState $ \s -> s { Annex.associatedfilesdbhandle = Just h }
Annex.changeState $ \s -> s { Annex.keysdbhandle = Just h }
return h
{- Flushes any changes made to the database. -}
flushDb :: Annex ()
flushDb = withDbHandle H.flushQueueDb
addDb :: Key -> FilePath -> Annex ()
addDb k f = withDbHandle $ \h -> H.queueDb h (\_ _ -> pure True) $ do
addAssociatedFile :: Key -> FilePath -> Annex ()
addAssociatedFile k f = withDbHandle $ \h -> H.queueDb h (\_ _ -> pure True) $ do
-- If the same file was associated with a different key before,
-- remove that.
delete $ from $ \r -> do
@ -91,21 +99,35 @@ addDb k f = withDbHandle $ \h -> H.queueDb h (\_ _ -> pure True) $ do
where
sk = toSKey k
{- Note that the files returned used to be associated with the key, but
{- Note that the files returned were once associated with the key, but
- some of them may not be any longer. -}
getDb :: Key -> Annex [FilePath]
getDb k = withDbHandle $ \h -> H.queryDb h $ getDb' $ toSKey k
getAssociatedFiles :: Key -> Annex [FilePath]
getAssociatedFiles k = withDbHandle $ \h -> H.queryDb h $
getAssociatedFiles' $ toSKey k
getDb' :: SKey -> SqlPersistM [FilePath]
getDb' sk = do
getAssociatedFiles' :: SKey -> SqlPersistM [FilePath]
getAssociatedFiles' sk = do
l <- select $ from $ \r -> do
where_ (r ^. AssociatedKey ==. val sk)
return (r ^. AssociatedFile)
return $ map unValue l
removeDb :: Key -> FilePath -> Annex ()
removeDb k f = withDbHandle $ \h -> H.queueDb h (\_ _ -> pure True) $
removeAssociatedFile :: Key -> FilePath -> Annex ()
removeAssociatedFile k f = withDbHandle $ \h -> H.queueDb h (\_ _ -> pure True) $
delete $ from $ \r -> do
where_ (r ^. AssociatedKey ==. val sk &&. r ^. AssociatedFile ==. val f)
where
sk = toSKey k
setInodeCache :: Key -> InodeCache -> Annex ()
setInodeCache k i = withDbHandle $ \h -> H.queueDb h (\_ _ -> pure True) $
void $ upsert (Data (toSKey k) (toSInodeCache i)) []
getInodeCache :: Key -> Annex (Maybe (InodeCache))
getInodeCache k = withDbHandle $ \h -> H.queryDb h $ do
l <- select $ from $ \r -> do
where_ (r ^. DataKey ==. val sk)
return (r ^. DataInodeCache)
return $ headMaybe $ map (fromSInodeCache. unValue) l
where
sk = toSKey k

View file

@ -1,11 +1,11 @@
{- Sqlite database used for tracking a key's associated files, data types.
{- Sqlite database of information about Keys, data types.
-
- Copyright 2015 Joey Hess <id@joeyh.name>
-:
- Licensed under the GNU GPL version 3 or higher.
-}
module Database.AssociatedFiles.Types (
module Database.Keys.Types (
DbHandle(..)
) where

View file

@ -13,6 +13,7 @@ import Database.Persist.TH
import Data.Maybe
import Types.Key
import Utility.InodeCache
-- A serialized Key
newtype SKey = SKey String
@ -22,6 +23,18 @@ toSKey :: Key -> SKey
toSKey = SKey . key2file
fromSKey :: SKey -> Key
fromSKey (SKey s) = fromMaybe (error $ "bad serialied key " ++ s) (file2key s)
fromSKey (SKey s) = fromMaybe (error $ "bad serialied Key " ++ s) (file2key s)
derivePersistField "SKey"
-- A serialized InodeCache
newtype SInodeCache = I String
deriving (Show, Read)
toSInodeCache :: InodeCache -> SInodeCache
toSInodeCache = I . showInodeCache
fromSInodeCache :: SInodeCache -> InodeCache
fromSInodeCache (I s) = fromMaybe (error $ "bad serialied InodeCache " ++ s) (readInodeCache s)
derivePersistField "SInodeCache"

View file

@ -29,8 +29,8 @@ module Locations (
gitAnnexBadDir,
gitAnnexBadLocation,
gitAnnexUnusedLog,
gitAnnexAssociatedFilesDb,
gitAnnexAssociatedFilesDbLock,
gitAnnexKeysDb,
gitAnnexKeysDbLock,
gitAnnexFsckState,
gitAnnexFsckDbDir,
gitAnnexFsckDbLock,
@ -239,13 +239,13 @@ gitAnnexBadLocation key r = gitAnnexBadDir r </> keyFile key
gitAnnexUnusedLog :: FilePath -> Git.Repo -> FilePath
gitAnnexUnusedLog prefix r = gitAnnexDir r </> (prefix ++ "unused")
{- .git/annex/map/ contains a database for the associated files map -}
gitAnnexAssociatedFilesDb :: Git.Repo -> FilePath
gitAnnexAssociatedFilesDb r = gitAnnexDir r </> "map"
{- .git/annex/keys/ contains a database of information about keys. -}
gitAnnexKeysDb :: Git.Repo -> FilePath
gitAnnexKeysDb r = gitAnnexDir r </> "keys"
{- Lock file for the associated files map database. -}
gitAnnexAssociatedFilesDbLock :: Git.Repo -> FilePath
gitAnnexAssociatedFilesDbLock r = gitAnnexAssociatedFilesDb r ++ "lck"
{- Lock file for the keys database. -}
gitAnnexKeysDbLock :: Git.Repo -> FilePath
gitAnnexKeysDbLock r = gitAnnexKeysDb r ++ "lck"
{- .git/annex/fsck/uuid/ is used to store information about incremental
- fscks. -}