c35a9047d3
This is a non-backwards compatible change, so not suitable for merging without an annex.version bump and transition code. Not yet tested.

This improves performance of git-annex benchmark --databases across the board by 10-25%, since eg Key now round-trips as a ByteString. (serializeKey' produces a lazy ByteString, so there is still a copy involved in converting it to a strict ByteString. It may be faster to switch to using bytestring-strict-builder.)

FilePath and Key are both stored as blobs. This avoids mojibake in some situations. It would be possible to use varchar instead, if persistent could avoid converting that to Text, but it seems there is no good way to do so. See doc/todo/sqlite_database_improvements.mdwn

Eliminated some ugly artifacts of using Read/Show serialization; constructors and quoted strings are no longer stored in sqlite.

Renamed SRef to SSha to reflect that it is only ever a git sha, not a ref name. Since it is limited to the characters in a sha, it is not affected by mojibake, so it still uses String.
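For illustration only (this sketch is not part of the commit): one way to store a Key as a SQLite blob with persistent is a newtype whose field instance round-trips through a strict ByteString. The names SKey, serializeKey', and deserializeKey' below are stand-ins with placeholder bodies, not git-annex's real types and helpers.

{- Illustrative sketch only; not code from this commit. The Key type and
 - its (de)serializers here are placeholders standing in for the real ones. -}
module SKeySketch where

import Database.Persist.Sql
import qualified Data.ByteString as S
import qualified Data.ByteString.Lazy as L
import qualified Data.Text as T

-- Placeholder stand-ins for git-annex's Key and its serializers.
data Key = Key

serializeKey' :: Key -> L.ByteString
serializeKey' _ = L.empty -- placeholder

deserializeKey' :: S.ByteString -> Maybe Key
deserializeKey' _ = Just Key -- placeholder

-- Newtype stored in sqlite as a blob, avoiding Read/Show artifacts
-- and the Text conversion that varchar would force.
newtype SKey = SKey Key

instance PersistField SKey where
	toPersistValue (SKey k) =
		-- serializeKey' is lazy, so converting to a strict
		-- ByteString still copies; bytestring-strict-builder
		-- could avoid that copy.
		PersistByteString (L.toStrict (serializeKey' k))
	fromPersistValue (PersistByteString b) =
		maybe (Left (T.pack "unable to parse Key blob"))
			(Right . SKey)
			(deserializeKey' b)
	fromPersistValue v =
		Left (T.pack ("expected a blob, got: " ++ show v))

instance PersistFieldSql SKey where
	sqlType _ = SqlBlob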
304 lines
10 KiB
Haskell
{- Sqlite database of information about Keys
 -
 - Copyright 2015-2019 Joey Hess <id@joeyh.name>
 -
 - Licensed under the GNU AGPL version 3 or higher.
 -}

{-# LANGUAGE ScopedTypeVariables #-}

module Database.Keys (
	DbHandle,
	closeDb,
	addAssociatedFile,
	getAssociatedFiles,
	getAssociatedKey,
	removeAssociatedFile,
	storeInodeCaches,
	storeInodeCaches',
	addInodeCaches,
	getInodeCaches,
	removeInodeCaches,
	isInodeKnown,
	runWriter,
) where

import qualified Database.Keys.SQL as SQL
import Database.Types
import Database.Keys.Handle
import qualified Database.Queue as H
import Database.Init
import Annex.Locations
import Annex.Common hiding (delete)
import qualified Annex
import Annex.LockFile
import Annex.CatFile
import Annex.Content.PointerFile
import Annex.Link
import Utility.InodeCache
import Annex.InodeSentinal
import Git
import Git.FilePath
import Git.Command
import Git.Types
import Git.Index

{- Runs an action that reads from the database.
 -
 - If the database doesn't already exist, it's not created; mempty is
 - returned instead. This way, when the keys database is not in use,
 - there's minimal overhead in checking it.
 -
 - If the database is already open, any writes are flushed to it, to ensure
 - consistency.
 -
 - Any queued writes will be flushed before the read.
 -}
runReader :: Monoid v => (SQL.ReadHandle -> Annex v) -> Annex v
runReader a = do
	h <- getDbHandle
	withDbState h go
  where
	go DbUnavailable = return (mempty, DbUnavailable)
	go st@(DbOpen qh) = do
		liftIO $ H.flushDbQueue qh
		v <- a (SQL.ReadHandle qh)
		return (v, st)
	go DbClosed = do
		st' <- openDb False DbClosed
		v <- case st' of
			(DbOpen qh) -> a (SQL.ReadHandle qh)
			_ -> return mempty
		return (v, st')

runReaderIO :: Monoid v => (SQL.ReadHandle -> IO v) -> Annex v
runReaderIO a = runReader (liftIO . a)

{- Runs an action that writes to the database. Typically this is used to
 - queue changes, which will be flushed at a later point.
 -
 - The database is created if it doesn't exist yet. -}
runWriter :: (SQL.WriteHandle -> Annex ()) -> Annex ()
runWriter a = do
	h <- getDbHandle
	withDbState h go
  where
	go st@(DbOpen qh) = do
		v <- a (SQL.WriteHandle qh)
		return (v, st)
	go st = do
		st' <- openDb True st
		v <- case st' of
			DbOpen qh -> a (SQL.WriteHandle qh)
			_ -> error "internal"
		return (v, st')

runWriterIO :: (SQL.WriteHandle -> IO ()) -> Annex ()
runWriterIO a = runWriter (liftIO . a)

{- Gets the handle cached in Annex state; creates a new one if it's not yet
 - available, but doesn't open the database. -}
getDbHandle :: Annex DbHandle
getDbHandle = go =<< Annex.getState Annex.keysdbhandle
  where
	go (Just h) = pure h
	go Nothing = do
		h <- liftIO newDbHandle
		Annex.changeState $ \s -> s { Annex.keysdbhandle = Just h }
		return h

{- Opens the database, perhaps creating it if it doesn't exist yet.
 -
 - Multiple readers and writers can have the database open at the same
 - time. Database.Handle deals with the concurrency issues.
 - The lock is held while opening the database, so that when
 - the database doesn't exist yet, one caller wins the lock and
 - can create it undisturbed.
 -}
openDb :: Bool -> DbState -> Annex DbState
openDb _ st@(DbOpen _) = return st
openDb False DbUnavailable = return DbUnavailable
openDb createdb _ = catchPermissionDenied permerr $ withExclusiveLock gitAnnexKeysDbLock $ do
	dbdir <- fromRepo gitAnnexKeysDb
	let db = dbdir </> "db"
	dbexists <- liftIO $ doesFileExist db
	case (dbexists, createdb) of
		(True, _) -> open db
		(False, True) -> do
			initDb db SQL.createTables
			open db
		(False, False) -> return DbUnavailable
  where
	-- If permissions don't allow opening the database, treat it as if
	-- it does not exist.
	permerr e = case createdb of
		False -> return DbUnavailable
		True -> throwM e

	open db = do
		qh <- liftIO $ H.openDbQueue H.MultiWriter db SQL.containedTable
		reconcileStaged qh
		return $ DbOpen qh

{- Closes the database if it was open. Any writes will be flushed to it.
 -
 - This does not normally need to be called; the database will auto-close
 - when the handle is garbage collected. However, this can be used to
 - force a re-read of the database, in case another process has written
 - data to it.
 -}
closeDb :: Annex ()
closeDb = Annex.getState Annex.keysdbhandle >>= \case
	Nothing -> return ()
	Just h -> liftIO (closeDbHandle h)

addAssociatedFile :: Key -> TopFilePath -> Annex ()
addAssociatedFile k f = runWriterIO $ SQL.addAssociatedFile k f

{- Note that the files returned were once associated with the key, but
 - some of them may not be any longer. -}
getAssociatedFiles :: Key -> Annex [TopFilePath]
getAssociatedFiles = runReaderIO . SQL.getAssociatedFiles

{- Gets any keys that are on record as having a particular associated file.
 - (Should be one or none but the database doesn't enforce that.) -}
getAssociatedKey :: TopFilePath -> Annex [Key]
getAssociatedKey = runReaderIO . SQL.getAssociatedKey

removeAssociatedFile :: Key -> TopFilePath -> Annex ()
removeAssociatedFile k = runWriterIO . SQL.removeAssociatedFile k

{- Stats the files, and stores their InodeCaches. -}
storeInodeCaches :: Key -> [FilePath] -> Annex ()
storeInodeCaches k fs = storeInodeCaches' k fs []

storeInodeCaches' :: Key -> [FilePath] -> [InodeCache] -> Annex ()
storeInodeCaches' k fs ics = withTSDelta $ \d ->
	addInodeCaches k . (++ ics) . catMaybes
		=<< liftIO (mapM (`genInodeCache` d) fs)

addInodeCaches :: Key -> [InodeCache] -> Annex ()
addInodeCaches k is = runWriterIO $ SQL.addInodeCaches k is

{- A key may have multiple InodeCaches; one for the annex object, and one
 - for each pointer file that is a copy of it. -}
getInodeCaches :: Key -> Annex [InodeCache]
getInodeCaches = runReaderIO . SQL.getInodeCaches

removeInodeCaches :: Key -> Annex ()
removeInodeCaches = runWriterIO . SQL.removeInodeCaches

-- Bool is not a Monoid, so the result is wrapped in a single-element
-- list and collapsed with `or`; when the database is unavailable,
-- mempty ([]) makes this return False.
isInodeKnown :: InodeCache -> SentinalStatus -> Annex Bool
isInodeKnown i s = or <$> runReaderIO ((:[]) <$$> SQL.isInodeKnown i s)

{- Looks at staged changes to find when unlocked files are copied/moved,
 - and updates associated files in the keys database.
 -
 - Since staged changes can be dropped later, does not remove any
 - associated files; only adds new associated files.
 -
 - This needs to be run before querying the keys database so that
 - information is consistent with the state of the repository.
 -
 - To avoid unnecessary work, the index file is statted, and if it's not
 - changed since last time this was run, nothing is done.
 -
 - Note that this is run with a lock held, so only one process can be
 - running this at a time.
 -
 - This also cleans up after a race between eg a git mv and git-annex
 - get/drop/similar. If git moves the file between this being run and the
 - get/drop, the moved file won't be updated for the get/drop.
 - The next time this runs, it will see the staged change. It then checks
 - if the worktree file's content availability does not match the git-annex
 - content availability, and makes changes as necessary to reconcile them.
 -
 - Note that if a commit happens before this runs again, it won't see
 - the staged change. Instead, during the commit, git will run the clean
 - filter. If a drop missed the file then the file is added back into the
 - annex. If a get missed the file then the clean filter populates the
 - file.
 -}
reconcileStaged :: H.DbQueue -> Annex ()
reconcileStaged qh = do
	gitindex <- inRepo currentIndexFile
	indexcache <- fromRepo gitAnnexKeysDbIndexCache
	withTSDelta (liftIO . genInodeCache gitindex) >>= \case
		Just cur ->
			liftIO (maybe Nothing readInodeCache <$> catchMaybeIO (readFile indexcache)) >>= \case
				Nothing -> go cur indexcache
				Just prev -> ifM (compareInodeCaches prev cur)
					( noop
					, go cur indexcache
					)
		Nothing -> noop
  where
	go cur indexcache = do
		(l, cleanup) <- inRepo $ pipeNullSplit diff
		changed <- procdiff l False
		void $ liftIO cleanup
		-- Flush database changes immediately
		-- so other processes can see them.
		when changed $
			liftIO $ H.flushDbQueue qh
		liftIO $ writeFile indexcache $ showInodeCache cur

	diff =
		-- Avoid using external diff command, which would be slow.
		-- (The -G option may make it be used otherwise.)
		[ Param "-c", Param "diff.external="
		-- Avoid running smudge or clean filters, since we want the
		-- raw output, and they would block trying to access the
		-- locked database. The --raw normally avoids git diff
		-- running them, but older versions of git need this.
		, Param "-c", Param "filter.annex.smudge="
		, Param "-c", Param "filter.annex.clean="
		, Param "diff"
		, Param "--cached"
		, Param "--raw"
		, Param "-z"
		, Param "--abbrev=40"
		-- Optimization: Only find pointer files. This is not
		-- perfect. A file could start with this and not be a
		-- pointer file. And a pointer file that is replaced with
		-- a non-pointer file will match this.
		, Param $ "-G^" ++ toInternalGitPath (pathSeparator:objectDir)
		-- Don't include files that were deleted, because this only
		-- wants to update information for files that are present
		-- in the index.
		, Param "--diff-filter=AMUT"
		-- Disable rename detection.
		, Param "--no-renames"
		-- Avoid other complications.
		, Param "--ignore-submodules=all"
		, Param "--no-ext-diff"
		]

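	-- With -z, the --raw diff output alternates between an info
	-- string like ":100644 100644 <srcsha> <dstsha> M" and the
	-- changed file's path, so each record is consumed as two
	-- items from the null-split list.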
	procdiff (info:file:rest) changed = case words info of
		((':':_srcmode):dstmode:_srcsha:dstsha:_change:[])
			-- Only want files, not symlinks
			| dstmode /= fmtTreeItemType TreeSymlink -> do
				maybe noop (reconcile (asTopFilePath file))
					=<< catKey (Ref dstsha)
				procdiff rest True
			| otherwise -> procdiff rest changed
		_ -> return changed -- parse failed
	procdiff _ changed = return changed

	-- Note that database writes done in here will not necessarily
	-- be visible to database reads also done in here.
	reconcile file key = do
		liftIO $ SQL.addAssociatedFileFast key file (SQL.WriteHandle qh)
		caches <- liftIO $ SQL.getInodeCaches key (SQL.ReadHandle qh)
		keyloc <- calcRepo (gitAnnexLocation key)
		keypopulated <- sameInodeCache keyloc caches
		p <- fromRepo $ fromTopFilePath file
		filepopulated <- sameInodeCache p caches
		case (keypopulated, filepopulated) of
			(True, False) ->
				populatePointerFile (Restage True) key keyloc p >>= \case
					Nothing -> return ()
					Just ic -> liftIO $
						SQL.addInodeCaches key [ic] (SQL.WriteHandle qh)
			(False, True) -> depopulatePointerFile key p
			_ -> return ()