git-annex/Annex/WorkTree.hs
Joey Hess c35a9047d3
improve data types for sqlite
This is a non-backwards compatable change, so not suitable for merging
w/o a annex.version bump and transition code. Not yet tested.

This improves performance of git-annex benchmark --databases
across the board by 10-25%, since eg Key roundtrips as a ByteString.

(serializeKey' produces a lazy ByteString, so there is still a
copy involved in converting it to a strict ByteString. It may be faster
to switch to using bytestring-strict-builder.)

FilePath and Key are both stored as blobs. This avoids mojibake in some
situations. It would be possible to use varchar instead, if persistent
could avoid converting that to Text, but it seems there is no good
way to do so. See doc/todo/sqlite_database_improvements.mdwn

Eliminated some ugly artifacts of using Read/Show serialization;
constructors and quoted strings are no longer stored in sqlite.

Renamed SRef to SSha to reflect that it is only ever a git sha,
not a ref name. Since it is limited to the characters in a sha,
it is not affected by mojibake, so still uses String.
2019-10-29 17:05:36 -04:00

105 lines
3.4 KiB
Haskell

{- git-annex worktree files
-
- Copyright 2013-2019 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Annex.WorkTree where
import Annex.Common
import Annex.Link
import Annex.CatFile
import Annex.Content
import Annex.ReplaceFile
import Annex.CurrentBranch
import Annex.InodeSentinal
import Utility.InodeCache
import Git.FilePath
import qualified Git.Ref
import qualified Git.LsTree
import qualified Git.Types
import qualified Database.Keys
import qualified Database.Keys.SQL
{- Looks up the key corresponding to an annexed file in the work tree,
- by examining what the file links to.
-
- An unlocked file will not have a link on disk, so fall back to
- looking for a pointer to a key in git.
-
- When in an adjusted branch that may have hidden the file, looks for a
- pointer to a key in the original branch.
-}
lookupFile :: FilePath -> Annex (Maybe Key)
lookupFile = lookupFile' catkeyfile
where
catkeyfile file =
ifM (liftIO $ doesFileExist file)
( catKeyFile file
, catKeyFileHidden file =<< getCurrentBranch
)
lookupFileNotHidden :: FilePath -> Annex (Maybe Key)
lookupFileNotHidden = lookupFile' catkeyfile
where
catkeyfile file =
ifM (liftIO $ doesFileExist file)
( catKeyFile file
, return Nothing
)
lookupFile' :: (FilePath -> Annex (Maybe Key)) -> FilePath -> Annex (Maybe Key)
lookupFile' catkeyfile file = isAnnexLink file >>= \case
Just key -> return (Just key)
Nothing -> catkeyfile file
{- Modifies an action to only act on files that are already annexed,
- and passes the key on to it. -}
whenAnnexed :: (FilePath -> Key -> Annex (Maybe a)) -> FilePath -> Annex (Maybe a)
whenAnnexed a file = ifAnnexed file (a file) (return Nothing)
ifAnnexed :: FilePath -> (Key -> Annex a) -> Annex a -> Annex a
ifAnnexed file yes no = maybe no yes =<< lookupFile file
{- Find all unlocked files and update the keys database for them.
-
- This is expensive, and so normally the associated files are updated
- incrementally when changes are noticed. So, this only needs to be done
- when initializing/upgrading a v6+ mode repository.
-
- Also, the content for the unlocked file may already be present as
- an annex object. If so, make the unlocked file use that content
- when replacefiles is True.
-}
scanUnlockedFiles :: Bool -> Annex ()
scanUnlockedFiles replacefiles = whenM (inRepo Git.Ref.headExists) $ do
Database.Keys.runWriter $
liftIO . Database.Keys.SQL.dropAllAssociatedFiles
(l, cleanup) <- inRepo $ Git.LsTree.lsTree Git.LsTree.LsTreeRecursive Git.Ref.headRef
forM_ l $ \i ->
when (isregfile i) $
maybe noop (add i)
=<< catKey (Git.LsTree.sha i)
liftIO $ void cleanup
where
isregfile i = case Git.Types.toTreeItemType (Git.LsTree.mode i) of
Just Git.Types.TreeFile -> True
Just Git.Types.TreeExecutable -> True
_ -> False
add i k = do
let tf = Git.LsTree.file i
Database.Keys.runWriter $
liftIO . Database.Keys.SQL.addAssociatedFileFast k tf
whenM (pure replacefiles <&&> inAnnex k) $ do
f <- fromRepo $ fromTopFilePath tf
destmode <- liftIO $ catchMaybeIO $ fileMode <$> getFileStatus f
ic <- replaceFile f $ \tmp ->
linkFromAnnex k tmp destmode >>= \case
LinkAnnexOk ->
withTSDelta (liftIO . genInodeCache tmp)
LinkAnnexNoop -> return Nothing
LinkAnnexFailed -> liftIO $ do
writePointerFile tmp k destmode
return Nothing
maybe noop (restagePointerFile (Restage True) f) ic