found a way to extract InodeCache from git index
This will allow a race-free database transition. It is somewhat hairy in that it depends on an unspecified git output format.
This commit is contained in:
parent
6147130e86
commit
89bdcffdfa
3 changed files with 87 additions and 9 deletions
|
@ -1,6 +1,6 @@
|
||||||
{- git ls-files interface
|
{- git ls-files interface
|
||||||
-
|
-
|
||||||
- Copyright 2010-2018 Joey Hess <id@joeyh.name>
|
- Copyright 2010-2019 Joey Hess <id@joeyh.name>
|
||||||
-
|
-
|
||||||
- Licensed under the GNU AGPL version 3 or higher.
|
- Licensed under the GNU AGPL version 3 or higher.
|
||||||
-}
|
-}
|
||||||
|
@ -31,9 +31,12 @@ import Git
|
||||||
import Git.Command
|
import Git.Command
|
||||||
import Git.Types
|
import Git.Types
|
||||||
import Git.Sha
|
import Git.Sha
|
||||||
|
import Utility.InodeCache
|
||||||
|
import Utility.TimeStamp
|
||||||
|
|
||||||
import Numeric
|
import Numeric
|
||||||
import System.Posix.Types
|
import System.Posix.Types
|
||||||
|
import qualified Data.Map as M
|
||||||
|
|
||||||
{- Scans for files that are checked into git's index at the specified locations. -}
|
{- Scans for files that are checked into git's index at the specified locations. -}
|
||||||
inRepo :: [FilePath] -> Repo -> IO ([FilePath], IO Bool)
|
inRepo :: [FilePath] -> Repo -> IO ([FilePath], IO Bool)
|
||||||
|
@ -275,3 +278,53 @@ reduceUnmerged c (i:is) = reduceUnmerged (new:c) rest
|
||||||
, itreeitemtype = Nothing
|
, itreeitemtype = Nothing
|
||||||
, isha = Nothing
|
, isha = Nothing
|
||||||
}
|
}
|
||||||
|
|
||||||
|
{- Gets the InodeCache equivalent information stored in the git index.
 -
 - Runs git ls-files --cached -z --debug on the specified locations, and
 - pairs each file it lists with the InodeCache parsed from that file's
 - debug output.
 -
 - Note that this uses a --debug option whose output could change at some
 - point in the future. If the output is not as expected, will use Nothing.
 -
 - The second value returned is the cleanup action from pipeNullSplit.
 -}
inodeCaches :: [FilePath] -> Repo -> IO ([(FilePath, Maybe InodeCache)], IO Bool)
inodeCaches locs repo = do
  (ls, cleanup) <- pipeNullSplit params repo
  return (parse Nothing ls, cleanup)
  where
    params =
      Param "ls-files" :
      Param "--cached" :
      Param "-z" :
      Param "--debug" :
      Param "--" :
      map File locs

    -- The first item is a filename. Each later item holds the debug
    -- output for the previous filename, followed by the next filename.
    parse Nothing (f:ls) = parse (Just f) ls
    parse (Just f) (s:[]) =
      let i = parsedebug s
      in (f, i) : []
    parse (Just f) (s:ls) =
      let (d, f') = splitdebug s
          i = parsedebug d
      in (f, i) : parse (Just f') ls
    parse _ _ = []

    -- First 5 lines are --debug output, remainder is the next filename.
    -- This assumes that --debug does not start outputting more lines.
    -- The remainder is rejoined with newlines, since a filename may
    -- itself contain them.
    splitdebug s = case splitc '\n' s of
      (d1:d2:d3:d4:d5:rest) ->
        ( intercalate "\n" [d1, d2, d3, d4, d5]
        , intercalate "\n" rest
        )
      _ -> ("", s)

    -- This parser allows for some changes to the --debug output,
    -- including reordering, or adding more items.
    -- Words ending in ':' are keys; the other words are their values,
    -- matched up in order of appearance.
    parsedebug s = do
      let l = words s
      let iskey v = ":" `isSuffixOf` v
      let m = M.fromList $ zip
            (filter iskey l)
            (filter (not . iskey) l)
      mkInodeCache
        <$> (readish =<< M.lookup "ino:" m)
        <*> (readish =<< M.lookup "size:" m)
        <*> (parsePOSIXTime . decimalmtime =<< M.lookup "mtime:" m)

    -- git prints mtime as "seconds:nanoseconds", and the nanoseconds
    -- are not zero padded. So "1234:5678" means 1234.000005678, and
    -- simply replacing the ':' with '.' would yield 1234.5678.
    -- Pad the nanoseconds to 9 digits to get a correct decimal.
    -- Anything not in that form is handled as before.
    decimalmtime v = case break (== ':') v of
      (secs, ':':nsecs)
        | not (null nsecs) && length nsecs <= 9 ->
          secs ++ "." ++ replicate (9 - length nsecs) '0' ++ nsecs
      _ -> replace ":" "." v
|
||||||
|
|
|
@ -12,6 +12,7 @@
|
||||||
|
|
||||||
module Utility.InodeCache (
|
module Utility.InodeCache (
|
||||||
InodeCache,
|
InodeCache,
|
||||||
|
mkInodeCache,
|
||||||
InodeComparisonType(..),
|
InodeComparisonType(..),
|
||||||
inodeCacheFileSize,
|
inodeCacheFileSize,
|
||||||
|
|
||||||
|
@ -61,6 +62,10 @@ data InodeCachePrim = InodeCachePrim FileID FileSize MTime
|
||||||
newtype InodeCache = InodeCache InodeCachePrim
|
newtype InodeCache = InodeCache InodeCachePrim
|
||||||
deriving (Show)
|
deriving (Show)
|
||||||
|
|
||||||
|
{- Builds an InodeCache from a file's inode, size, and modification time.
 - The time is stored using the MTimeHighRes constructor. -}
mkInodeCache :: FileID -> FileSize -> POSIXTime -> InodeCache
mkInodeCache fileid filesize modtime = InodeCache prim
  where
    prim = InodeCachePrim fileid filesize (MTimeHighRes modtime)
|
||||||
|
|
||||||
inodeCacheFileSize :: InodeCache -> FileSize
|
inodeCacheFileSize :: InodeCache -> FileSize
|
||||||
inodeCacheFileSize (InodeCache (InodeCachePrim _ sz _)) = sz
|
inodeCacheFileSize (InodeCache (InodeCachePrim _ sz _)) = sz
|
||||||
|
|
||||||
|
|
|
@ -48,7 +48,7 @@ This todo documents the state of that branch.
|
||||||
|
|
||||||
Fixed by converting to blob.
|
Fixed by converting to blob.
|
||||||
|
|
||||||
* IKey could fail to round-trip as well, when a Key contains something
|
* SKey and IKey could fail to round-trip as well, when a Key contains something
|
||||||
(eg, a filename extension) that is not valid in the current locale,
|
(eg, a filename extension) that is not valid in the current locale,
|
||||||
for similar reasons to SFilePath. Using BLOB would be better.
|
for similar reasons to SFilePath. Using BLOB would be better.
|
||||||
|
|
||||||
|
@ -86,9 +86,8 @@ remaining todo:
|
||||||
> to a PersistText.
|
> to a PersistText.
|
||||||
>
|
>
|
||||||
> So that seems to leave using a BLOB to store a ByteString for
|
> So that seems to leave using a BLOB to store a ByteString for
|
||||||
> SKey, IKey, and SFilePath. Attached patch shows how to do that,
|
> SKey, IKey, and SFilePath. But old git-annex won't be able to
|
||||||
> but old git-annex won't be able to read the updated databases,
|
> read the updated databases, and won't know that it can't read them!
|
||||||
> and won't know that it can't read them!
|
|
||||||
>
|
>
|
||||||
> This seems to call for a flag day, throwing out the old database
|
> This seems to call for a flag day, throwing out the old database
|
||||||
> contents and regenerating them from other data:
|
> contents and regenerating them from other data:
|
||||||
|
@ -102,7 +101,8 @@ remaining todo:
|
||||||
> difficult to rebuild, what if in the middle of an interrupted
|
> difficult to rebuild, what if in the middle of an interrupted
|
||||||
> export?
|
> export?
|
||||||
>
|
>
|
||||||
> updateExportTreeFromLog only updates two tables, not others
|
> updateExportTreeFromLog only updates two tables (ExportTree and
|
||||||
|
> ExportTreeCurrent), not others (Exported and ExportedDirectory).
|
||||||
>
|
>
|
||||||
> Conceptually, this is the same as the repo being lost and another
|
> Conceptually, this is the same as the repo being lost and another
|
||||||
> clone being used to update the export. The clone can only learn
|
> clone being used to update the export. The clone can only learn
|
||||||
|
@ -114,6 +114,26 @@ remaining todo:
|
||||||
> Use scanUnlockedFiles to repopulate the Associated table.
|
> Use scanUnlockedFiles to repopulate the Associated table.
|
||||||
>
|
>
|
||||||
> But that does not repopulate the Content table. Doing so needs
|
> But that does not repopulate the Content table. Doing so needs
|
||||||
to iterate over the unlocked files, filter out any that are modified,
|
> to iterate over the unlocked files, filter out any that are modified,
|
||||||
and record the InodeCaches of the unmodified ones. Seems that it would
|
> and record the InodeCaches of the unmodified ones. Seems that it would
|
||||||
have to use git's index to know which files are modified.
|
> have to use git's index to know which files are modified.
|
||||||
|
>
|
||||||
|
> There is a race; a file could be modified after getting the list of
|
||||||
|
> modified files. To completely avoid that race is tricky. To mostly
|
||||||
|
> eliminate it, just generate the InodeCache, then check
|
||||||
|
> if the file is still unmodified, then check if the InodeCache is still
|
||||||
|
> valid. That leaves some much less likely races where files are being
|
||||||
|
> repeatedly swapped and the InodeCache generation sees one file while
|
||||||
|
> git ls-files --modified sees the other one.
|
||||||
|
>
|
||||||
|
> To fully avoid the race, use git ls-files --cached --debug,
|
||||||
|
> and parse the debug output into an InodeCache! This way the info
|
||||||
|
> from git's index is simply copied over into the git-annex database.
|
||||||
|
> One little problem: The --debug format is not specified and may change.
|
||||||
|
> However, it has never actually changed since it was introduced in 2010
|
||||||
|
> (git v1.8.3.1), except for a fix for an unsigned int overflow bug that
|
||||||
|
> was fixed in April 2019.
|
||||||
|
>
|
||||||
|
> Alternatively, can keep the old database code and use it to read the old
|
||||||
|
> databases during the migration. But then bad data that got in due to the
|
||||||
|
> encoding problems will persist.
|
||||||
|
|
Loading…
Reference in a new issue