found a way to extract InodeCache from git index
This will allow a race-free database transition. It is somewhat hairy in that it depends on an unspecified git output format.
This commit is contained in:
parent
6147130e86
commit
89bdcffdfa
3 changed files with 87 additions and 9 deletions
|
@ -1,6 +1,6 @@
|
|||
{- git ls-files interface
|
||||
-
|
||||
- Copyright 2010-2018 Joey Hess <id@joeyh.name>
|
||||
- Copyright 2010-2019 Joey Hess <id@joeyh.name>
|
||||
-
|
||||
- Licensed under the GNU AGPL version 3 or higher.
|
||||
-}
|
||||
|
@ -31,9 +31,12 @@ import Git
|
|||
import Git.Command
|
||||
import Git.Types
|
||||
import Git.Sha
|
||||
import Utility.InodeCache
|
||||
import Utility.TimeStamp
|
||||
|
||||
import Numeric
|
||||
import System.Posix.Types
|
||||
import qualified Data.Map as M
|
||||
|
||||
{- Scans for files that are checked into git's index at the specified locations. -}
|
||||
inRepo :: [FilePath] -> Repo -> IO ([FilePath], IO Bool)
|
||||
|
@ -275,3 +278,53 @@ reduceUnmerged c (i:is) = reduceUnmerged (new:c) rest
|
|||
, itreeitemtype = Nothing
|
||||
, isha = Nothing
|
||||
}
|
||||
|
||||
{- Gets the InodeCache equivalent information stored in the git index.
 -
 - Runs git ls-files --cached -z --debug on the given locations and
 - returns each listed file paired with the InodeCache parsed from its
 - debug output, along with the IO Bool action that pipeNullSplit
 - returned.
 -
 - Note that this uses a --debug option whose output could change at some
 - point in the future. If the output is not as expected, will use Nothing.
 -}
inodeCaches :: [FilePath] -> Repo -> IO ([(FilePath, Maybe InodeCache)], IO Bool)
inodeCaches locs repo = do
    (ls, cleanup) <- pipeNullSplit params repo
    return (parse Nothing ls, cleanup)
  where
    params =
        Param "ls-files" :
        Param "--cached" :
        Param "-z" :
        Param "--debug" :
        Param "--" :
        map File locs

    -- The first item is a bare filename. Each later item holds the
    -- debug output of the previous file followed by the next filename,
    -- except for the last item, which is only debug output.
    parse Nothing (f:rest) = parse (Just f) rest
    parse (Just f) [s] = [(f, parsedebug s)]
    parse (Just f) (s:rest) =
        let (d, f') = splitdebug s
        in (f, parsedebug d) : parse (Just f') rest
    parse _ _ = []

    -- First 5 lines are --debug output, remainder is the next filename.
    -- This assumes that --debug does not start outputting more lines.
    -- The remainder is rejoined with newlines, so a filename that
    -- itself contains newlines is preserved.
    splitdebug s = case splitc '\n' s of
        (d1:d2:d3:d4:d5:rest) ->
            ( intercalate "\n" [d1, d2, d3, d4, d5]
            , intercalate "\n" rest
            )
        _ -> ("", s)

    -- This parser allows for some changes to the --debug output,
    -- including reordering, or adding more items.
    -- Words ending in ':' are treated as keys, and are paired up in
    -- order with the remaining words. Any missing or unparseable
    -- field makes the whole result Nothing.
    parsedebug s = do
        let l = words s
        let iskey v = ":" `isSuffixOf` v
        let m = M.fromList $ zip (filter iskey l) (filter (not . iskey) l)
        mkInodeCache
            <$> (readish =<< M.lookup "ino:" m)
            <*> (readish =<< M.lookup "size:" m)
            <*> (parsePOSIXTime . fixmtime =<< M.lookup "mtime:" m)

    -- The mtime is output as "seconds:nanoseconds", and the nanoseconds
    -- are not zero padded. Simply replacing the ':' with '.' would turn
    -- "1:5" into 1.5 seconds rather than 1.000000005, so pad the
    -- nanoseconds to 9 digits first. (Assumes parsePOSIXTime reads a
    -- decimal "seconds.fraction" string -- confirm in Utility.TimeStamp.)
    -- Anything that does not look like digits after the ':' is
    -- converted the simple way and left for parsePOSIXTime to judge.
    fixmtime v = case break (== ':') v of
        (sec, ':':nsec)
            | not (null nsec) && all (`elem` "0123456789") nsec ->
                sec ++ "." ++ replicate (9 - length nsec) '0' ++ nsec
        _ -> replace ":" "." v
|
||||
|
|
|
@ -12,6 +12,7 @@
|
|||
|
||||
module Utility.InodeCache (
|
||||
InodeCache,
|
||||
mkInodeCache,
|
||||
InodeComparisonType(..),
|
||||
inodeCacheFileSize,
|
||||
|
||||
|
@ -61,6 +62,10 @@ data InodeCachePrim = InodeCachePrim FileID FileSize MTime
|
|||
newtype InodeCache = InodeCache InodeCachePrim
|
||||
deriving (Show)
|
||||
|
||||
{- Builds an InodeCache from a file's FileID, size, and modification
 - time. The time is stored as a MTimeHighRes. -}
mkInodeCache :: FileID -> FileSize -> POSIXTime -> InodeCache
mkInodeCache inode sz mtime = InodeCache prim
  where
    prim = InodeCachePrim inode sz (MTimeHighRes mtime)
|
||||
|
||||
{- Extracts the file size recorded in an InodeCache. -}
inodeCacheFileSize :: InodeCache -> FileSize
inodeCacheFileSize (InodeCache prim) = case prim of
    InodeCachePrim _ size _ -> size
|
||||
|
||||
|
|
|
@ -48,7 +48,7 @@ This todo documents the state of that branch.
|
|||
|
||||
Fixed by converting to blob.
|
||||
|
||||
* IKey could fail to round-trip as well, when a Key contains something
|
||||
* SKey and IKey could fail to round-trip as well, when a Key contains something
|
||||
(eg, a filename extension) that is not valid in the current locale,
|
||||
for similar reasons to SFilePath. Using BLOB would be better.
|
||||
|
||||
|
@ -86,9 +86,8 @@ remaining todo:
|
|||
> to a PersistText.
|
||||
>
|
||||
> So that seems to leave using a BLOB to store a ByteString for
|
||||
> SKey, IKey, and SFilePath. Attached patch shows how to do that,
|
||||
> but old git-annex won't be able to read the updated databases,
|
||||
> and won't know that it can't read them!
|
||||
> SKey, IKey, and SFilePath. But old git-annex won't be able to
|
||||
> read the updated databases, and won't know that it can't read them!
|
||||
>
|
||||
> This seems to call for a flag day, throwing out the old database
|
||||
> contents and regenerating them from other data:
|
||||
|
@ -102,7 +101,8 @@ remaining todo:
|
|||
> difficult to rebuild, what if in the middle of an interrupted
|
||||
> export?
|
||||
>
|
||||
> updateExportTreeFromLog only updates two tables, not others
|
||||
> updateExportTreeFromLog only updates two tables (ExportTree and
|
||||
> ExportTreeCurrent), not others (Exported and ExportedDirectory).
|
||||
>
|
||||
> Conceptually, this is the same as the repo being lost and another
|
||||
> clone being used to update the export. The clone can only learn
|
||||
|
@ -114,6 +114,26 @@ remaining todo:
|
|||
> Use scanUnlockedFiles to repopulate the Associated table.
|
||||
>
|
||||
> But that does not repopulate the Content table. Doing so needs
|
||||
to iterate over the unlocked files, filter out any that are modified,
|
||||
and record the InodeCaches of the unmodified ones. Seems that it would
|
||||
have to use git's index to know which files are modified.
|
||||
> to iterate over the unlocked files, filter out any that are modified,
|
||||
> and record the InodeCaches of the unmodified ones. Seems that it would
|
||||
> have to use git's index to know which files are modified.
|
||||
>
|
||||
> There is a race; a file could be modified after getting the list of
|
||||
> modified files. To completely avoid that race is tricky. To mostly
|
||||
> eliminate it, just generate the InodeCache, then check
|
||||
> if the file is still unmodified, then check if the InodeCache is still
|
||||
> valid. That leaves some much less likely races where files are being
|
||||
> repeatedly swapped and the InodeCache generations see one file while
|
||||
> the git ls-files --modified see the other one.
|
||||
>
|
||||
> To fully avoid the race, use git ls-files --cached --debug,
|
||||
> and parse the debug output into an InodeCache! This way the info
|
||||
> from git's index is simply copied over into the git-annex database.
|
||||
> One little problem: The --debug format is not specified and may change.
|
||||
> However, it has never actually changed since it was introduced in 2010
|
||||
> (git v1.8.3.1), except for a fix for an unsigned int overflow bug that
|
||||
> was fixed in April 2019.
|
||||
>
|
||||
> Alternatively, can keep the old database code and use it to read the old
|
||||
> databases during the migration. But then bad data that got in due to the
|
||||
> encoding problems will persist.
|
||||
|
|
Loading…
Reference in a new issue