2012-06-06 04:03:08 +00:00
|
|
|
{- git-update-index library
|
|
|
|
-
|
2022-09-23 18:28:52 +00:00
|
|
|
- Copyright 2011-2022 Joey Hess <id@joeyh.name>
|
2012-06-06 04:03:08 +00:00
|
|
|
-
|
2019-03-13 19:48:14 +00:00
|
|
|
- Licensed under the GNU AGPL version 3 or higher.
|
2012-06-06 04:03:08 +00:00
|
|
|
-}
|
|
|
|
|
2019-11-25 20:18:19 +00:00
|
|
|
{-# LANGUAGE BangPatterns, OverloadedStrings, CPP #-}
|
2012-06-10 16:50:06 +00:00
|
|
|
|
2012-06-06 04:03:08 +00:00
|
|
|
module Git.UpdateIndex (
|
|
|
|
Streamer,
|
2012-06-08 04:29:39 +00:00
|
|
|
pureStreamer,
|
|
|
|
streamUpdateIndex,
|
2014-02-18 21:38:23 +00:00
|
|
|
streamUpdateIndex',
|
2020-06-04 16:05:25 +00:00
|
|
|
withUpdateIndex,
|
2012-06-08 04:29:39 +00:00
|
|
|
lsTree,
|
2014-03-04 19:00:19 +00:00
|
|
|
lsSubTree,
|
2012-06-08 04:29:39 +00:00
|
|
|
updateIndexLine,
|
2013-10-22 16:58:04 +00:00
|
|
|
stageFile,
|
2012-06-10 17:05:58 +00:00
|
|
|
unstageFile,
|
2014-11-13 20:41:21 +00:00
|
|
|
stageSymlink,
|
|
|
|
stageDiffTreeItem,
|
2018-08-17 20:03:40 +00:00
|
|
|
refreshIndex,
|
2012-06-06 04:03:08 +00:00
|
|
|
) where
|
|
|
|
|
|
|
|
import Common
|
|
|
|
import Git
|
2012-06-06 18:26:15 +00:00
|
|
|
import Git.Types
|
2012-06-06 04:03:08 +00:00
|
|
|
import Git.Command
|
2012-06-06 18:26:15 +00:00
|
|
|
import Git.FilePath
|
2012-06-10 17:05:58 +00:00
|
|
|
import Git.Sha
|
2014-12-22 19:32:51 +00:00
|
|
|
import qualified Git.DiffTreeItem as Diff
|
2012-06-06 04:03:08 +00:00
|
|
|
|
2020-10-29 18:20:57 +00:00
|
|
|
import qualified Data.ByteString as S
|
2019-11-25 20:18:19 +00:00
|
|
|
import qualified Data.ByteString.Lazy as L
|
2020-06-04 16:05:25 +00:00
|
|
|
import Control.Monad.IO.Class
|
2019-11-25 20:18:19 +00:00
|
|
|
|
2012-06-06 04:03:08 +00:00
|
|
|
{- Streamers are passed a callback and should feed it lines in the form
|
|
|
|
- read by update-index, and generated by ls-tree. -}
|
2019-11-25 20:18:19 +00:00
|
|
|
type Streamer = (L.ByteString -> IO ()) -> IO ()
|
2012-06-06 04:03:08 +00:00
|
|
|
|
2012-06-08 04:29:39 +00:00
|
|
|
{- A streamer with a precalculated value. -}
|
2019-11-25 20:18:19 +00:00
|
|
|
pureStreamer :: L.ByteString -> Streamer
|
2012-06-10 16:50:06 +00:00
|
|
|
pureStreamer !s = \streamer -> streamer s
|
2012-06-08 04:29:39 +00:00
|
|
|
|
2012-06-06 04:03:08 +00:00
|
|
|
{- Streams content into update-index from a list of Streamers. -}
|
2012-06-08 04:29:39 +00:00
|
|
|
streamUpdateIndex :: Repo -> [Streamer] -> IO ()
|
2020-06-04 16:05:25 +00:00
|
|
|
streamUpdateIndex repo as = withUpdateIndex repo $ \h ->
|
|
|
|
forM_ as $ streamUpdateIndex' h
|
2014-02-18 21:38:23 +00:00
|
|
|
|
2020-06-04 16:05:25 +00:00
|
|
|
data UpdateIndexHandle = UpdateIndexHandle Handle
|
2014-02-18 21:38:23 +00:00
|
|
|
|
|
|
|
streamUpdateIndex' :: UpdateIndexHandle -> Streamer -> IO ()
|
2020-06-04 16:05:25 +00:00
|
|
|
streamUpdateIndex' (UpdateIndexHandle h) a = a $ \s -> do
|
2019-11-25 20:18:19 +00:00
|
|
|
L.hPutStr h s
|
|
|
|
L.hPutStr h "\0"
|
2014-02-18 21:38:23 +00:00
|
|
|
|
2020-06-04 16:05:25 +00:00
|
|
|
withUpdateIndex :: (MonadIO m, MonadMask m) => Repo -> (UpdateIndexHandle -> m a) -> m a
|
|
|
|
withUpdateIndex repo a = bracket setup cleanup go
|
2012-12-13 04:24:19 +00:00
|
|
|
where
|
|
|
|
params = map Param ["update-index", "-z", "--index-info"]
|
2020-06-04 16:05:25 +00:00
|
|
|
|
|
|
|
setup = liftIO $ createProcess $
|
|
|
|
(gitCreateProcess params repo)
|
|
|
|
{ std_in = CreatePipe }
|
|
|
|
go p = do
|
|
|
|
r <- a (UpdateIndexHandle (stdinHandle p))
|
|
|
|
liftIO $ do
|
|
|
|
hClose (stdinHandle p)
|
|
|
|
void $ checkSuccessProcess (processHandle p)
|
|
|
|
return r
|
|
|
|
|
|
|
|
cleanup = liftIO . cleanupProcess
|
2012-06-06 04:03:08 +00:00
|
|
|
|
2012-06-08 04:29:39 +00:00
|
|
|
{- A streamer that adds the current tree for a ref. Useful for eg, copying
|
|
|
|
- and modifying branches. -}
|
|
|
|
lsTree :: Ref -> Repo -> Streamer
|
2012-10-04 22:47:31 +00:00
|
|
|
lsTree (Ref x) repo streamer = do
|
|
|
|
(s, cleanup) <- pipeNullSplit params repo
|
|
|
|
mapM_ streamer s
|
|
|
|
void $ cleanup
|
2012-12-13 04:24:19 +00:00
|
|
|
where
|
2021-08-11 00:45:02 +00:00
|
|
|
params = map Param ["ls-tree", "-z", "-r", "--full-tree", decodeBS x]
|
2014-03-04 19:00:19 +00:00
|
|
|
lsSubTree :: Ref -> FilePath -> Repo -> Streamer
|
|
|
|
lsSubTree (Ref x) p repo streamer = do
|
|
|
|
(s, cleanup) <- pipeNullSplit params repo
|
|
|
|
mapM_ streamer s
|
|
|
|
void $ cleanup
|
|
|
|
where
|
2021-08-11 00:45:02 +00:00
|
|
|
params = map Param ["ls-tree", "-z", "-r", "--full-tree", decodeBS x, p]
|
2012-06-06 18:26:15 +00:00
|
|
|
|
|
|
|
{- Generates a line suitable to be fed into update-index, to add
|
|
|
|
- a given file with a given sha. -}
|
2019-11-25 20:18:19 +00:00
|
|
|
updateIndexLine :: Sha -> TreeItemType -> TopFilePath -> L.ByteString
|
|
|
|
updateIndexLine sha treeitemtype file = L.fromStrict $
|
|
|
|
fmtTreeItemType treeitemtype
|
|
|
|
<> " blob "
|
2020-04-07 15:54:27 +00:00
|
|
|
<> fromRef' sha
|
2019-11-25 20:18:19 +00:00
|
|
|
<> "\t"
|
|
|
|
<> indexPath file
|
2018-05-14 18:22:44 +00:00
|
|
|
|
|
|
|
stageFile :: Sha -> TreeItemType -> FilePath -> Repo -> IO Streamer
|
|
|
|
stageFile sha treeitemtype file repo = do
|
2019-12-09 17:49:05 +00:00
|
|
|
p <- toTopFilePath (toRawFilePath file) repo
|
2018-05-14 18:22:44 +00:00
|
|
|
return $ pureStreamer $ updateIndexLine sha treeitemtype p
|
2013-10-22 16:58:04 +00:00
|
|
|
|
2012-06-10 17:05:58 +00:00
|
|
|
{- A streamer that removes a file from the index. -}
|
|
|
|
unstageFile :: FilePath -> Repo -> IO Streamer
|
|
|
|
unstageFile file repo = do
|
2019-12-09 17:49:05 +00:00
|
|
|
p <- toTopFilePath (toRawFilePath file) repo
|
2014-11-13 20:41:21 +00:00
|
|
|
return $ unstageFile' p
|
|
|
|
|
|
|
|
unstageFile' :: TopFilePath -> Streamer
|
2019-11-25 20:18:19 +00:00
|
|
|
unstageFile' p = pureStreamer $ L.fromStrict $
|
|
|
|
"0 "
|
2020-04-07 15:54:27 +00:00
|
|
|
<> fromRef' deleteSha
|
2019-11-25 20:18:19 +00:00
|
|
|
<> "\t"
|
|
|
|
<> indexPath p
|
2012-06-10 17:05:58 +00:00
|
|
|
|
2012-06-08 04:29:39 +00:00
|
|
|
{- A streamer that adds a symlink to the index. -}
|
2020-10-30 19:55:59 +00:00
|
|
|
stageSymlink :: RawFilePath -> Sha -> Repo -> IO Streamer
|
2012-06-10 23:58:34 +00:00
|
|
|
stageSymlink file sha repo = do
|
2012-06-13 01:13:15 +00:00
|
|
|
!line <- updateIndexLine
|
2012-06-10 23:58:34 +00:00
|
|
|
<$> pure sha
|
2018-05-14 18:22:44 +00:00
|
|
|
<*> pure TreeSymlink
|
2020-10-30 19:55:59 +00:00
|
|
|
<*> toTopFilePath file repo
|
2012-06-08 04:29:39 +00:00
|
|
|
return $ pureStreamer line
|
2013-05-12 22:18:48 +00:00
|
|
|
|
2014-11-13 20:41:21 +00:00
|
|
|
{- A streamer that applies a DiffTreeItem to the index. -}
|
|
|
|
stageDiffTreeItem :: Diff.DiffTreeItem -> Streamer
|
2018-05-14 18:22:44 +00:00
|
|
|
stageDiffTreeItem d = case toTreeItemType (Diff.dstmode d) of
|
2014-11-13 20:41:21 +00:00
|
|
|
Nothing -> unstageFile' (Diff.file d)
|
|
|
|
Just t -> pureStreamer $ updateIndexLine (Diff.dstsha d) t (Diff.file d)
|
|
|
|
|
2013-05-12 22:18:48 +00:00
|
|
|
indexPath :: TopFilePath -> InternalGitPath
|
2019-12-09 17:49:05 +00:00
|
|
|
indexPath = toInternalGitPath . getTopFilePath
|
2018-08-17 20:03:40 +00:00
|
|
|
|
add restage log
When pointer files need to be restaged, they're first written to the
log, and then when the restage operation runs, it reads the log. This
way, if the git-annex process is interrupted before it can do the
restaging, a later git-annex process can do it.
Currently, this lets a git-annex get/drop command be interrupted and
then re-ran, and as long as it gets/drops additional files, it will
clean up after the interrupted command. But more changes are
needed to make it easier to restage after an interrupted process.
Kept using the git queue to run the restage action, even though the
list of files that it builds up for that action is not actually used by
the action. This could perhaps be simplified to make restaging a cleanup
action that gets registered, rather than using the git queue for it. But
I wasn't sure if that would cause visible behavior changes, when eg
dropping a large number of files, currently the git queue flushes
periodically, and so it restages incrementally, rather than all at the
end.
In restagePointerFiles, it reads the restage log twice, once to get
the number of files and size, and a second time to process it.
This seemed better than reading the whole file into memory, since
potentially a huge number of files could be in there. Probably the OS
will cache the file in memory and there will not be much performance
impact. It might be better to keep running tallies in another file
though. But updating that atomically with the log seems hard.
Also note that it's possible for calcRestageLog to see a different file
than streamRestageLog does. More files may be added to the log in
between. That is ok, it will only cause the filterprocessfaster heuristic to
operate with slightly out of date information, so it may make the wrong
choice for the files that got added and be a little slower than ideal.
Sponsored-by: Dartmouth College's DANDI project
2022-09-23 18:38:59 +00:00
|
|
|
{- Refreshes the index, by checking file stat information.
|
|
|
|
-
|
|
|
|
- The action is passed a callback that it can use to send filenames to
|
|
|
|
- update-index. Sending Nothing will wait for update-index to finish
|
|
|
|
- updating the index.
|
|
|
|
-}
|
|
|
|
refreshIndex :: (MonadIO m, MonadMask m) => Repo -> ((Maybe RawFilePath -> IO ()) -> m ()) -> m ()
|
2022-09-23 18:28:52 +00:00
|
|
|
refreshIndex repo feeder = bracket
|
|
|
|
(liftIO $ createProcess p)
|
|
|
|
(liftIO . cleanupProcess)
|
|
|
|
go
|
2018-08-17 20:03:40 +00:00
|
|
|
where
|
|
|
|
params =
|
|
|
|
[ Param "update-index"
|
|
|
|
, Param "-q"
|
|
|
|
, Param "--refresh"
|
|
|
|
, Param "-z"
|
|
|
|
, Param "--stdin"
|
|
|
|
]
|
2020-06-04 16:13:26 +00:00
|
|
|
|
|
|
|
p = (gitCreateProcess params repo)
|
|
|
|
{ std_in = CreatePipe }
|
|
|
|
|
2022-09-23 18:28:52 +00:00
|
|
|
go (Just h, _, _, pid) = do
|
add restage log
When pointer files need to be restaged, they're first written to the
log, and then when the restage operation runs, it reads the log. This
way, if the git-annex process is interrupted before it can do the
restaging, a later git-annex process can do it.
Currently, this lets a git-annex get/drop command be interrupted and
then re-ran, and as long as it gets/drops additional files, it will
clean up after the interrupted command. But more changes are
needed to make it easier to restage after an interrupted process.
Kept using the git queue to run the restage action, even though the
list of files that it builds up for that action is not actually used by
the action. This could perhaps be simplified to make restaging a cleanup
action that gets registered, rather than using the git queue for it. But
I wasn't sure if that would cause visible behavior changes, when eg
dropping a large number of files, currently the git queue flushes
periodically, and so it restages incrementally, rather than all at the
end.
In restagePointerFiles, it reads the restage log twice, once to get
the number of files and size, and a second time to process it.
This seemed better than reading the whole file into memory, since
potentially a huge number of files could be in there. Probably the OS
will cache the file in memory and there will not be much performance
impact. It might be better to keep running tallies in another file
though. But updating that atomically with the log seems hard.
Also note that it's possible for calcRestageLog to see a different file
than streamRestageLog does. More files may be added to the log in
between. That is ok, it will only cause the filterprocessfaster heuristic to
operate with slightly out of date information, so it may make the wrong
choice for the files that got added and be a little slower than ideal.
Sponsored-by: Dartmouth College's DANDI project
2022-09-23 18:38:59 +00:00
|
|
|
let closer = do
|
|
|
|
hClose h
|
|
|
|
forceSuccessProcess p pid
|
|
|
|
feeder $ \case
|
|
|
|
Just f -> S.hPut h (S.snoc f 0)
|
|
|
|
Nothing -> closer
|
|
|
|
liftIO $ closer
|
2022-09-23 18:28:52 +00:00
|
|
|
go _ = error "internal"
|