git-annex/Git/Tree.hs

466 lines
15 KiB
Haskell
Raw Permalink Normal View History

2016-02-23 20:36:08 +00:00
{- git trees
-
- Copyright 2016-2023 Joey Hess <id@joeyh.name>
2016-02-23 20:36:08 +00:00
-
- Licensed under the GNU AGPL version 3 or higher.
2016-02-23 20:36:08 +00:00
-}
2016-03-11 23:29:43 +00:00
{-# LANGUAGE BangPatterns, TypeSynonymInstances, FlexibleInstances #-}
{-# LANGUAGE OverloadedStrings #-}
2016-02-23 20:36:08 +00:00
module Git.Tree (
Tree(..),
TreeContent(..),
getTree,
recordTree,
recordTree',
recordSubTree,
TreeItem(..),
treeItemsToTree,
treeItemToLsTreeItem,
lsTreeItemToTreeItem,
adjustTree,
graftTree,
graftTree',
withMkTreeHandle,
MkTreeHandle,
sendMkTree,
finishMkTree,
treeMode,
2016-02-23 20:36:08 +00:00
) where
import Common
import Git
import Git.FilePath
import Git.Types
import Git.Command
2016-02-23 22:30:11 +00:00
import Git.Sha
2016-02-23 20:36:08 +00:00
import qualified Git.LsTree as LsTree
import qualified Utility.CoProcess as CoProcess
import qualified System.FilePath.ByteString as P
2016-02-23 20:36:08 +00:00
import Numeric
import System.Posix.Types
import Control.Monad.IO.Class
2016-03-11 20:30:06 +00:00
import qualified Data.Set as S
2016-03-11 23:29:43 +00:00
import qualified Data.Map as M
import qualified Data.ByteString as B
import qualified Data.ByteString.Char8 as S8
2016-02-23 20:36:08 +00:00
newtype Tree = Tree [TreeContent]
deriving (Show)
data TreeContent
-- A blob object in the tree.
2016-02-23 20:36:08 +00:00
= TreeBlob TopFilePath FileMode Sha
-- A subtree that is already recorded in git, with a known sha.
| RecordedSubTree TopFilePath Sha [TreeContent]
-- A subtree that has not yet been recorded in git.
| NewSubTree TopFilePath [TreeContent]
-- A commit object that is part of a tree (used for submodules)
| TreeCommit TopFilePath FileMode Sha
2016-03-11 23:29:43 +00:00
deriving (Show, Eq, Ord)
2016-02-23 20:36:08 +00:00
{- Gets the Tree for a Ref. -}
getTree :: LsTree.LsTreeRecursive -> Ref -> Repo -> IO Tree
getTree recursive r repo = do
(l, cleanup) <- lsTreeWithObjects recursive r repo
let !t = either (\e -> giveup ("ls-tree parse error:" ++ e)) id
(extractTree l)
void cleanup
return t
2016-02-23 20:36:08 +00:00
lsTreeWithObjects :: LsTree.LsTreeRecursive -> Ref -> Repo -> IO ([LsTree.TreeItem], IO Bool)
lsTreeWithObjects recursive =
LsTree.lsTree' [Param "-t"] recursive (LsTree.LsTreeLong False)
2016-02-23 20:36:08 +00:00
2016-02-24 02:21:25 +00:00
newtype MkTreeHandle = MkTreeHandle CoProcess.CoProcessHandle
withMkTreeHandle :: (MonadIO m, MonadMask m) => Repo -> (MkTreeHandle -> m a) -> m a
withMkTreeHandle repo a = bracketIO setup cleanup (a . MkTreeHandle)
where
setup = gitCoProcessStart False ps repo
ps = [Param "mktree", Param "--missing", Param "--batch", Param "-z"]
2016-02-24 02:21:25 +00:00
cleanup = CoProcess.stop
2016-02-23 20:36:08 +00:00
{- Records a Tree in the Repo, returning its Sha.
-
- Efficiently handles subtrees, by only recording ones that have not
- already been recorded before. And even when many subtrees need to be
- recorded, it's done with a single call to git mktree, using its batch
- interface.
-}
recordTree :: Tree -> Repo -> IO Sha
2016-02-24 02:21:25 +00:00
recordTree t repo = withMkTreeHandle repo $ \h -> recordTree' h t
2016-02-23 20:36:08 +00:00
2016-02-24 02:21:25 +00:00
recordTree' :: MkTreeHandle -> Tree -> IO Sha
recordTree' h (Tree l) = mkTree h =<< mapM (recordSubTree h) l
2016-02-23 20:36:08 +00:00
{- Note that the returned RecordedSubTree does not have its [TreeContent]
- list populated. This is a memory optimisation, since the list is not
- used. -}
2016-02-24 02:21:25 +00:00
recordSubTree :: MkTreeHandle -> TreeContent -> IO TreeContent
2016-02-23 20:36:08 +00:00
recordSubTree h (NewSubTree d l) = do
sha <- mkTree h =<< mapM (recordSubTree h) l
return (RecordedSubTree d sha [])
2016-02-23 20:36:08 +00:00
recordSubTree _ alreadyrecorded = return alreadyrecorded
sendMkTree :: MkTreeHandle -> FileMode -> ObjectType -> Sha -> TopFilePath -> IO ()
sendMkTree (MkTreeHandle cp) fm ot s f =
CoProcess.send cp $ \h ->
hPutStr h (mkTreeOutput fm ot s f)
finishMkTree :: MkTreeHandle -> IO Sha
finishMkTree (MkTreeHandle cp) = do
CoProcess.send cp $ \h ->
-- NUL to signal end of tree to --batch
hPutStr h "\NUL"
getSha "mktree" (CoProcess.receive cp S8.hGetLine)
mkTree :: MkTreeHandle -> [TreeContent] -> IO Sha
mkTree h l = do
forM_ l $ \case
TreeBlob f fm s -> sendMkTree h fm BlobObject s f
RecordedSubTree f s _ -> sendMkTree h treeMode TreeObject s f
NewSubTree _ _ -> error "recordSubTree internal error; unexpected NewSubTree"
TreeCommit f fm s -> sendMkTree h fm CommitObject s f
finishMkTree h
2016-02-23 20:36:08 +00:00
treeMode :: FileMode
treeMode = 0o040000
2016-02-23 20:36:08 +00:00
mkTreeOutput :: FileMode -> ObjectType -> Sha -> TopFilePath -> String
2016-02-23 21:21:42 +00:00
mkTreeOutput fm ot s f = concat
[ showOct fm ""
, " "
, decodeBS (fmtObjectType ot)
2016-02-23 21:21:42 +00:00
, " "
, fromRef s
, "\t"
, takeFileName (fromRawFilePath (getTopFilePath f))
2016-02-23 21:21:42 +00:00
, "\NUL"
]
data TreeItem = TreeItem TopFilePath FileMode Sha
deriving (Show, Eq)
2016-03-11 18:08:06 +00:00
treeItemToTreeContent :: TreeItem -> TreeContent
treeItemToTreeContent (TreeItem f m s) = case toTreeItemType m of
Just TreeSubmodule -> TreeCommit f m s
_ -> TreeBlob f m s
2016-03-11 23:29:43 +00:00
treeItemToLsTreeItem :: TreeItem -> LsTree.TreeItem
treeItemToLsTreeItem (TreeItem f mode sha) = LsTree.TreeItem
{ LsTree.mode = mode
, LsTree.typeobj = fmtObjectType $ case toTreeItemType mode of
Just TreeSubmodule -> CommitObject
Just TreeSubtree -> TreeObject
_ -> BlobObject
, LsTree.sha = sha
, LsTree.size = Nothing
, LsTree.file = f
}
lsTreeItemToTreeItem :: LsTree.TreeItem -> TreeItem
lsTreeItemToTreeItem ti = TreeItem
(LsTree.file ti)
(LsTree.mode ti)
(LsTree.sha ti)
2016-03-11 23:29:43 +00:00
treeItemsToTree :: [TreeItem] -> Tree
treeItemsToTree = go M.empty
where
go m [] = Tree $ filter inTopTree (M.elems m)
2016-03-11 23:29:43 +00:00
go m (i:is)
| inTopTree p =
2016-03-11 23:29:43 +00:00
go (M.insert p (treeItemToTreeContent i) m) is
| otherwise = case M.lookup idir m of
Just (NewSubTree d l) ->
go (addsubtree idir m (NewSubTree d (c:l))) is
_ ->
go (addsubtree idir m (NewSubTree (asTopFilePath idir) [c])) is
2016-03-11 23:29:43 +00:00
where
p = gitPath i
idir = P.takeDirectory p
2016-03-11 23:29:43 +00:00
c = treeItemToTreeContent i
addsubtree d m t
| not (inTopTree d) =
2016-03-11 23:29:43 +00:00
let m' = M.insert d t m
in case M.lookup parent m' of
Just (NewSubTree d' l) ->
let l' = filter (\ti -> gitPath ti /= d) l
in addsubtree parent m' (NewSubTree d' (t:l'))
_ -> addsubtree parent m' (NewSubTree (asTopFilePath parent) [t])
2016-03-11 23:29:43 +00:00
| otherwise = M.insert d t m
where
parent = P.takeDirectory d
2016-03-11 18:08:06 +00:00
{- Flattens the top N levels of a Tree. -}
flattenTree :: Int -> Tree -> Tree
flattenTree 0 t = t
flattenTree n (Tree l) = Tree (concatMap (go n) l)
where
go 0 c = [c]
go _ b@(TreeBlob _ _ _) = [b]
go n' (RecordedSubTree _ _ l') = concatMap (go (n'-1)) l'
go n' (NewSubTree _ l') = concatMap (go (n'-1)) l'
go _ c@(TreeCommit _ _ _) = [c]
{- Applies an adjustment to items in a tree.
-
2016-03-11 20:45:40 +00:00
- While less flexible than using getTree and recordTree,
- this avoids buffering the whole tree in memory.
-}
2016-03-11 20:30:06 +00:00
adjustTree
2016-04-08 20:09:00 +00:00
:: (Functor m, MonadIO m, MonadMask m)
2016-03-11 20:30:06 +00:00
=> (TreeItem -> m (Maybe TreeItem))
-- ^ Adjust an item in the tree. Nothing deletes the item.
-- Cannot move the item to a different tree.
-> [TreeItem]
-- ^ New items to add to the tree.
-> (TreeContent -> TreeContent -> TreeContent)
-- ^ When adding a new item to the tree and an item with the same
-- name already exists, this function picks which to use.
-- The first one is the item that was already in the tree.
2016-03-11 20:30:06 +00:00
-> [TopFilePath]
-- ^ Files to remove from the tree.
-> Ref
-> Repo
-> m Sha
adjustTree adjusttreeitem addtreeitems resolveaddconflict removefiles r repo =
2016-03-11 20:30:06 +00:00
withMkTreeHandle repo $ \h -> do
(l, cleanup) <- liftIO $ lsTreeWithObjects LsTree.LsTreeRecursive r repo
(l', _, _) <- go h False [] 1 inTopTree l
l'' <- adjustlist h 0 inTopTree topTreePath l'
sha <- liftIO $ mkTree h l''
2016-03-11 20:30:06 +00:00
void $ liftIO cleanup
return sha
where
go _ wasmodified c _ _ [] = return (c, wasmodified, [])
go h wasmodified c depth intree (i:is)
2016-03-11 18:46:54 +00:00
| intree i = case readObjectType (LsTree.typeobj i) of
Just BlobObject -> do
let ti = TreeItem (LsTree.file i) (LsTree.mode i) (LsTree.sha i)
v <- adjusttreeitem ti
case v of
Nothing -> go h True c depth intree is
2016-03-11 18:46:54 +00:00
Just ti'@(TreeItem f m s) ->
let !modified = wasmodified || ti' /= ti
blob = TreeBlob f m s
in go h modified (blob:c) depth intree is
2016-03-11 18:46:54 +00:00
Just TreeObject -> do
(sl, modified, is') <- go h False [] (depth+1) (beneathSubTree i) is
sl' <- adjustlist h depth (inTree i) (gitPath i) sl
let slmodified = sl' /= sl
subtree <- if modified || slmodified
2016-03-11 20:45:40 +00:00
then liftIO $ recordSubTree h $ NewSubTree (LsTree.file i) sl'
2016-03-11 18:46:54 +00:00
else return $ RecordedSubTree (LsTree.file i) (LsTree.sha i) []
let !modified' = modified || slmodified || wasmodified
go h modified' (subtree : c) depth intree is'
Just CommitObject -> do
let ti = TreeItem (LsTree.file i) (LsTree.mode i) (LsTree.sha i)
v <- adjusttreeitem ti
case v of
Nothing -> go h True c depth intree is
Just (TreeItem f m s) ->
let commit = TreeCommit f m s
in go h wasmodified (commit:c) depth intree is
_ -> giveup ("unexpected object type \"" ++ decodeBS (LsTree.typeobj i) ++ "\"")
| otherwise = return (c, wasmodified, i:is)
2016-03-11 23:29:43 +00:00
adjustlist h depth ishere herepath l = do
let addhere = fromMaybe [] $ M.lookup herepath addtreeitempathmap
2016-03-11 23:29:43 +00:00
let l' = filter (not . removed) $
addoldnew l (map treeItemToTreeContent addhere)
2016-03-11 23:29:43 +00:00
let inl i = any (\t -> beneathSubTree t i) l'
let (Tree addunderhere) = flattenTree depth $ treeItemsToTree $
filter (not . inl) $ if herepath == topTreePath
then filter (not . ishere) addtreeitems
else fromMaybe [] $
M.lookup (subTreePrefix herepath) addtreeitemprefixmap
2016-03-11 23:29:43 +00:00
addunderhere' <- liftIO $ mapM (recordSubTree h) addunderhere
return (addoldnew l' addunderhere')
2016-03-11 23:29:43 +00:00
addtreeitempathmap = mkPathMap addtreeitems
addtreeitemprefixmap = mkSubTreePathPrefixMap addtreeitems
removeset = S.fromList $ map (P.normalise . gitPath) removefiles
removed (TreeBlob f _ _) = S.member (P.normalise (gitPath f)) removeset
removed (TreeCommit f _ _) = S.member (P.normalise (gitPath f)) removeset
removed (RecordedSubTree _ _ _) = False
removed (NewSubTree _ _) = False
addoldnew [] new = new
addoldnew old [] = old
addoldnew old new = addoldnew' (M.fromList $ map (\i -> (mkk i, i)) old) new
addoldnew' oldm (n:ns) =
let k = mkk n
in case M.lookup k oldm of
Just o ->
resolveaddconflict o n
:
addoldnew' (M.delete k oldm) ns
Nothing -> n : addoldnew' oldm ns
addoldnew' oldm [] = M.elems oldm
mkk = P.normalise . gitPath
{- Grafts subtree into the basetree at the specified location, replacing
- anything that the basetree already had at that location.
-
- This is generally much more efficient than using getTree and recordTree,
- or adjustTree, since it only needs to traverse from the top of the tree
- down to the graft location. It does not buffer the whole tree in memory.
-}
graftTree
:: Sha
-> TopFilePath
-> Sha
-> Repo
-> IO Sha
graftTree subtree graftloc basetree repo =
withMkTreeHandle repo $ graftTree' subtree graftloc basetree repo
graftTree'
:: Sha
-> TopFilePath
-> Sha
-> Repo
-> MkTreeHandle
-> IO Sha
graftTree' subtree graftloc basetree repo hdl = go basetree subdirs graftdirs
where
go tsha (subdir:restsubdirs) (topmostgraphdir:restgraphdirs) = do
Tree t <- getTree LsTree.LsTreeNonRecursive tsha repo
let abovegraftpoint i = gitPath i == gitPath subdir
t' <- case partition abovegraftpoint t of
-- the graft point is not already in the tree,
-- so graft it in, keeping the existing tree
-- content
([], _) -> do
graft <- graftin (topmostgraphdir:restgraphdirs)
return (graft:t)
(matching, rest) -> do
newshas <- forM matching $ \case
RecordedSubTree tloc tsha' _
| null restgraphdirs -> return $
RecordedSubTree tloc subtree []
| otherwise -> do
tsha'' <- go tsha' restsubdirs restgraphdirs
return $ RecordedSubTree tloc tsha'' []
_ -> graftin (topmostgraphdir:restgraphdirs)
return (newshas ++ rest)
mkTree hdl t'
go _ _ [] = return subtree
go _ [] _ = return subtree
graftin t = recordSubTree hdl $ graftin' t
graftin' [] = RecordedSubTree graftloc subtree []
graftin' (d:rest)
| d == graftloc = graftin' []
| otherwise = NewSubTree d [graftin' rest]
subdirs = P.splitDirectories $ gitPath graftloc
graftdirs = map (asTopFilePath . toInternalGitPath) $
pathPrefixes subdirs
{- Assumes the list is ordered, with tree objects coming right before their
- contents. -}
extractTree :: [LsTree.TreeItem] -> Either String Tree
2016-03-11 18:08:06 +00:00
extractTree l = case go [] inTopTree l of
Right (t, []) -> Right (Tree t)
Right _ -> parseerr "unexpected tree form"
Left e -> parseerr e
where
go t _ [] = Right (t, [])
2016-02-24 01:56:03 +00:00
go t intree (i:is)
2016-03-11 18:46:54 +00:00
| intree i = case readObjectType (LsTree.typeobj i) of
Just BlobObject ->
let b = TreeBlob (LsTree.file i) (LsTree.mode i) (LsTree.sha i)
in go (b:t) intree is
Just TreeObject -> case go [] (beneathSubTree i) is of
Right (subtree, is') ->
let st = RecordedSubTree (LsTree.file i) (LsTree.sha i) subtree
in go (st:t) intree is'
Left e -> Left e
Just CommitObject ->
let c = TreeCommit (LsTree.file i) (LsTree.mode i) (LsTree.sha i)
in go (c:t) intree is
_ -> parseerr ("unexpected object type \"" ++ decodeBS (LsTree.typeobj i) ++ "\"")
| otherwise = Right (t, i:is)
parseerr = Left
2016-02-24 01:56:03 +00:00
2016-03-11 23:29:43 +00:00
class GitPath t where
gitPath :: t -> RawFilePath
2016-03-11 23:29:43 +00:00
instance GitPath RawFilePath where
2016-03-11 23:29:43 +00:00
gitPath = id
instance GitPath FilePath where
gitPath = toRawFilePath
2016-03-11 23:29:43 +00:00
instance GitPath TopFilePath where
gitPath = getTopFilePath
2016-02-24 01:56:03 +00:00
2016-03-11 23:29:43 +00:00
instance GitPath TreeItem where
gitPath (TreeItem f _ _) = gitPath f
2016-03-11 23:29:43 +00:00
instance GitPath LsTree.TreeItem where
gitPath = gitPath . LsTree.file
2016-02-24 01:56:03 +00:00
2016-03-11 23:29:43 +00:00
instance GitPath TreeContent where
gitPath (TreeBlob f _ _) = gitPath f
gitPath (RecordedSubTree f _ _) = gitPath f
gitPath (NewSubTree f _) = gitPath f
gitPath (TreeCommit f _ _) = gitPath f
2016-03-11 18:08:06 +00:00
2016-03-11 23:29:43 +00:00
inTopTree :: GitPath t => t -> Bool
inTopTree = inTree topTreePath
topTreePath :: RawFilePath
topTreePath = "."
2016-03-11 20:30:06 +00:00
2016-03-11 23:29:43 +00:00
inTree :: (GitPath t, GitPath f) => t -> f -> Bool
inTree t f = gitPath t == P.takeDirectory (gitPath f)
2016-03-11 23:29:43 +00:00
beneathSubTree :: (GitPath t, GitPath f) => t -> f -> Bool
beneathSubTree t f = subTreePrefix t `B.isPrefixOf` subTreePath f
subTreePath :: GitPath t => t -> RawFilePath
subTreePath = P.normalise . gitPath
subTreePrefix :: GitPath t => t -> RawFilePath
subTreePrefix t
| B.null tp = tp
| otherwise = P.addTrailingPathSeparator (P.normalise tp)
2016-03-11 23:29:43 +00:00
where
tp = gitPath t
{- Makes a Map where the keys are directories, and the values
- are the items located in that directory.
-
- Values that are not in any subdirectory are placed in
- the topTreePath key.
-}
mkPathMap :: GitPath t => [t] -> M.Map RawFilePath [t]
mkPathMap l = M.fromListWith (++) $
map (\ti -> (P.takeDirectory (gitPath ti), [ti])) l
{- Input is eg splitDirectories "foo/bar/baz",
- for which it will output ["foo", "foo/bar", "foo/bar/baz"] -}
pathPrefixes :: [RawFilePath] -> [RawFilePath]
pathPrefixes = go []
where
go _ [] = []
go base (d:rest) = (P.joinPath base P.</> d) : go (base ++ [d]) rest
{- Makes a Map where the keys are all subtree path prefixes,
- and the values are items with that subtree path prefix.
-}
mkSubTreePathPrefixMap :: GitPath t => [t] -> M.Map RawFilePath [t]
mkSubTreePathPrefixMap l = M.fromListWith (++) $ concatMap go l
where
go ti = map (\p -> (p, [ti]))
(map subTreePrefix $ pathPrefixes $ P.splitDirectories $ subTreePath ti)