Added graftTree but it's buggy.

Should use graftTree in Annex.Branch.graftTreeish; it will be faster
than the current implementation there.

Started Annex.Import, but untested and it doesn't yet handle tree
grafting.
This commit is contained in:
Joey Hess 2019-02-21 17:32:59 -04:00
parent 56137ce0d2
commit 8fdea8f444
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
15 changed files with 172 additions and 30 deletions

View file

@ -51,6 +51,7 @@ import qualified Git.Branch
import qualified Git.UnionMerge import qualified Git.UnionMerge
import qualified Git.UpdateIndex import qualified Git.UpdateIndex
import qualified Git.Tree import qualified Git.Tree
import qualified Git.LsTree
import Git.LsTree (lsTreeParams) import Git.LsTree (lsTreeParams)
import qualified Git.HashObject import qualified Git.HashObject
import Annex.HashObject import Annex.HashObject
@ -366,7 +367,7 @@ branchFiles = withIndex $ inRepo branchFiles'
branchFiles' :: Git.Repo -> IO [FilePath] branchFiles' :: Git.Repo -> IO [FilePath]
branchFiles' = Git.Command.pipeNullSplitZombie branchFiles' = Git.Command.pipeNullSplitZombie
(lsTreeParams fullname [Param "--name-only"]) (lsTreeParams Git.LsTree.LsTreeRecursive fullname [Param "--name-only"])
{- Populates the branch's index file with the current branch contents. {- Populates the branch's index file with the current branch contents.
- -
@ -649,7 +650,8 @@ graftTreeish :: Git.Ref -> TopFilePath -> Annex ()
graftTreeish treeish graftpoint = lockJournal $ \jl -> do graftTreeish treeish graftpoint = lockJournal $ \jl -> do
branchref <- getBranch branchref <- getBranch
updateIndex jl branchref updateIndex jl branchref
Git.Tree.Tree t <- inRepo $ Git.Tree.getTree branchref Git.Tree.Tree t <- inRepo $
Git.Tree.getTree Git.LsTree.LsTreeRecursive branchref
t' <- inRepo $ Git.Tree.recordTree $ Git.Tree.Tree $ t' <- inRepo $ Git.Tree.recordTree $ Git.Tree.Tree $
Git.Tree.RecordedSubTree graftpoint treeish [] : t Git.Tree.RecordedSubTree graftpoint treeish [] : t
c <- inRepo $ Git.Branch.commitTree Git.Branch.AutomaticCommit c <- inRepo $ Git.Branch.commitTree Git.Branch.AutomaticCommit

71
Annex/Import.hs Normal file
View file

@ -0,0 +1,71 @@
{- git-annex import from remotes
-
- Copyright 2019 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Annex.Import (buildImportCommit) where
import Annex.Common
import Types.Import
import Git.Types
import Git.Tree
import Git.Branch
import Git.FilePath
import Annex.Link
{- Builds a commit on top of a basecommit that reflects changes to the
- content of a remote. When there are no changes to commit, returns Nothing.
-
- When a remote provided a history of versions of files,
- builds a corresponding tree of git commits.
-
- After importing from a remote, exporting the same thing back to the
- remote should be a no-op. So, the export log is updated to reflect the
- imported tree.
-
- The files are imported to the top of the git repository, unless a
- subdir is specified, then the import will only affect the contents of
- the subdir.
-
- This does not import any content from a remote. But since it needs the
- Key of imported files to be known, its caller will have to download
- new files in order to generate keys for them.
-}
buildImportCommit
:: Ref
-> Maybe FilePath
-> ImportableContents Key
-> CommitMode
-> String
-> Annex (Maybe Ref)
buildImportCommit basecommit subdir importable commitmode commitmessage = do
go =<< buildImportTrees basetree importable
where
go (History importedtree hs) = do
parents <- mapM go hs
inRepo $ commitTree commitmode commitmessage parents tree
data History t = History t [History t]
{- Builds a history of git trees reflecting the ImportableContents. -}
buildImportTrees
:: Maybe FilePath
-> ImportableContents Key
-> Annex (History Sha)
buildImportTrees subdir i = History
<$> go (importableContents i)
<*> mapM (buildImportTrees subdir basetree) (importableHistory i)
where
go ls = do
is <- mapM mktreeitem ls
inRepo $ recordTree (treeItemsToTree is)
mktreeitem (loc, k) = do
let lf = fromImportLocation loc
let topf = asTopFilePath $ maybe lf (</> lf) subdir
relf <- fromRepo $ fromTopFilePath topf
symlink <- calcRepo $ gitAnnexLink relf k
linksha <- hashSymlink symlink
return $ TreeItem topf (fromTreeItemType TreeSymlink) linksha

View file

@ -83,7 +83,7 @@ scanUnlockedFiles = whenM (isJust <$> inRepo Git.Branch.current) $ do
showSideAction "scanning for unlocked files" showSideAction "scanning for unlocked files"
Database.Keys.runWriter $ Database.Keys.runWriter $
liftIO . Database.Keys.SQL.dropAllAssociatedFiles liftIO . Database.Keys.SQL.dropAllAssociatedFiles
(l, cleanup) <- inRepo $ Git.LsTree.lsTree Git.Ref.headRef (l, cleanup) <- inRepo $ Git.LsTree.lsTree Git.LsTree.LsTreeRecursive Git.Ref.headRef
forM_ l $ \i -> forM_ l $ \i ->
when (isregfile i) $ when (isregfile i) $
maybe noop (add i) maybe noop (add i)

View file

@ -10,7 +10,7 @@ Copyright: © 2012-2017 Joey Hess <id@joeyh.name>
© 2014 Sören Brunk © 2014 Sören Brunk
License: AGPL-3+ License: AGPL-3+
Files: Annex/AdjustedBranch.hs Annex/AdjustedBranch/Name.hs Annex/CurrentBranch.hs Annex/Version.hs Benchmark.hs Database/ContentIdentifier.hs Logs/File.hs Logs/Line.hs Logs/Smudge.hs Logs/ContentIdentifier.hs Logs/ContentIdentifier/Pure.hs Remote/Git.hs Remote/Helper/Ssh.hs Remote/Adb.hs Remote/External.hs Remote/Extermal/Types.hs Types/AdjustedBranch.hs Types/RepoVersion.hs Upgrade/V6.hs Files: Annex/AdjustedBranch.hs Annex/AdjustedBranch/Name.hs Annex/CurrentBranch.hs Annex/Import.hs Annex/Version.hs Benchmark.hs Database/ContentIdentifier.hs Logs/File.hs Logs/Line.hs Logs/Smudge.hs Logs/ContentIdentifier.hs Logs/ContentIdentifier/Pure.hs Remote/Git.hs Remote/Helper/Ssh.hs Remote/Adb.hs Remote/External.hs Remote/Extermal/Types.hs Types/AdjustedBranch.hs Types/RepoVersion.hs Upgrade/V6.hs
Copyright: © 2011-2019 Joey Hess <id@joeyh.name> Copyright: © 2011-2019 Joey Hess <id@joeyh.name>
License: AGPL-3+ License: AGPL-3+

View file

@ -228,7 +228,7 @@ withKeyOptions' ko auto mkkeyaction fallbackaction params = do
runbranchkeys bs = do runbranchkeys bs = do
keyaction <- mkkeyaction keyaction <- mkkeyaction
forM_ bs $ \b -> do forM_ bs $ \b -> do
(l, cleanup) <- inRepo $ LsTree.lsTree b (l, cleanup) <- inRepo $ LsTree.lsTree LsTree.LsTreeRecursive b
forM_ l $ \i -> do forM_ l $ \i -> do
let bfp = mkActionItem $ BranchFilePath b (LsTree.file i) let bfp = mkActionItem $ BranchFilePath b (LsTree.file i)
maybe noop (\k -> keyaction (k, bfp)) maybe noop (\k -> keyaction (k, bfp))

View file

@ -193,7 +193,7 @@ mkDiffMap old new db = do
-- Returns True when files were uploaded. -- Returns True when files were uploaded.
fillExport :: Remote -> ExportHandle -> Git.Ref -> Annex Bool fillExport :: Remote -> ExportHandle -> Git.Ref -> Annex Bool
fillExport r db new = do fillExport r db new = do
(l, cleanup) <- inRepo $ Git.LsTree.lsTree new (l, cleanup) <- inRepo $ Git.LsTree.lsTree Git.LsTree.LsTreeRecursive new
cvar <- liftIO $ newMVar False cvar <- liftIO $ newMVar False
commandActions $ map (startExport r db cvar) l commandActions $ map (startExport r db cvar) l
void $ liftIO $ cleanup void $ liftIO $ cleanup

View file

@ -597,7 +597,7 @@ getDirStatInfo o dir = do
getTreeStatInfo :: InfoOptions -> Git.Ref -> Annex (Maybe StatInfo) getTreeStatInfo :: InfoOptions -> Git.Ref -> Annex (Maybe StatInfo)
getTreeStatInfo o r = do getTreeStatInfo o r = do
fast <- Annex.getState Annex.fast fast <- Annex.getState Annex.fast
(ls, cleanup) <- inRepo $ LsTree.lsTree r (ls, cleanup) <- inRepo $ LsTree.lsTree LsTree.LsTreeRecursive r
(presentdata, referenceddata, repodata) <- go fast ls initial (presentdata, referenceddata, repodata) <- go fast ls initial
ifM (liftIO cleanup) ifM (liftIO cleanup)
( return $ Just $ ( return $ Just $

View file

@ -1,6 +1,6 @@
{- git ls-tree interface {- git ls-tree interface
- -
- Copyright 2011-2016 Joey Hess <id@joeyh.name> - Copyright 2011-2019 Joey Hess <id@joeyh.name>
- -
- Licensed under the GNU GPL version 3 or higher. - Licensed under the GNU GPL version 3 or higher.
-} -}
@ -9,6 +9,7 @@
module Git.LsTree ( module Git.LsTree (
TreeItem(..), TreeItem(..),
LsTreeMode(..),
lsTree, lsTree,
lsTree', lsTree',
lsTreeParams, lsTreeParams,
@ -34,26 +35,30 @@ data TreeItem = TreeItem
, file :: TopFilePath , file :: TopFilePath
} deriving Show } deriving Show
{- Lists the complete contents of a tree, recursing into sub-trees, data LsTreeMode = LsTreeRecursive | LsTreeNonRecursive
- with lazy output. -}
lsTree :: Ref -> Repo -> IO ([TreeItem], IO Bool) {- Lists the contents of a tree, with lazy output. -}
lsTree :: LsTreeMode -> Ref -> Repo -> IO ([TreeItem], IO Bool)
lsTree = lsTree' [] lsTree = lsTree' []
lsTree' :: [CommandParam] -> Ref -> Repo -> IO ([TreeItem], IO Bool) lsTree' :: [CommandParam] -> LsTreeMode -> Ref -> Repo -> IO ([TreeItem], IO Bool)
lsTree' ps t repo = do lsTree' ps mode t repo = do
(l, cleanup) <- pipeNullSplit (lsTreeParams t ps) repo (l, cleanup) <- pipeNullSplit (lsTreeParams mode t ps) repo
return (map parseLsTree l, cleanup) return (map parseLsTree l, cleanup)
lsTreeParams :: Ref -> [CommandParam] -> [CommandParam] lsTreeParams :: LsTreeMode -> Ref -> [CommandParam] -> [CommandParam]
lsTreeParams r ps = lsTreeParams mode r ps =
[ Param "ls-tree" [ Param "ls-tree"
, Param "--full-tree" , Param "--full-tree"
, Param "-z" , Param "-z"
, Param "-r" ] ++ recursiveparams ++ ps ++
] ++ ps ++
[ Param "--" [ Param "--"
, File $ fromRef r , File $ fromRef r
] ]
where
recursiveparams = case mode of
LsTreeRecursive -> [ Param "-r" ]
LsTreeNonRecursive -> []
{- Lists specified files in a tree. -} {- Lists specified files in a tree. -}
lsTreeFiles :: Ref -> [FilePath] -> Repo -> IO [TreeItem] lsTreeFiles :: Ref -> [FilePath] -> Repo -> IO [TreeItem]

View file

@ -341,7 +341,7 @@ verifyTree :: MissingObjects -> Sha -> Repo -> IO Bool
verifyTree missing treesha r verifyTree missing treesha r
| S.member treesha missing = return False | S.member treesha missing = return False
| otherwise = do | otherwise = do
(ls, cleanup) <- pipeNullSplit (LsTree.lsTreeParams treesha []) r (ls, cleanup) <- pipeNullSplit (LsTree.lsTreeParams LsTree.LsTreeRecursive treesha []) r
let objshas = map (LsTree.sha . LsTree.parseLsTree) ls let objshas = map (LsTree.sha . LsTree.parseLsTree) ls
if any (`S.member` missing) objshas if any (`S.member` missing) objshas
then do then do

View file

@ -13,7 +13,9 @@ module Git.Tree (
getTree, getTree,
recordTree, recordTree,
TreeItem(..), TreeItem(..),
treeItemsToTree,
adjustTree, adjustTree,
graftTree,
treeMode, treeMode,
) where ) where
@ -47,15 +49,15 @@ data TreeContent
deriving (Show, Eq, Ord) deriving (Show, Eq, Ord)
{- Gets the Tree for a Ref. -} {- Gets the Tree for a Ref. -}
getTree :: Ref -> Repo -> IO Tree getTree :: LsTree.LsTreeMode -> Ref -> Repo -> IO Tree
getTree r repo = do getTree lstreemode r repo = do
(l, cleanup) <- lsTreeWithObjects r repo (l, cleanup) <- lsTreeWithObjects lstreemode r repo
let !t = either (\e -> error ("ls-tree parse error:" ++ e)) id let !t = either (\e -> error ("ls-tree parse error:" ++ e)) id
(extractTree l) (extractTree l)
void cleanup void cleanup
return t return t
lsTreeWithObjects :: Ref -> Repo -> IO ([LsTree.TreeItem], IO Bool) lsTreeWithObjects :: LsTree.LsTreeMode -> Ref -> Repo -> IO ([LsTree.TreeItem], IO Bool)
lsTreeWithObjects = LsTree.lsTree' [Param "-t"] lsTreeWithObjects = LsTree.lsTree' [Param "-t"]
newtype MkTreeHandle = MkTreeHandle CoProcess.CoProcessHandle newtype MkTreeHandle = MkTreeHandle CoProcess.CoProcessHandle
@ -181,7 +183,7 @@ adjustTree
-> m Sha -> m Sha
adjustTree adjusttreeitem addtreeitems removefiles r repo = adjustTree adjusttreeitem addtreeitems removefiles r repo =
withMkTreeHandle repo $ \h -> do withMkTreeHandle repo $ \h -> do
(l, cleanup) <- liftIO $ lsTreeWithObjects r repo (l, cleanup) <- liftIO $ lsTreeWithObjects LsTree.LsTreeRecursive r repo
(l', _, _) <- go h False [] 1 inTopTree l (l', _, _) <- go h False [] 1 inTopTree l
l'' <- adjustlist h 0 inTopTree (const True) l' l'' <- adjustlist h 0 inTopTree (const True) l'
sha <- liftIO $ mkTree h l'' sha <- liftIO $ mkTree h l''
@ -229,6 +231,58 @@ adjustTree adjusttreeitem addtreeitems removefiles r repo =
removed (TreeBlob f _ _) = S.member (normalise (gitPath f)) removeset removed (TreeBlob f _ _) = S.member (normalise (gitPath f)) removeset
removed _ = False removed _ = False
{- Grafts subtree into the basetree at the specified location.
-
- This is generally much more efficient than using getTree and recordTree,
- or adjustTree, since it only needs to traverse from the top of the tree
- down to the graft location. It does not buffer the whole tree in memory.
-}
graftTree
:: Sha
-> TopFilePath
-> Sha
-> Repo
-> IO Sha
graftTree subtree graftloc basetree repo =
withMkTreeHandle repo $
go basetree graftdirs
where
go :: Ref -> [TopFilePath] -> MkTreeHandle -> IO Sha
go tsha [] h = do
graft <- graftin h []
mkTree h [graft]
go tsha graftdirs@(topmostgraphdir:_) h = do
Tree t <- getTree LsTree.LsTreeNonRecursive tsha repo
t' <- case partition isabovegraft t of
([], _) -> do
graft <- graftin h graftdirs
return (graft:t)
-- normally there can only be one matching item
-- in the tree, but it's theoretically possible
-- for a git tree to have multiple items with the
-- same name, so process them all
(matching, rest) -> do
newshas <- forM matching $ \case
RecordedSubTree tloc tsha' _ -> do
tsha'' <- go tsha' (drop 1 graftdirs) h
return $ RecordedSubTree tloc tsha'' []
_ -> graftin h $ drop 1 graftdirs
return (newshas ++ rest)
mkTree h t'
isabovegraft i = beneathSubTree i graftloc || gitPath i == gitPath graftloc
graftin h t = recordSubTree h $ graftin' t
graftin' [] = RecordedSubTree graftloc subtree []
graftin' (d:rest) = NewSubTree d [graftin' rest]
-- For a graftloc of "foo/bar/baz", this generates
-- ["foo", "foo/bar", "foo/bar/baz"]
graftdirs = map (asTopFilePath . toInternalGitPath) $
mkpaths [] $ splitDirectories $ gitPath graftloc
mkpaths _ [] = []
mkpaths base (d:rest) = (joinPath base </> d) : mkpaths (base ++ [d]) rest
{- Assumes the list is ordered, with tree objects coming right before their {- Assumes the list is ordered, with tree objects coming right before their
- contents. -} - contents. -}
extractTree :: [LsTree.TreeItem] -> Either String Tree extractTree :: [LsTree.TreeItem] -> Either String Tree

View file

@ -21,7 +21,7 @@ import Utility.Split
import qualified System.FilePath.Posix as Posix import qualified System.FilePath.Posix as Posix
-- A location on a remote that a key can be exported to. -- A location on a remote that a key can be exported to.
-- The FilePath will be relative to the top of the export, -- The FilePath will be relative to the top of the remote,
-- and uses unix-style path separators. -- and uses unix-style path separators.
newtype ExportLocation = ExportLocation FilePath newtype ExportLocation = ExportLocation FilePath
deriving (Show, Eq) deriving (Show, Eq)

View file

@ -19,6 +19,9 @@ import Utility.FileSystemEncoding
- location on the remote. -} - location on the remote. -}
type ImportLocation = ExportLocation type ImportLocation = ExportLocation
fromImportLocation :: ImportLocation -> FilePath
fromImportLocation = fromExportLocation
{- An identifier for content stored on a remote that has been imported into {- An identifier for content stored on a remote that has been imported into
- the repository. It should be reasonably short since it is stored in the - the repository. It should be reasonably short since it is stored in the
- git-annex branch. -} - git-annex branch. -}
@ -32,10 +35,11 @@ instance Arbitrary ContentIdentifier where
arbitrary = ContentIdentifier . encodeBS arbitrary = ContentIdentifier . encodeBS
<$> arbitrary `suchThat` all isAscii <$> arbitrary `suchThat` all isAscii
{- List of files that can be imported from a remote. -} {- List of files that can be imported from a remote, each with some added
data ImportableContents = ImportableContents - information. -}
{ importableContents :: [(ImportLocation, ContentIdentifier)] data ImportableContents info = ImportableContents
, importableHistory :: [ImportableContents] { importableContents :: [(ImportLocation, info)]
, importableHistory :: [ImportableContents info]
-- ^ Used by remotes that support importing historical versions of -- ^ Used by remotes that support importing historical versions of
-- files that are stored in them. This is equivilant to a git -- files that are stored in them. This is equivilant to a git
-- commit history. -- commit history.

View file

@ -247,7 +247,7 @@ data ImportActions a = ImportActions
-- --
-- May also find old versions of files that are still stored in the -- May also find old versions of files that are still stored in the
-- remote. -- remote.
{ listImportableContents :: a (Maybe ImportableContents) { listImportableContents :: a (Maybe (ImportableContents ContentIdentifier))
-- Retrieves a file from the remote. Ensures that the file -- Retrieves a file from the remote. Ensures that the file
-- it retrieves has the requested ContentIdentifier. -- it retrieves has the requested ContentIdentifier.
-- --

View file

@ -16,6 +16,11 @@ this.
It will only need to be updated when listContents returns a It will only need to be updated when listContents returns a
ContentIdentifier that is not already known in the database. ContentIdentifier that is not already known in the database.
* When on an adjusted unlocked branch, need to import the files unlocked.
* What if the remote lists importable filenames that are absolute paths,
or contain a "../" attack?
## race conditions ## race conditions
(Some thoughts about races that the design should cover now, but kept here (Some thoughts about races that the design should cover now, but kept here

View file

@ -630,6 +630,7 @@ Executable git-annex
Annex.GitOverlay Annex.GitOverlay
Annex.HashObject Annex.HashObject
Annex.Hook Annex.Hook
Annex.Import
Annex.Ingest Annex.Ingest
Annex.Init Annex.Init
Annex.InodeSentinal Annex.InodeSentinal