Added graftTree but it's buggy.

Should use graftTree in Annex.Branch.graftTreeish; it will be faster
than the current implementation there.

Started Annex.Import, but untested and it doesn't yet handle tree
grafting.
This commit is contained in:
Joey Hess 2019-02-21 17:32:59 -04:00
parent 56137ce0d2
commit 8fdea8f444
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
15 changed files with 172 additions and 30 deletions

View file

@ -51,6 +51,7 @@ import qualified Git.Branch
import qualified Git.UnionMerge
import qualified Git.UpdateIndex
import qualified Git.Tree
import qualified Git.LsTree
import Git.LsTree (lsTreeParams)
import qualified Git.HashObject
import Annex.HashObject
@ -366,7 +367,7 @@ branchFiles = withIndex $ inRepo branchFiles'
branchFiles' :: Git.Repo -> IO [FilePath]
branchFiles' = Git.Command.pipeNullSplitZombie
(lsTreeParams fullname [Param "--name-only"])
(lsTreeParams Git.LsTree.LsTreeRecursive fullname [Param "--name-only"])
{- Populates the branch's index file with the current branch contents.
-
@ -649,7 +650,8 @@ graftTreeish :: Git.Ref -> TopFilePath -> Annex ()
graftTreeish treeish graftpoint = lockJournal $ \jl -> do
branchref <- getBranch
updateIndex jl branchref
Git.Tree.Tree t <- inRepo $ Git.Tree.getTree branchref
Git.Tree.Tree t <- inRepo $
Git.Tree.getTree Git.LsTree.LsTreeRecursive branchref
t' <- inRepo $ Git.Tree.recordTree $ Git.Tree.Tree $
Git.Tree.RecordedSubTree graftpoint treeish [] : t
c <- inRepo $ Git.Branch.commitTree Git.Branch.AutomaticCommit

71
Annex/Import.hs Normal file
View file

@ -0,0 +1,71 @@
{- git-annex import from remotes
-
- Copyright 2019 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Annex.Import (buildImportCommit) where
import Annex.Common
import Types.Import
import Git.Types
import Git.Tree
import Git.Branch
import Git.FilePath
import Annex.Link
{- Builds a commit on top of a basecommit that reflects changes to the
- content of a remote. When there are no changes to commit, returns Nothing.
-
- When a remote provided a history of versions of files,
- builds a corresponding tree of git commits.
-
- After importing from a remote, exporting the same thing back to the
- remote should be a no-op. So, the export log is updated to reflect the
- imported tree.
-
- The files are imported to the top of the git repository, unless a
- subdir is specified, then the import will only affect the contents of
- the subdir.
-
- This does not import any content from a remote. But since it needs the
- Key of imported files to be known, its caller will have to download
- new files in order to generate keys for them.
-}
buildImportCommit
:: Ref
-> Maybe FilePath
-> ImportableContents Key
-> CommitMode
-> String
-> Annex (Maybe Ref)
buildImportCommit basecommit subdir importable commitmode commitmessage = do
go =<< buildImportTrees basetree importable
where
go (History importedtree hs) = do
parents <- mapM go hs
inRepo $ commitTree commitmode commitmessage parents tree
data History t = History t [History t]
{- Builds a history of git trees reflecting the ImportableContents. -}
buildImportTrees
:: Maybe FilePath
-> ImportableContents Key
-> Annex (History Sha)
buildImportTrees subdir i = History
<$> go (importableContents i)
<*> mapM (buildImportTrees subdir basetree) (importableHistory i)
where
go ls = do
is <- mapM mktreeitem ls
inRepo $ recordTree (treeItemsToTree is)
mktreeitem (loc, k) = do
let lf = fromImportLocation loc
let topf = asTopFilePath $ maybe lf (</> lf) subdir
relf <- fromRepo $ fromTopFilePath topf
symlink <- calcRepo $ gitAnnexLink relf k
linksha <- hashSymlink symlink
return $ TreeItem topf (fromTreeItemType TreeSymlink) linksha

View file

@ -83,7 +83,7 @@ scanUnlockedFiles = whenM (isJust <$> inRepo Git.Branch.current) $ do
showSideAction "scanning for unlocked files"
Database.Keys.runWriter $
liftIO . Database.Keys.SQL.dropAllAssociatedFiles
(l, cleanup) <- inRepo $ Git.LsTree.lsTree Git.Ref.headRef
(l, cleanup) <- inRepo $ Git.LsTree.lsTree Git.LsTree.LsTreeRecursive Git.Ref.headRef
forM_ l $ \i ->
when (isregfile i) $
maybe noop (add i)

View file

@ -10,7 +10,7 @@ Copyright: © 2012-2017 Joey Hess <id@joeyh.name>
© 2014 Sören Brunk
License: AGPL-3+
Files: Annex/AdjustedBranch.hs Annex/AdjustedBranch/Name.hs Annex/CurrentBranch.hs Annex/Version.hs Benchmark.hs Database/ContentIdentifier.hs Logs/File.hs Logs/Line.hs Logs/Smudge.hs Logs/ContentIdentifier.hs Logs/ContentIdentifier/Pure.hs Remote/Git.hs Remote/Helper/Ssh.hs Remote/Adb.hs Remote/External.hs Remote/Extermal/Types.hs Types/AdjustedBranch.hs Types/RepoVersion.hs Upgrade/V6.hs
Files: Annex/AdjustedBranch.hs Annex/AdjustedBranch/Name.hs Annex/CurrentBranch.hs Annex/Import.hs Annex/Version.hs Benchmark.hs Database/ContentIdentifier.hs Logs/File.hs Logs/Line.hs Logs/Smudge.hs Logs/ContentIdentifier.hs Logs/ContentIdentifier/Pure.hs Remote/Git.hs Remote/Helper/Ssh.hs Remote/Adb.hs Remote/External.hs Remote/Extermal/Types.hs Types/AdjustedBranch.hs Types/RepoVersion.hs Upgrade/V6.hs
Copyright: © 2011-2019 Joey Hess <id@joeyh.name>
License: AGPL-3+

View file

@ -228,7 +228,7 @@ withKeyOptions' ko auto mkkeyaction fallbackaction params = do
runbranchkeys bs = do
keyaction <- mkkeyaction
forM_ bs $ \b -> do
(l, cleanup) <- inRepo $ LsTree.lsTree b
(l, cleanup) <- inRepo $ LsTree.lsTree LsTree.LsTreeRecursive b
forM_ l $ \i -> do
let bfp = mkActionItem $ BranchFilePath b (LsTree.file i)
maybe noop (\k -> keyaction (k, bfp))

View file

@ -193,7 +193,7 @@ mkDiffMap old new db = do
-- Returns True when files were uploaded.
fillExport :: Remote -> ExportHandle -> Git.Ref -> Annex Bool
fillExport r db new = do
(l, cleanup) <- inRepo $ Git.LsTree.lsTree new
(l, cleanup) <- inRepo $ Git.LsTree.lsTree Git.LsTree.LsTreeRecursive new
cvar <- liftIO $ newMVar False
commandActions $ map (startExport r db cvar) l
void $ liftIO $ cleanup

View file

@ -597,7 +597,7 @@ getDirStatInfo o dir = do
getTreeStatInfo :: InfoOptions -> Git.Ref -> Annex (Maybe StatInfo)
getTreeStatInfo o r = do
fast <- Annex.getState Annex.fast
(ls, cleanup) <- inRepo $ LsTree.lsTree r
(ls, cleanup) <- inRepo $ LsTree.lsTree LsTree.LsTreeRecursive r
(presentdata, referenceddata, repodata) <- go fast ls initial
ifM (liftIO cleanup)
( return $ Just $

View file

@ -1,6 +1,6 @@
{- git ls-tree interface
-
- Copyright 2011-2016 Joey Hess <id@joeyh.name>
- Copyright 2011-2019 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
@ -9,6 +9,7 @@
module Git.LsTree (
TreeItem(..),
LsTreeMode(..),
lsTree,
lsTree',
lsTreeParams,
@ -34,26 +35,30 @@ data TreeItem = TreeItem
, file :: TopFilePath
} deriving Show
{- Lists the complete contents of a tree, recursing into sub-trees,
- with lazy output. -}
lsTree :: Ref -> Repo -> IO ([TreeItem], IO Bool)
data LsTreeMode = LsTreeRecursive | LsTreeNonRecursive
{- Lists the contents of a tree, with lazy output. -}
lsTree :: LsTreeMode -> Ref -> Repo -> IO ([TreeItem], IO Bool)
lsTree = lsTree' []
lsTree' :: [CommandParam] -> Ref -> Repo -> IO ([TreeItem], IO Bool)
lsTree' ps t repo = do
(l, cleanup) <- pipeNullSplit (lsTreeParams t ps) repo
lsTree' :: [CommandParam] -> LsTreeMode -> Ref -> Repo -> IO ([TreeItem], IO Bool)
lsTree' ps mode t repo = do
(l, cleanup) <- pipeNullSplit (lsTreeParams mode t ps) repo
return (map parseLsTree l, cleanup)
lsTreeParams :: Ref -> [CommandParam] -> [CommandParam]
lsTreeParams r ps =
lsTreeParams :: LsTreeMode -> Ref -> [CommandParam] -> [CommandParam]
lsTreeParams mode r ps =
[ Param "ls-tree"
, Param "--full-tree"
, Param "-z"
, Param "-r"
] ++ ps ++
] ++ recursiveparams ++ ps ++
[ Param "--"
, File $ fromRef r
]
where
recursiveparams = case mode of
LsTreeRecursive -> [ Param "-r" ]
LsTreeNonRecursive -> []
{- Lists specified files in a tree. -}
lsTreeFiles :: Ref -> [FilePath] -> Repo -> IO [TreeItem]

View file

@ -341,7 +341,7 @@ verifyTree :: MissingObjects -> Sha -> Repo -> IO Bool
verifyTree missing treesha r
| S.member treesha missing = return False
| otherwise = do
(ls, cleanup) <- pipeNullSplit (LsTree.lsTreeParams treesha []) r
(ls, cleanup) <- pipeNullSplit (LsTree.lsTreeParams LsTree.LsTreeRecursive treesha []) r
let objshas = map (LsTree.sha . LsTree.parseLsTree) ls
if any (`S.member` missing) objshas
then do

View file

@ -13,7 +13,9 @@ module Git.Tree (
getTree,
recordTree,
TreeItem(..),
treeItemsToTree,
adjustTree,
graftTree,
treeMode,
) where
@ -47,15 +49,15 @@ data TreeContent
deriving (Show, Eq, Ord)
{- Gets the Tree for a Ref. -}
getTree :: Ref -> Repo -> IO Tree
getTree r repo = do
(l, cleanup) <- lsTreeWithObjects r repo
getTree :: LsTree.LsTreeMode -> Ref -> Repo -> IO Tree
getTree lstreemode r repo = do
(l, cleanup) <- lsTreeWithObjects lstreemode r repo
let !t = either (\e -> error ("ls-tree parse error:" ++ e)) id
(extractTree l)
void cleanup
return t
lsTreeWithObjects :: Ref -> Repo -> IO ([LsTree.TreeItem], IO Bool)
lsTreeWithObjects :: LsTree.LsTreeMode -> Ref -> Repo -> IO ([LsTree.TreeItem], IO Bool)
lsTreeWithObjects = LsTree.lsTree' [Param "-t"]
newtype MkTreeHandle = MkTreeHandle CoProcess.CoProcessHandle
@ -181,7 +183,7 @@ adjustTree
-> m Sha
adjustTree adjusttreeitem addtreeitems removefiles r repo =
withMkTreeHandle repo $ \h -> do
(l, cleanup) <- liftIO $ lsTreeWithObjects r repo
(l, cleanup) <- liftIO $ lsTreeWithObjects LsTree.LsTreeRecursive r repo
(l', _, _) <- go h False [] 1 inTopTree l
l'' <- adjustlist h 0 inTopTree (const True) l'
sha <- liftIO $ mkTree h l''
@ -229,6 +231,58 @@ adjustTree adjusttreeitem addtreeitems removefiles r repo =
removed (TreeBlob f _ _) = S.member (normalise (gitPath f)) removeset
removed _ = False
{- Grafts subtree into the basetree at the specified location.
-
- This is generally much more efficient than using getTree and recordTree,
- or adjustTree, since it only needs to traverse from the top of the tree
- down to the graft location. It does not buffer the whole tree in memory.
-}
graftTree
:: Sha
-> TopFilePath
-> Sha
-> Repo
-> IO Sha
graftTree subtree graftloc basetree repo =
withMkTreeHandle repo $
go basetree graftdirs
where
go :: Ref -> [TopFilePath] -> MkTreeHandle -> IO Sha
go tsha [] h = do
graft <- graftin h []
mkTree h [graft]
go tsha graftdirs@(topmostgraphdir:_) h = do
Tree t <- getTree LsTree.LsTreeNonRecursive tsha repo
t' <- case partition isabovegraft t of
([], _) -> do
graft <- graftin h graftdirs
return (graft:t)
-- normally there can only be one matching item
-- in the tree, but it's theoretically possible
-- for a git tree to have multiple items with the
-- same name, so process them all
(matching, rest) -> do
newshas <- forM matching $ \case
RecordedSubTree tloc tsha' _ -> do
tsha'' <- go tsha' (drop 1 graftdirs) h
return $ RecordedSubTree tloc tsha'' []
_ -> graftin h $ drop 1 graftdirs
return (newshas ++ rest)
mkTree h t'
isabovegraft i = beneathSubTree i graftloc || gitPath i == gitPath graftloc
graftin h t = recordSubTree h $ graftin' t
graftin' [] = RecordedSubTree graftloc subtree []
graftin' (d:rest) = NewSubTree d [graftin' rest]
-- For a graftloc of "foo/bar/baz", this generates
-- ["foo", "foo/bar", "foo/bar/baz"]
graftdirs = map (asTopFilePath . toInternalGitPath) $
mkpaths [] $ splitDirectories $ gitPath graftloc
mkpaths _ [] = []
mkpaths base (d:rest) = (joinPath base </> d) : mkpaths (base ++ [d]) rest
{- Assumes the list is ordered, with tree objects coming right before their
- contents. -}
extractTree :: [LsTree.TreeItem] -> Either String Tree

View file

@ -21,7 +21,7 @@ import Utility.Split
import qualified System.FilePath.Posix as Posix
-- A location on a remote that a key can be exported to.
-- The FilePath will be relative to the top of the export,
-- The FilePath will be relative to the top of the remote,
-- and uses unix-style path separators.
newtype ExportLocation = ExportLocation FilePath
deriving (Show, Eq)

View file

@ -19,6 +19,9 @@ import Utility.FileSystemEncoding
- location on the remote. -}
type ImportLocation = ExportLocation
fromImportLocation :: ImportLocation -> FilePath
fromImportLocation = fromExportLocation
{- An identifier for content stored on a remote that has been imported into
- the repository. It should be reasonably short since it is stored in the
- git-annex branch. -}
@ -32,10 +35,11 @@ instance Arbitrary ContentIdentifier where
arbitrary = ContentIdentifier . encodeBS
<$> arbitrary `suchThat` all isAscii
{- List of files that can be imported from a remote. -}
data ImportableContents = ImportableContents
{ importableContents :: [(ImportLocation, ContentIdentifier)]
, importableHistory :: [ImportableContents]
{- List of files that can be imported from a remote, each with some added
- information. -}
data ImportableContents info = ImportableContents
{ importableContents :: [(ImportLocation, info)]
, importableHistory :: [ImportableContents info]
-- ^ Used by remotes that support importing historical versions of
-- files that are stored in them. This is equivilant to a git
-- commit history.

View file

@ -247,7 +247,7 @@ data ImportActions a = ImportActions
--
-- May also find old versions of files that are still stored in the
-- remote.
{ listImportableContents :: a (Maybe ImportableContents)
{ listImportableContents :: a (Maybe (ImportableContents ContentIdentifier))
-- Retrieves a file from the remote. Ensures that the file
-- it retrieves has the requested ContentIdentifier.
--

View file

@ -16,6 +16,11 @@ this.
It will only need to be updated when listContents returns a
ContentIdentifier that is not already known in the database.
* When on an adjusted unlocked branch, need to import the files unlocked.
* What if the remote lists importable filenames that are absolute paths,
or contain a "../" attack?
## race conditions
(Some thoughts about races that the design should cover now, but kept here

View file

@ -630,6 +630,7 @@ Executable git-annex
Annex.GitOverlay
Annex.HashObject
Annex.Hook
Annex.Import
Annex.Ingest
Annex.Init
Annex.InodeSentinal