diff --git a/Annex/Branch.hs b/Annex/Branch.hs index 5690429964..b920b4a94f 100644 --- a/Annex/Branch.hs +++ b/Annex/Branch.hs @@ -51,6 +51,7 @@ import qualified Git.Branch import qualified Git.UnionMerge import qualified Git.UpdateIndex import qualified Git.Tree +import qualified Git.LsTree import Git.LsTree (lsTreeParams) import qualified Git.HashObject import Annex.HashObject @@ -366,7 +367,7 @@ branchFiles = withIndex $ inRepo branchFiles' branchFiles' :: Git.Repo -> IO [FilePath] branchFiles' = Git.Command.pipeNullSplitZombie - (lsTreeParams fullname [Param "--name-only"]) + (lsTreeParams Git.LsTree.LsTreeRecursive fullname [Param "--name-only"]) {- Populates the branch's index file with the current branch contents. - @@ -649,7 +650,8 @@ graftTreeish :: Git.Ref -> TopFilePath -> Annex () graftTreeish treeish graftpoint = lockJournal $ \jl -> do branchref <- getBranch updateIndex jl branchref - Git.Tree.Tree t <- inRepo $ Git.Tree.getTree branchref + Git.Tree.Tree t <- inRepo $ + Git.Tree.getTree Git.LsTree.LsTreeRecursive branchref t' <- inRepo $ Git.Tree.recordTree $ Git.Tree.Tree $ Git.Tree.RecordedSubTree graftpoint treeish [] : t c <- inRepo $ Git.Branch.commitTree Git.Branch.AutomaticCommit diff --git a/Annex/Import.hs b/Annex/Import.hs new file mode 100644 index 0000000000..a737e41515 --- /dev/null +++ b/Annex/Import.hs @@ -0,0 +1,71 @@ +{- git-annex import from remotes + - + - Copyright 2019 Joey Hess <id@joeyh.name> + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +module Annex.Import (buildImportCommit) where + +import Annex.Common +import Types.Import +import Git.Types +import Git.Tree +import Git.Branch +import Git.FilePath +import Annex.Link + +{- Builds a commit on top of a basecommit that reflects changes to the + - content of a remote. When there are no changes to commit, returns Nothing. + - + - When a remote provided a history of versions of files, + - builds a corresponding tree of git commits. + - + - After importing from a remote, exporting the same thing back to the + - remote should be a no-op. So, the export log is updated to reflect the + - imported tree. + - + - The files are imported to the top of the git repository, unless a + - subdir is specified, then the import will only affect the contents of + - the subdir. + - + - This does not import any content from a remote. But since it needs the + - Key of imported files to be known, its caller will have to download + - new files in order to generate keys for them. + -} +buildImportCommit + :: Ref + -> Maybe FilePath + -> ImportableContents Key + -> CommitMode + -> String + -> Annex (Maybe Ref) +buildImportCommit basecommit subdir importable commitmode commitmessage = do + go =<< buildImportTrees basetree importable + where + go (History importedtree hs) = do + parents <- mapM go hs + + inRepo $ commitTree commitmode commitmessage parents tree + +data History t = History t [History t] + +{- Builds a history of git trees reflecting the ImportableContents. -} +buildImportTrees + :: Maybe FilePath + -> ImportableContents Key + -> Annex (History Sha) +buildImportTrees subdir i = History + <$> go (importableContents i) + <*> mapM (buildImportTrees subdir basetree) (importableHistory i) + where + go ls = do + is <- mapM mktreeitem ls + inRepo $ recordTree (treeItemsToTree is) + mktreeitem (loc, k) = do + let lf = fromImportLocation loc + let topf = asTopFilePath $ maybe lf (</> lf) subdir + relf <- fromRepo $ fromTopFilePath topf + symlink <- calcRepo $ gitAnnexLink relf k + linksha <- hashSymlink symlink + return $ TreeItem topf (fromTreeItemType TreeSymlink) linksha diff --git a/Annex/WorkTree.hs b/Annex/WorkTree.hs index 6dcd75f66e..074496854e 100644 --- a/Annex/WorkTree.hs +++ b/Annex/WorkTree.hs @@ -83,7 +83,7 @@ scanUnlockedFiles = whenM (isJust <$> inRepo Git.Branch.current) $ do showSideAction "scanning for unlocked files" Database.Keys.runWriter $ liftIO . Database.Keys.SQL.dropAllAssociatedFiles - (l, cleanup) <- inRepo $ Git.LsTree.lsTree Git.Ref.headRef + (l, cleanup) <- inRepo $ Git.LsTree.lsTree Git.LsTree.LsTreeRecursive Git.Ref.headRef forM_ l $ \i -> when (isregfile i) $ maybe noop (add i) diff --git a/COPYRIGHT b/COPYRIGHT index 4168d62b9b..ef3109d2dd 100644 --- a/COPYRIGHT +++ b/COPYRIGHT @@ -10,7 +10,7 @@ Copyright: © 2012-2017 Joey Hess <id@joeyh.name> © 2014 Sören Brunk License: AGPL-3+ -Files: Annex/AdjustedBranch.hs Annex/AdjustedBranch/Name.hs Annex/CurrentBranch.hs Annex/Version.hs Benchmark.hs Database/ContentIdentifier.hs Logs/File.hs Logs/Line.hs Logs/Smudge.hs Logs/ContentIdentifier.hs Logs/ContentIdentifier/Pure.hs Remote/Git.hs Remote/Helper/Ssh.hs Remote/Adb.hs Remote/External.hs Remote/Extermal/Types.hs Types/AdjustedBranch.hs Types/RepoVersion.hs Upgrade/V6.hs +Files: Annex/AdjustedBranch.hs Annex/AdjustedBranch/Name.hs Annex/CurrentBranch.hs Annex/Import.hs Annex/Version.hs Benchmark.hs Database/ContentIdentifier.hs Logs/File.hs Logs/Line.hs Logs/Smudge.hs Logs/ContentIdentifier.hs Logs/ContentIdentifier/Pure.hs Remote/Git.hs Remote/Helper/Ssh.hs Remote/Adb.hs Remote/External.hs Remote/Extermal/Types.hs Types/AdjustedBranch.hs Types/RepoVersion.hs Upgrade/V6.hs Copyright: © 2011-2019 Joey Hess <id@joeyh.name> License: AGPL-3+ diff --git a/CmdLine/Seek.hs b/CmdLine/Seek.hs index dbbde5b968..5dae1ba847 100644 --- a/CmdLine/Seek.hs +++ b/CmdLine/Seek.hs @@ -228,7 +228,7 @@ withKeyOptions' ko auto mkkeyaction fallbackaction params = do runbranchkeys bs = do keyaction <- mkkeyaction forM_ bs $ \b -> do - (l, cleanup) <- inRepo $ LsTree.lsTree b + (l, cleanup) <- inRepo $ LsTree.lsTree LsTree.LsTreeRecursive b forM_ l $ \i -> do let bfp = mkActionItem $ BranchFilePath b (LsTree.file i) maybe noop (\k -> keyaction (k, bfp)) diff --git a/Command/Export.hs b/Command/Export.hs index d805c5e835..635b2a9dd2 100644 --- a/Command/Export.hs +++ b/Command/Export.hs @@ -193,7 +193,7 @@ mkDiffMap old new db = do -- Returns True when files were uploaded. fillExport :: Remote -> ExportHandle -> Git.Ref -> Annex Bool fillExport r db new = do - (l, cleanup) <- inRepo $ Git.LsTree.lsTree new + (l, cleanup) <- inRepo $ Git.LsTree.lsTree Git.LsTree.LsTreeRecursive new cvar <- liftIO $ newMVar False commandActions $ map (startExport r db cvar) l void $ liftIO $ cleanup diff --git a/Command/Info.hs b/Command/Info.hs index 20b9bb2e58..84b7cad5fa 100644 --- a/Command/Info.hs +++ b/Command/Info.hs @@ -597,7 +597,7 @@ getDirStatInfo o dir = do getTreeStatInfo :: InfoOptions -> Git.Ref -> Annex (Maybe StatInfo) getTreeStatInfo o r = do fast <- Annex.getState Annex.fast - (ls, cleanup) <- inRepo $ LsTree.lsTree r + (ls, cleanup) <- inRepo $ LsTree.lsTree LsTree.LsTreeRecursive r (presentdata, referenceddata, repodata) <- go fast ls initial ifM (liftIO cleanup) ( return $ Just $ diff --git a/Git/LsTree.hs b/Git/LsTree.hs index 225f2ce138..f678727379 100644 --- a/Git/LsTree.hs +++ b/Git/LsTree.hs @@ -1,6 +1,6 @@ {- git ls-tree interface - - - Copyright 2011-2016 Joey Hess <id@joeyh.name> + - Copyright 2011-2019 Joey Hess <id@joeyh.name> - - Licensed under the GNU GPL version 3 or higher. -} @@ -9,6 +9,7 @@ module Git.LsTree ( TreeItem(..), + LsTreeMode(..), lsTree, lsTree', lsTreeParams, @@ -34,26 +35,30 @@ data TreeItem = TreeItem , file :: TopFilePath } deriving Show -{- Lists the complete contents of a tree, recursing into sub-trees, - - with lazy output. -} -lsTree :: Ref -> Repo -> IO ([TreeItem], IO Bool) +data LsTreeMode = LsTreeRecursive | LsTreeNonRecursive + +{- Lists the contents of a tree, with lazy output. -} +lsTree :: LsTreeMode -> Ref -> Repo -> IO ([TreeItem], IO Bool) lsTree = lsTree' [] -lsTree' :: [CommandParam] -> Ref -> Repo -> IO ([TreeItem], IO Bool) -lsTree' ps t repo = do - (l, cleanup) <- pipeNullSplit (lsTreeParams t ps) repo +lsTree' :: [CommandParam] -> LsTreeMode -> Ref -> Repo -> IO ([TreeItem], IO Bool) +lsTree' ps mode t repo = do + (l, cleanup) <- pipeNullSplit (lsTreeParams mode t ps) repo return (map parseLsTree l, cleanup) -lsTreeParams :: Ref -> [CommandParam] -> [CommandParam] -lsTreeParams r ps = +lsTreeParams :: LsTreeMode -> Ref -> [CommandParam] -> [CommandParam] +lsTreeParams mode r ps = [ Param "ls-tree" , Param "--full-tree" , Param "-z" - , Param "-r" - ] ++ ps ++ + ] ++ recursiveparams ++ ps ++ [ Param "--" , File $ fromRef r ] + where + recursiveparams = case mode of + LsTreeRecursive -> [ Param "-r" ] + LsTreeNonRecursive -> [] {- Lists specified files in a tree. -} lsTreeFiles :: Ref -> [FilePath] -> Repo -> IO [TreeItem] diff --git a/Git/Repair.hs b/Git/Repair.hs index c51d63a5d2..5ae4c61f9e 100644 --- a/Git/Repair.hs +++ b/Git/Repair.hs @@ -341,7 +341,7 @@ verifyTree :: MissingObjects -> Sha -> Repo -> IO Bool verifyTree missing treesha r | S.member treesha missing = return False | otherwise = do - (ls, cleanup) <- pipeNullSplit (LsTree.lsTreeParams treesha []) r + (ls, cleanup) <- pipeNullSplit (LsTree.lsTreeParams LsTree.LsTreeRecursive treesha []) r let objshas = map (LsTree.sha . LsTree.parseLsTree) ls if any (`S.member` missing) objshas then do diff --git a/Git/Tree.hs b/Git/Tree.hs index 9e9b17af2b..f37d78b68b 100644 --- a/Git/Tree.hs +++ b/Git/Tree.hs @@ -13,7 +13,9 @@ module Git.Tree ( getTree, recordTree, TreeItem(..), + treeItemsToTree, adjustTree, + graftTree, treeMode, ) where @@ -47,15 +49,15 @@ data TreeContent deriving (Show, Eq, Ord) {- Gets the Tree for a Ref. -} -getTree :: Ref -> Repo -> IO Tree -getTree r repo = do - (l, cleanup) <- lsTreeWithObjects r repo +getTree :: LsTree.LsTreeMode -> Ref -> Repo -> IO Tree +getTree lstreemode r repo = do + (l, cleanup) <- lsTreeWithObjects lstreemode r repo let !t = either (\e -> error ("ls-tree parse error:" ++ e)) id (extractTree l) void cleanup return t -lsTreeWithObjects :: Ref -> Repo -> IO ([LsTree.TreeItem], IO Bool) +lsTreeWithObjects :: LsTree.LsTreeMode -> Ref -> Repo -> IO ([LsTree.TreeItem], IO Bool) lsTreeWithObjects = LsTree.lsTree' [Param "-t"] newtype MkTreeHandle = MkTreeHandle CoProcess.CoProcessHandle @@ -181,7 +183,7 @@ adjustTree -> m Sha adjustTree adjusttreeitem addtreeitems removefiles r repo = withMkTreeHandle repo $ \h -> do - (l, cleanup) <- liftIO $ lsTreeWithObjects r repo + (l, cleanup) <- liftIO $ lsTreeWithObjects LsTree.LsTreeRecursive r repo (l', _, _) <- go h False [] 1 inTopTree l l'' <- adjustlist h 0 inTopTree (const True) l' sha <- liftIO $ mkTree h l'' @@ -229,6 +231,58 @@ adjustTree adjusttreeitem addtreeitems removefiles r repo = removed (TreeBlob f _ _) = S.member (normalise (gitPath f)) removeset removed _ = False +{- Grafts subtree into the basetree at the specified location. + - + - This is generally much more efficient than using getTree and recordTree, + - or adjustTree, since it only needs to traverse from the top of the tree + - down to the graft location. It does not buffer the whole tree in memory. + -} +graftTree + :: Sha + -> TopFilePath + -> Sha + -> Repo + -> IO Sha +graftTree subtree graftloc basetree repo = + withMkTreeHandle repo $ + go basetree graftdirs + where + go :: Ref -> [TopFilePath] -> MkTreeHandle -> IO Sha + go tsha [] h = do + graft <- graftin h [] + mkTree h [graft] + go tsha graftdirs@(topmostgraphdir:_) h = do + Tree t <- getTree LsTree.LsTreeNonRecursive tsha repo + t' <- case partition isabovegraft t of + ([], _) -> do + graft <- graftin h graftdirs + return (graft:t) + -- normally there can only be one matching item + -- in the tree, but it's theoretically possible + -- for a git tree to have multiple items with the + -- same name, so process them all + (matching, rest) -> do + newshas <- forM matching $ \case + RecordedSubTree tloc tsha' _ -> do + tsha'' <- go tsha' (drop 1 graftdirs) h + return $ RecordedSubTree tloc tsha'' [] + _ -> graftin h $ drop 1 graftdirs + return (newshas ++ rest) + mkTree h t' + + isabovegraft i = beneathSubTree i graftloc || gitPath i == gitPath graftloc + + graftin h t = recordSubTree h $ graftin' t + graftin' [] = RecordedSubTree graftloc subtree [] + graftin' (d:rest) = NewSubTree d [graftin' rest] + + -- For a graftloc of "foo/bar/baz", this generates + -- ["foo", "foo/bar", "foo/bar/baz"] + graftdirs = map (asTopFilePath . toInternalGitPath) $ + mkpaths [] $ splitDirectories $ gitPath graftloc + mkpaths _ [] = [] + mkpaths base (d:rest) = (joinPath base </> d) : mkpaths (base ++ [d]) rest + {- Assumes the list is ordered, with tree objects coming right before their - contents. -} extractTree :: [LsTree.TreeItem] -> Either String Tree diff --git a/Types/Export.hs b/Types/Export.hs index 35b3404240..e10758adef 100644 --- a/Types/Export.hs +++ b/Types/Export.hs @@ -21,7 +21,7 @@ import Utility.Split import qualified System.FilePath.Posix as Posix -- A location on a remote that a key can be exported to. --- The FilePath will be relative to the top of the export, +-- The FilePath will be relative to the top of the remote, -- and uses unix-style path separators. newtype ExportLocation = ExportLocation FilePath deriving (Show, Eq) diff --git a/Types/Import.hs b/Types/Import.hs index d31780167d..5cddabc217 100644 --- a/Types/Import.hs +++ b/Types/Import.hs @@ -19,6 +19,9 @@ import Utility.FileSystemEncoding - location on the remote. -} type ImportLocation = ExportLocation +fromImportLocation :: ImportLocation -> FilePath +fromImportLocation = fromExportLocation + {- An identifier for content stored on a remote that has been imported into - the repository. It should be reasonably short since it is stored in the - git-annex branch. -} @@ -32,10 +35,11 @@ instance Arbitrary ContentIdentifier where arbitrary = ContentIdentifier . encodeBS <$> arbitrary `suchThat` all isAscii -{- List of files that can be imported from a remote. -} -data ImportableContents = ImportableContents - { importableContents :: [(ImportLocation, ContentIdentifier)] - , importableHistory :: [ImportableContents] +{- List of files that can be imported from a remote, each with some added + - information. -} +data ImportableContents info = ImportableContents + { importableContents :: [(ImportLocation, info)] + , importableHistory :: [ImportableContents info] -- ^ Used by remotes that support importing historical versions of -- files that are stored in them. This is equivilant to a git -- commit history. diff --git a/Types/Remote.hs b/Types/Remote.hs index 4a010041bd..87e1bbd38b 100644 --- a/Types/Remote.hs +++ b/Types/Remote.hs @@ -247,7 +247,7 @@ data ImportActions a = ImportActions -- -- May also find old versions of files that are still stored in the -- remote. - { listImportableContents :: a (Maybe ImportableContents) + { listImportableContents :: a (Maybe (ImportableContents ContentIdentifier)) -- Retrieves a file from the remote. Ensures that the file -- it retrieves has the requested ContentIdentifier. -- diff --git a/doc/todo/import_tree.mdwn b/doc/todo/import_tree.mdwn index c0ea294c2c..16a4808d1a 100644 --- a/doc/todo/import_tree.mdwn +++ b/doc/todo/import_tree.mdwn @@ -16,6 +16,11 @@ this. It will only need to be updated when listContents returns a ContentIdentifier that is not already known in the database. +* When on an adjusted unlocked branch, need to import the files unlocked. + +* What if the remote lists importable filenames that are absolute paths, + or contain a "../" attack? + ## race conditions (Some thoughts about races that the design should cover now, but kept here diff --git a/git-annex.cabal b/git-annex.cabal index 0756fdec1a..72d36a009f 100644 --- a/git-annex.cabal +++ b/git-annex.cabal @@ -630,6 +630,7 @@ Executable git-annex Annex.GitOverlay Annex.HashObject Annex.Hook + Annex.Import Annex.Ingest Annex.Init Annex.InodeSentinal