wip RawFilePath

Goal is to make git-annex faster by using ByteString for all the
worktree traversal. For now, this is focusing on Command.Find,
in order to benchmark how much it helps. (All other commands are
temporarily disabled)

Currently in a very bad unbuildable in-between state.
This commit is contained in:
Joey Hess 2019-11-25 16:18:19 -04:00
parent 1f035c0d66
commit 6a97ff6b3a
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
25 changed files with 258 additions and 200 deletions

View file

@ -34,37 +34,40 @@ import Git.Sha
import Numeric
import System.Posix.Types
import qualified Data.ByteString.Lazy as L
{- Scans for files that are checked into git's index at the specified locations. -}
inRepo :: [FilePath] -> Repo -> IO ([FilePath], IO Bool)
inRepo :: [RawFilePath] -> Repo -> IO ([RawFilePath], IO Bool)
inRepo = inRepo' []
inRepo' :: [CommandParam] -> [FilePath] -> Repo -> IO ([FilePath], IO Bool)
inRepo' ps l = pipeNullSplit $
Param "ls-files" :
Param "--cached" :
Param "-z" :
ps ++
(Param "--" : map File l)
inRepo' :: [CommandParam] -> [RawFilePath] -> Repo -> IO ([RawFilePath], IO Bool)
inRepo' ps l repo = pipeNullSplit' params repo
where
params =
Param "ls-files" :
Param "--cached" :
Param "-z" :
ps ++
(Param "--" : map (File . fromRawFilePath) l)
{- Files that are checked into the index or have been committed to a
- branch. -}
inRepoOrBranch :: Branch -> [FilePath] -> Repo -> IO ([FilePath], IO Bool)
inRepoOrBranch :: Branch -> [RawFilePath] -> Repo -> IO ([RawFilePath], IO Bool)
inRepoOrBranch (Ref b) = inRepo' [Param $ "--with-tree=" ++ b]
{- Scans for files at the specified locations that are not checked into git. -}
notInRepo :: Bool -> [FilePath] -> Repo -> IO ([FilePath], IO Bool)
notInRepo :: Bool -> [RawFilePath] -> Repo -> IO ([RawFilePath], IO Bool)
notInRepo = notInRepo' []
notInRepo' :: [CommandParam] -> Bool -> [FilePath] -> Repo -> IO ([FilePath], IO Bool)
notInRepo' ps include_ignored l repo = pipeNullSplit params repo
notInRepo' :: [CommandParam] -> Bool -> [RawFilePath] -> Repo -> IO ([RawFilePath], IO Bool)
notInRepo' ps include_ignored l repo = pipeNullSplit' params repo
where
params = concat
[ [ Param "ls-files", Param "--others"]
, ps
, exclude
, [ Param "-z", Param "--" ]
, map File l
, map (File . fromRawFilePath) l
]
exclude
| include_ignored = []
@ -72,48 +75,48 @@ notInRepo' ps include_ignored l repo = pipeNullSplit params repo
{- Scans for files at the specified locations that are not checked into
- git. Empty directories are included in the result. -}
notInRepoIncludingEmptyDirectories :: Bool -> [FilePath] -> Repo -> IO ([FilePath], IO Bool)
notInRepoIncludingEmptyDirectories :: Bool -> [RawFilePath] -> Repo -> IO ([RawFilePath], IO Bool)
notInRepoIncludingEmptyDirectories = notInRepo' [Param "--directory"]
{- Finds all files in the specified locations, whether checked into git or
- not. -}
allFiles :: [FilePath] -> Repo -> IO ([FilePath], IO Bool)
allFiles l = pipeNullSplit $
allFiles :: [RawFilePath] -> Repo -> IO ([RawFilePath], IO Bool)
allFiles l = pipeNullSplit' $
Param "ls-files" :
Param "--cached" :
Param "--others" :
Param "-z" :
Param "--" :
map File l
map (File . fromRawFilePath) l
{- Returns a list of files in the specified locations that have been
- deleted. -}
deleted :: [FilePath] -> Repo -> IO ([FilePath], IO Bool)
deleted l repo = pipeNullSplit params repo
deleted :: [RawFilePath] -> Repo -> IO ([RawFilePath], IO Bool)
deleted l repo = pipeNullSplit' params repo
where
params =
Param "ls-files" :
Param "--deleted" :
Param "-z" :
Param "--" :
map File l
map (File . fromRawFilePath) l
{- Returns a list of files in the specified locations that have been
- modified. -}
modified :: [FilePath] -> Repo -> IO ([FilePath], IO Bool)
modified l repo = pipeNullSplit params repo
modified :: [RawFilePath] -> Repo -> IO ([RawFilePath], IO Bool)
modified l repo = pipeNullSplit' params repo
where
params =
Param "ls-files" :
Param "--modified" :
Param "-z" :
Param "--" :
map File l
map (File . fromRawFilePath) l
{- Files that have been modified or are not checked into git (and are not
- ignored). -}
modifiedOthers :: [FilePath] -> Repo -> IO ([FilePath], IO Bool)
modifiedOthers l repo = pipeNullSplit params repo
modifiedOthers :: [RawFilePath] -> Repo -> IO ([RawFilePath], IO Bool)
modifiedOthers l repo = pipeNullSplit' params repo
where
params =
Param "ls-files" :
@ -122,69 +125,69 @@ modifiedOthers l repo = pipeNullSplit params repo
Param "--exclude-standard" :
Param "-z" :
Param "--" :
map File l
map (File . fromRawFilePath) l
{- Returns a list of all files that are staged for commit. -}
staged :: [FilePath] -> Repo -> IO ([FilePath], IO Bool)
staged :: [RawFilePath] -> Repo -> IO ([RawFilePath], IO Bool)
staged = staged' []
{- Returns a list of the files, staged for commit, that are being added,
- moved, or changed (but not deleted), from the specified locations. -}
stagedNotDeleted :: [FilePath] -> Repo -> IO ([FilePath], IO Bool)
stagedNotDeleted :: [RawFilePath] -> Repo -> IO ([RawFilePath], IO Bool)
stagedNotDeleted = staged' [Param "--diff-filter=ACMRT"]
staged' :: [CommandParam] -> [FilePath] -> Repo -> IO ([FilePath], IO Bool)
staged' ps l = pipeNullSplit $ prefix ++ ps ++ suffix
staged' :: [CommandParam] -> [RawFilePath] -> Repo -> IO ([RawFilePath], IO Bool)
staged' ps l repo = pipeNullSplit' (prefix ++ ps ++ suffix) repo
where
prefix = [Param "diff", Param "--cached", Param "--name-only", Param "-z"]
suffix = Param "--" : map File l
suffix = Param "--" : map (File . fromRawFilePath) l
type StagedDetails = (FilePath, Maybe Sha, Maybe FileMode)
type StagedDetails = (RawFilePath, Maybe Sha, Maybe FileMode)
{- Returns details about files that are staged in the index,
- as well as files not yet in git. Skips ignored files. -}
stagedOthersDetails :: [FilePath] -> Repo -> IO ([StagedDetails], IO Bool)
stagedOthersDetails :: [RawFilePath] -> Repo -> IO ([StagedDetails], IO Bool)
stagedOthersDetails = stagedDetails' [Param "--others", Param "--exclude-standard"]
{- Returns details about all files that are staged in the index. -}
stagedDetails :: [FilePath] -> Repo -> IO ([StagedDetails], IO Bool)
stagedDetails :: [RawFilePath] -> Repo -> IO ([StagedDetails], IO Bool)
stagedDetails = stagedDetails' []
{- Gets details about staged files, including the Sha of their staged
- contents. -}
stagedDetails' :: [CommandParam] -> [FilePath] -> Repo -> IO ([StagedDetails], IO Bool)
stagedDetails' :: [CommandParam] -> [RawFilePath] -> Repo -> IO ([StagedDetails], IO Bool)
stagedDetails' ps l repo = do
(ls, cleanup) <- pipeNullSplit params repo
return (map parse ls, cleanup)
where
params = Param "ls-files" : Param "--stage" : Param "-z" : ps ++
Param "--" : map File l
Param "--" : map (File . fromRawFilePath) l
parse s
| null file = (s, Nothing, Nothing)
| otherwise = (file, extractSha $ take shaSize rest, readmode mode)
| null file = (L.toStrict s, Nothing, Nothing)
| otherwise = (toRawFilePath file, extractSha $ take shaSize rest, readmode mode)
where
(metadata, file) = separate (== '\t') s
(metadata, file) = separate (== '\t') (decodeBL' s)
(mode, rest) = separate (== ' ') metadata
readmode = fst <$$> headMaybe . readOct
{- Returns a list of the files in the specified locations that are staged
- for commit, and whose type has changed. -}
typeChangedStaged :: [FilePath] -> Repo -> IO ([FilePath], IO Bool)
typeChangedStaged :: [RawFilePath] -> Repo -> IO ([RawFilePath], IO Bool)
typeChangedStaged = typeChanged' [Param "--cached"]
{- Returns a list of the files in the specified locations whose type has
- changed. Files only staged for commit will not be included. -}
typeChanged :: [FilePath] -> Repo -> IO ([FilePath], IO Bool)
typeChanged :: [RawFilePath] -> Repo -> IO ([RawFilePath], IO Bool)
typeChanged = typeChanged' []
typeChanged' :: [CommandParam] -> [FilePath] -> Repo -> IO ([FilePath], IO Bool)
typeChanged' :: [CommandParam] -> [RawFilePath] -> Repo -> IO ([RawFilePath], IO Bool)
typeChanged' ps l repo = do
(fs, cleanup) <- pipeNullSplit (prefix ++ ps ++ suffix) repo
-- git diff returns filenames relative to the top of the git repo;
-- convert to filenames relative to the cwd, like git ls-files.
top <- absPath (repoPath repo)
currdir <- getCurrentDirectory
return (map (\f -> relPathDirToFileAbs currdir $ top </> f) fs, cleanup)
return (map (\f -> toRawFilePath (relPathDirToFileAbs currdir $ top </> decodeBL' f)) fs, cleanup)
where
prefix =
[ Param "diff"
@ -192,7 +195,7 @@ typeChanged' ps l repo = do
, Param "--diff-filter=T"
, Param "-z"
]
suffix = Param "--" : (if null l then [File "."] else map File l)
suffix = Param "--" : (if null l then [File "."] else map (File . fromRawFilePath) l)
{- A item in conflict has two possible values.
- Either can be Nothing, when that side deleted the file. -}
@ -202,7 +205,7 @@ data Conflicting v = Conflicting
} deriving (Show)
data Unmerged = Unmerged
{ unmergedFile :: FilePath
{ unmergedFile :: RawFilePath
, unmergedTreeItemType :: Conflicting TreeItemType
, unmergedSha :: Conflicting Sha
}
@ -217,21 +220,21 @@ data Unmerged = Unmerged
- 3 = them
- If a line is omitted, that side removed the file.
-}
unmerged :: [FilePath] -> Repo -> IO ([Unmerged], IO Bool)
unmerged :: [RawFilePath] -> Repo -> IO ([Unmerged], IO Bool)
unmerged l repo = do
(fs, cleanup) <- pipeNullSplit params repo
return (reduceUnmerged [] $ catMaybes $ map parseUnmerged fs, cleanup)
return (reduceUnmerged [] $ catMaybes $ map (parseUnmerged . decodeBL') fs, cleanup)
where
params =
Param "ls-files" :
Param "--unmerged" :
Param "-z" :
Param "--" :
map File l
map (File . fromRawFilePath) l
data InternalUnmerged = InternalUnmerged
{ isus :: Bool
, ifile :: FilePath
, ifile :: RawFilePath
, itreeitemtype :: Maybe TreeItemType
, isha :: Maybe Sha
}
@ -245,9 +248,9 @@ parseUnmerged s
if stage /= 2 && stage /= 3
then Nothing
else do
treeitemtype <- readTreeItemType rawtreeitemtype
treeitemtype <- readTreeItemType (encodeBS rawtreeitemtype)
sha <- extractSha rawsha
return $ InternalUnmerged (stage == 2) file
return $ InternalUnmerged (stage == 2) (toRawFilePath file)
(Just treeitemtype) (Just sha)
_ -> Nothing
where