split out generic git log parser

Sponsored-By: Jack Hill on Patreon
This commit is contained in:
Joey Hess 2023-11-10 12:37:01 -04:00
parent ae401fae14
commit 561c036664
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
3 changed files with 123 additions and 89 deletions

View file

@ -1,6 +1,6 @@
{- git-annex command {- git-annex command
- -
- Copyright 2012-2021 Joey Hess <id@joeyh.name> - Copyright 2012-2023 Joey Hess <id@joeyh.name>
- -
- Licensed under the GNU AGPL version 3 or higher. - Licensed under the GNU AGPL version 3 or higher.
-} -}
@ -20,19 +20,11 @@ import qualified System.FilePath.ByteString as P
import Command import Command
import Logs import Logs
import Logs.Location import Logs.Location
import qualified Annex.Branch
import qualified Git
import Git.Command
import qualified Remote
import qualified Annex import qualified Annex
import qualified Annex.Branch
data RefChange = RefChange import qualified Remote
{ changetime :: POSIXTime import qualified Git
, oldref :: Git.Ref import Git.Log
, newref :: Git.Ref
, changekey :: Key
}
deriving (Show)
data LogChange = Added | Removed data LogChange = Added | Removed
@ -117,7 +109,7 @@ start o outputter si file key = do
startAll :: LogOptions -> (ActionItem -> SeekInput -> Outputter) -> CommandStart startAll :: LogOptions -> (ActionItem -> SeekInput -> Outputter) -> CommandStart
startAll o outputter = do startAll o outputter = do
(changes, cleanup) <- getAllLog (passthruOptions o) (changes, cleanup) <- getGitLogAnnex [] (passthruOptions o)
showLog (\ai -> outputter ai (SeekInput [])) changes showLog (\ai -> outputter ai (SeekInput [])) changes
void $ liftIO cleanup void $ liftIO cleanup
stop stop
@ -136,7 +128,7 @@ startAll o outputter = do
- This also generates subtly better output when the git-annex branch - This also generates subtly better output when the git-annex branch
- got diverged. - got diverged.
-} -}
showLogIncremental :: Outputter -> [RefChange] -> Annex () showLogIncremental :: Outputter -> [RefChange Key] -> Annex ()
showLogIncremental outputter ps = do showLogIncremental outputter ps = do
sets <- mapM (getset newref) ps sets <- mapM (getset newref) ps
previous <- maybe (return genesis) (getset oldref) (lastMaybe ps) previous <- maybe (return genesis) (getset oldref) (lastMaybe ps)
@ -153,9 +145,9 @@ showLogIncremental outputter ps = do
{- Displays changes made. Streams, and can display changes affecting {- Displays changes made. Streams, and can display changes affecting
- different keys, but does twice as much reading of logged values - different keys, but does twice as much reading of logged values
- as showLogIncremental. -} - as showLogIncremental. -}
showLog :: (ActionItem -> Outputter) -> [RefChange] -> Annex () showLog :: (ActionItem -> Outputter) -> [RefChange Key] -> Annex ()
showLog outputter cs = forM_ cs $ \c -> do showLog outputter cs = forM_ cs $ \c -> do
let ai = mkActionItem (changekey c) let ai = mkActionItem (changed c)
new <- S.fromList <$> loggedLocationsRef (newref c) new <- S.fromList <$> loggedLocationsRef (newref c)
old <- S.fromList <$> loggedLocationsRef (oldref c) old <- S.fromList <$> loggedLocationsRef (oldref c)
sequence_ $ compareChanges (outputter ai) sequence_ $ compareChanges (outputter ai)
@ -236,85 +228,19 @@ compareChanges format changes = concatMap diff changes
- once the location log file is gone avoids it checking all the way back - once the location log file is gone avoids it checking all the way back
- to commit 0 to see if it used to exist, so generally speeds things up a - to commit 0 to see if it used to exist, so generally speeds things up a
- *lot* for newish files. -} - *lot* for newish files. -}
getKeyLog :: Key -> [CommandParam] -> Annex ([RefChange], IO Bool) getKeyLog :: Key -> [CommandParam] -> Annex ([RefChange Key], IO Bool)
getKeyLog key os = do getKeyLog key os = do
top <- fromRepo Git.repoPath top <- fromRepo Git.repoPath
p <- liftIO $ relPathCwdToFile top p <- liftIO $ relPathCwdToFile top
config <- Annex.getGitConfig config <- Annex.getGitConfig
let logfile = p P.</> locationLogFile config key let logfile = p P.</> locationLogFile config key
getGitLog [fromRawFilePath logfile] (Param "--remove-empty" : os) getGitLogAnnex [fromRawFilePath logfile] (Param "--remove-empty" : os)
{- Streams the git log for all git-annex branch changes to location log getGitLogAnnex :: [FilePath] -> [CommandParam] -> Annex ([RefChange Key], IO Bool)
- files. -} getGitLogAnnex fs os = do
getAllLog :: [CommandParam] -> Annex ([RefChange], IO Bool)
getAllLog = getGitLog []
getGitLog :: [FilePath] -> [CommandParam] -> Annex ([RefChange], IO Bool)
getGitLog fs os = do
config <- Annex.getGitConfig config <- Annex.getGitConfig
(ls, cleanup) <- inRepo $ pipeNullSplit $ let fileselector = locationLogFileKey config . toRawFilePath
[ Param "log" inRepo $ getGitLog Annex.Branch.fullname fs os fileselector
, Param "-z"
, Param "--pretty=format:%ct"
, Param "--raw"
, Param "--no-abbrev"
, Param "--no-renames"
] ++ os ++
[ Param $ Git.fromRef Annex.Branch.fullname
, Param "--"
] ++ map Param fs
return (parseGitRawLog config (map decodeBL ls), cleanup)
-- Parses chunked git log --raw output, which looks something like:
--
-- [ "timestamp\n:changeline"
-- , "logfile"
-- , ""
-- , "timestamp\n:changeline"
-- , "logfile"
-- , ":changeline"
-- , "logfile"
-- , ""
-- ]
--
-- The timestamp is not included before all changelines, so
-- keep track of the most recently seen timestamp.
--
-- Only changes to location log files are returned.
parseGitRawLog :: GitConfig -> [String] -> [RefChange]
parseGitRawLog config = parse epoch
where
epoch = toEnum 0 :: POSIXTime
parse oldts ([]:rest) = parse oldts rest
parse oldts (c1:c2:rest) = case mrc of
Just rc -> rc : parse ts rest
Nothing -> parse ts (c2:rest)
where
(ts, cl) = case separate (== '\n') c1 of
(cl', []) -> (oldts, cl')
(tss, cl') -> (parseTimeStamp tss, cl')
mrc = do
(old, new) <- parseRawChangeLine cl
key <- locationLogFileKey config (toRawFilePath c2)
return $ RefChange
{ changetime = ts
, oldref = old
, newref = new
, changekey = key
}
parse _ _ = []
-- Parses something like "100644 100644 oldsha newsha M"
parseRawChangeLine :: String -> Maybe (Git.Ref, Git.Ref)
parseRawChangeLine = go . words
where
go (_:_:oldsha:newsha:_) =
Just (Git.Ref (encodeBS oldsha), Git.Ref (encodeBS newsha))
go _ = Nothing
parseTimeStamp :: String -> POSIXTime
parseTimeStamp = utcTimeToPOSIXSeconds . fromMaybe (giveup "bad timestamp") .
parseTimeM True defaultTimeLocale "%s"
showTimeStamp :: TimeZone -> POSIXTime -> String showTimeStamp :: TimeZone -> POSIXTime -> String
showTimeStamp zone = formatTime defaultTimeLocale rfc822DateFormat showTimeStamp zone = formatTime defaultTimeLocale rfc822DateFormat

107
Git/Log.hs Normal file
View file

@ -0,0 +1,107 @@
{- git log
-
- Copyright 2023 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Git.Log where
import Common
import Git
import Git.Command
import Data.Time
import Data.Time.Clock.POSIX
-- A change made to a file.
data RefChange t = RefChange
{ changetime :: POSIXTime
, changed :: t
, changedfile :: FilePath
, oldref :: Ref
, newref :: Ref
}
deriving (Show)
-- Get the git log. Note that the returned cleanup action should only be
-- run after processing the returned list.
getGitLog
:: Ref
-> [FilePath]
-> [CommandParam]
-> (FilePath -> Maybe t)
-> Repo
-> IO ([RefChange t], IO Bool)
getGitLog ref fs os fileselector repo = do
(ls, cleanup) <- pipeNullSplit ps repo
return (parseGitRawLog fileselector (map decodeBL ls), cleanup)
where
ps =
[ Param "log"
, Param "-z"
, Param ("--pretty=format:"++commitinfoFormat)
, Param "--raw"
, Param "--no-abbrev"
, Param "--no-renames"
] ++ os ++
[ Param (fromRef ref)
, Param "--"
] ++ map Param fs
-- The commitinfo is the timestamp of the commit, followed by
-- the commit hash and then the commit's parents, separated by spaces.
commitinfoFormat :: String
commitinfoFormat = "%ct"
-- Parses chunked git log --raw output generated by getGitLog,
-- which looks something like:
--
-- [ "commitinfo\n:changeline"
-- , "filename"
-- , ""
-- , "commitinfo\n:changeline"
-- , "filename"
-- , ":changeline"
-- , "filename"
-- , ""
-- ]
--
-- The commitinfo is not included before all changelines, so
-- keep track of the most recently seen commitinfo.
parseGitRawLog :: (FilePath -> Maybe t) -> [String] -> [RefChange t]
parseGitRawLog fileselector = parse epoch
where
epoch = toEnum 0 :: POSIXTime
parse oldts ([]:rest) = parse oldts rest
parse oldts (c1:c2:rest) = case mrc of
Just rc -> rc : parse ts rest
Nothing -> parse ts (c2:rest)
where
(ts, cl) = case separate (== '\n') c1 of
(cl', []) -> (oldts, cl')
(tss, cl') -> (parseTimeStamp tss, cl')
mrc = do
(old, new) <- parseRawChangeLine cl
v <- fileselector c2
return $ RefChange
{ changetime = ts
, changed = v
, changedfile = c2
, oldref = old
, newref = new
}
parse _ _ = []
-- Parses something like ":100644 100644 oldsha newsha M"
-- extracting the shas.
parseRawChangeLine :: String -> Maybe (Git.Ref, Git.Ref)
parseRawChangeLine = go . words
where
go (_:_:oldsha:newsha:_) =
Just (Git.Ref (encodeBS oldsha), Git.Ref (encodeBS newsha))
go _ = Nothing
parseTimeStamp :: String -> POSIXTime
parseTimeStamp = utcTimeToPOSIXSeconds . fromMaybe (giveup "bad timestamp") .
parseTimeM True defaultTimeLocale "%s"

View file

@ -773,6 +773,7 @@ Executable git-annex
Git.Hook Git.Hook
Git.Index Git.Index
Git.LockFile Git.LockFile
Git.Log
Git.LsFiles Git.LsFiles
Git.LsTree Git.LsTree
Git.Merge Git.Merge