git-annex/Git/Log.hs
Joey Hess 7c7c9912c1
migrate --update gets keys
The git log is outputting the diff, but this only looks at the new
files. When we have a new file, we can get the old filename by just
replacing "new" with "old". And then use branchFileRef to refer to it
allows catting the old key.

While this does have to skip past the old files in the diff, it's still
faster than calling git diff separately.

Sponsored-by: Nicholas Golder-Manning on Patreon
2023-12-07 17:25:56 -04:00

115 lines
2.9 KiB
Haskell

{- git log
-
- Copyright 2023 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Git.Log where
import Common
import Git
import Git.Command
import Git.Sha
import Data.Time
import Data.Time.Clock.POSIX
-- A change made to a file.
data LoggedFileChange t = LoggedFileChange
{ changetime :: POSIXTime
, changed :: t
, changedfile :: FilePath
, oldref :: Ref
, newref :: Ref
}
deriving (Show)
-- Get the git log of changes to files.
--
-- Note that the returned cleanup action should only be
-- run after processing the returned list.
getGitLog
:: Ref
-> Maybe Ref
-> [FilePath]
-> [CommandParam]
-> (Sha -> FilePath -> Maybe t)
-> Repo
-> IO ([LoggedFileChange t], IO Bool)
getGitLog ref stopref fs os selector repo = do
(ls, cleanup) <- pipeNullSplit ps repo
return (parseGitRawLog selector (map decodeBL ls), cleanup)
where
ps =
[ Param "log"
, Param "-z"
, Param ("--pretty=format:"++commitinfoFormat)
, Param "--raw"
, Param "--no-abbrev"
, Param "--no-renames"
] ++ os ++
[ case stopref of
Just stopref' -> Param $
fromRef stopref' <> ".." <> fromRef ref
Nothing -> Param (fromRef ref)
, Param "--"
] ++ map Param fs
-- The commitinfo is the commit hash followed by its timestamp.
commitinfoFormat :: String
commitinfoFormat = "%H %ct"
-- Parses chunked git log --raw output generated by getGitLog,
-- which looks something like:
--
-- [ "commitinfo\n:changeline"
-- , "filename"
-- , ""
-- , "commitinfo\n:changeline"
-- , "filename"
-- , ":changeline"
-- , "filename"
-- , ""
-- ]
--
-- The commitinfo is not included before all changelines, so
-- keep track of the most recently seen commitinfo.
parseGitRawLog :: (Ref -> FilePath -> Maybe t) -> [String] -> [LoggedFileChange t]
parseGitRawLog selector = parse (deleteSha, epoch)
where
epoch = toEnum 0 :: POSIXTime
parse old ([]:rest) = parse old rest
parse (oldcommitsha, oldts) (c1:c2:rest) = case mrc of
Just rc -> rc : parse (commitsha, ts) rest
Nothing -> parse (commitsha, ts) (c2:rest)
where
(commitsha, ts, cl) = case separate (== '\n') c1 of
(cl', []) -> (oldcommitsha, oldts, cl')
(ci, cl') -> case words ci of
(css:tss:[]) -> (Ref (encodeBS css), parseTimeStamp tss, cl')
_ -> (oldcommitsha, oldts, cl')
mrc = do
(old, new) <- parseRawChangeLine cl
v <- selector commitsha c2
return $ LoggedFileChange
{ changetime = ts
, changed = v
, changedfile = c2
, oldref = old
, newref = new
}
parse _ _ = []
-- Parses something like ":100644 100644 oldsha newsha M"
-- extracting the shas.
parseRawChangeLine :: String -> Maybe (Git.Ref, Git.Ref)
parseRawChangeLine = go . words
where
go (_:_:oldsha:newsha:_) =
Just (Git.Ref (encodeBS oldsha), Git.Ref (encodeBS newsha))
go _ = Nothing
parseTimeStamp :: String -> POSIXTime
parseTimeStamp = utcTimeToPOSIXSeconds . fromMaybe (giveup "bad timestamp") .
parseTimeM True defaultTimeLocale "%s"