{- git-annex command
 -
 - Copyright 2012-2021 Joey Hess <id@joeyh.name>
 -
 - Licensed under the GNU AGPL version 3 or higher.
 -}

{-# LANGUAGE OverloadedStrings #-}

module Command.Log where

import qualified Data.Set as S
import qualified Data.Map as M
import Data.Char
import Data.Time.Clock.POSIX
import Data.Time
import qualified Data.ByteString.Char8 as B8
import qualified System.FilePath.ByteString as P

import Command
import Logs
import Logs.Location
import qualified Annex.Branch
import qualified Git
import Git.Command
import qualified Remote
import qualified Annex

data RefChange = RefChange 
	{ changetime :: POSIXTime
	, oldref :: Git.Ref
	, newref :: Git.Ref
	, changekey :: Key
	}
	deriving (Show)

data LogChange = Added | Removed

type Outputter = LogChange -> POSIXTime -> [UUID] -> Annex ()

cmd :: Command
cmd = withAnnexOptions [annexedMatchingOptions] $
	command "log" SectionQuery "shows location log"
		paramPaths (seek <$$> optParser)

data LogOptions = LogOptions
	{ logFiles :: CmdParams
	, allOption :: Bool
	, rawDateOption :: Bool
	, gourceOption :: Bool
	, passthruOptions :: [CommandParam]
	}

optParser :: CmdParamsDesc -> Parser LogOptions
optParser desc = LogOptions
	<$> cmdParams desc
	<*> switch
		( long "all"
		<> short 'A'
		<> help "display location log changes to all files"
		)
	<*> switch
		( long "raw-date"
		<> help "display seconds from unix epoch"
		)
	<*> switch
		( long "gource"
		<> help "format output for gource"
		)
	<*> (concat <$> many passthru)
  where
	passthru :: Parser [CommandParam]
	passthru = datepassthru "since"
		<|> datepassthru "after"
		<|> datepassthru "until"
		<|> datepassthru "before"
		<|> (mkpassthru "max-count" <$> strOption
			( long "max-count" <> metavar paramNumber
			<> help "limit number of logs displayed"
			))
	datepassthru n = mkpassthru n <$> strOption
		( long n <> metavar paramDate
		<> help ("show log " ++ n ++ " date")
		)
	mkpassthru n v = [Param ("--" ++ n), Param v]

seek :: LogOptions -> CommandSeek
seek o = ifM (null <$> Annex.Branch.getUnmergedRefs)
	( do
		m <- Remote.uuidDescriptions
		zone <- liftIO getCurrentTimeZone
		let outputter = mkOutputter m zone o
		let seeker = AnnexedFileSeeker
			{ startAction = start o outputter
			, checkContentPresent = Nothing
			-- the way this uses the location log would not be
			-- helped by precaching the current value
			, usesLocationLog = False
			}
		case (logFiles o, allOption o) of
			(fs, False) -> withFilesInGitAnnex ww seeker
				=<< workTreeItems ww fs
			([], True) -> commandAction (startAll o outputter)
			(_, True) -> giveup "Cannot specify both files and --all"
	, giveup "This repository is read-only, and there are unmerged git-annex branches, which prevents displaying location log changes. (Set annex.merge-annex-branches to false to ignore the unmerged git-annex branches.)"
	)
  where
	ww = WarnUnmatchLsFiles

start :: LogOptions -> (FilePath -> Outputter) -> SeekInput -> RawFilePath -> Key -> CommandStart
start o outputter _ file key = do
	(changes, cleanup) <- getKeyLog key (passthruOptions o)
	showLogIncremental (outputter (fromRawFilePath file)) changes
	void $ liftIO cleanup
	stop

startAll :: LogOptions -> (String -> Outputter) -> CommandStart
startAll o outputter = do
	(changes, cleanup) <- getAllLog (passthruOptions o)
	showLog outputter changes
	void $ liftIO cleanup
	stop

{- Displays changes made. Only works when all the RefChanges are for the
 - same key. The method is to compare each value with the value
 - after it in the list, which is the old version of the value.
 -
 - This ncessarily buffers the whole list, so does not stream.
 - But, the number of location log changes for a single key tends to be
 - fairly small.
 -
 - This minimizes the number of reads from git; each logged value is read
 - only once.
 -
 - This also generates subtly better output when the git-annex branch
 - got diverged.
 -}
showLogIncremental :: Outputter -> [RefChange] -> Annex ()
showLogIncremental outputter ps = do
	sets <- mapM (getset newref) ps
	previous <- maybe (return genesis) (getset oldref) (lastMaybe ps)
	let l = sets ++ [previous]
	let changes = map (\((t, new), (_, old)) -> (t, new, old))
		(zip l (drop 1 l))
	sequence_ $ compareChanges outputter changes
  where
	genesis = (0, S.empty)
	getset select change = do
		s <- S.fromList <$> loggedLocationsRef (select change)
		return (changetime change, s)

{- Displays changes made. Streams, and can display changes affecting
 - different keys, but does twice as much reading of logged values
 - as showLogIncremental. -}
showLog :: (String -> Outputter) -> [RefChange] -> Annex ()
showLog outputter cs = forM_ cs $ \c -> do
	let keyname = serializeKey (changekey c)
	new <- S.fromList <$> loggedLocationsRef (newref c)
	old <- S.fromList <$> loggedLocationsRef (oldref c)
	sequence_ $ compareChanges (outputter keyname)
		[(changetime c, new, old)]

mkOutputter :: UUIDDescMap -> TimeZone -> LogOptions -> FilePath -> Outputter
mkOutputter m zone o file
	| rawDateOption o = normalOutput lookupdescription file show
	| gourceOption o = gourceOutput lookupdescription file
	| otherwise = normalOutput lookupdescription file (showTimeStamp zone)
  where
	lookupdescription u = maybe (fromUUID u) (fromUUIDDesc) (M.lookup u m)

normalOutput :: (UUID -> String) -> FilePath -> (POSIXTime -> String) -> Outputter
normalOutput lookupdescription file formattime logchange ts us = do
	qp <- coreQuotePath <$> Annex.getGitConfig
	liftIO $ mapM_ (B8.putStrLn . quote qp . format) us
  where
	time = formattime ts
	addel = case logchange of
		Added -> "+"
		Removed -> "-"
	format u = UnquotedString addel <> " " <> UnquotedString time <> " " 
		<> QuotedPath (toRawFilePath file) <> " | " <> UnquotedByteString (fromUUID u)
		<> " -- " <> UnquotedString (lookupdescription u)

gourceOutput :: (UUID -> String) -> FilePath -> Outputter
gourceOutput lookupdescription file logchange ts us =
	liftIO $ mapM_ (putStrLn . intercalate "|" . format) us
  where
	time = takeWhile isDigit $ show ts
	addel = case logchange of
		Added -> "A" 
		Removed -> "M"
	format u = [ time, lookupdescription u, addel, file ]

{- Generates a display of the changes.
 - Uses a formatter to generate a display of items that are added and
 - removed. -}
compareChanges :: Ord a => (LogChange -> POSIXTime -> [a] -> b) -> [(POSIXTime, S.Set a, S.Set a)] -> [b]
compareChanges format changes = concatMap diff changes
  where
	diff (ts, new, old) =
		[ format Added ts   $ S.toList $ S.difference new old
		, format Removed ts $ S.toList $ S.difference old new
		]

{- Streams the git log for a given key's location log file.
 -
 - This is complicated by git log using paths relative to the current
 - directory, even when looking at files in a different branch. A wacky
 - relative path to the log file has to be used.
 -
 - The --remove-empty is a significant optimisation. It relies on location
 - log files never being deleted in normal operation. Letting git stop
 - once the location log file is gone avoids it checking all the way back
 - to commit 0 to see if it used to exist, so generally speeds things up a
 - *lot* for newish files. -}
getKeyLog :: Key -> [CommandParam] -> Annex ([RefChange], IO Bool)
getKeyLog key os = do
	top <- fromRepo Git.repoPath
	p <- liftIO $ relPathCwdToFile top
	config <- Annex.getGitConfig
	let logfile = p P.</> locationLogFile config key
	getGitLog [fromRawFilePath logfile] (Param "--remove-empty" : os)

{- Streams the git log for all git-annex branch changes. -}
getAllLog :: [CommandParam] -> Annex ([RefChange], IO Bool)
getAllLog = getGitLog []

getGitLog :: [FilePath] -> [CommandParam] -> Annex ([RefChange], IO Bool)
getGitLog fs os = do
	config <- Annex.getGitConfig
	(ls, cleanup) <- inRepo $ pipeNullSplit $
		[ Param "log"
		, Param "-z"
		, Param "--pretty=format:%ct"
		, Param "--raw"
		, Param "--no-abbrev"
		] ++ os ++
		[ Param $ Git.fromRef Annex.Branch.fullname
		, Param "--"
		] ++ map Param fs
	return (parseGitRawLog config (map decodeBL ls), cleanup)

-- Parses chunked git log --raw output, which looks something like:
--
-- [ "timestamp\n:changeline"
-- , "logfile"
-- , ""
-- , "timestamp\n:changeline"
-- , "logfile"
-- , ":changeline"
-- , "logfile"
-- , ""
-- ]
--
-- The timestamp is not included before all changelines, so
-- keep track of the most recently seen timestamp.
parseGitRawLog :: GitConfig -> [String] -> [RefChange]
parseGitRawLog config = parse epoch
  where
	epoch = toEnum 0 :: POSIXTime
	parse oldts ([]:rest) = parse oldts rest
	parse oldts (c1:c2:rest) = case mrc of
		Just rc -> rc : parse ts rest
		Nothing -> parse ts (c2:rest)
	  where
		(ts, cl) = case separate (== '\n') c1 of
			(cl', []) -> (oldts, cl')
			(tss, cl') -> (parseTimeStamp tss, cl')
	  	mrc = do
			(old, new) <- parseRawChangeLine cl
			key <- locationLogFileKey config (toRawFilePath c2)
			return $ RefChange
				{ changetime = ts
				, oldref = old
				, newref = new
				, changekey = key
				}	
	parse _ _ = []

-- Parses something like "100644 100644 oldsha newsha M"
parseRawChangeLine :: String -> Maybe (Git.Ref, Git.Ref)
parseRawChangeLine = go . words
  where
	go (_:_:oldsha:newsha:_) = 
		Just (Git.Ref (encodeBS oldsha), Git.Ref (encodeBS newsha))
	go _ = Nothing

parseTimeStamp :: String -> POSIXTime
parseTimeStamp = utcTimeToPOSIXSeconds . fromMaybe (giveup "bad timestamp") .
	parseTimeM True defaultTimeLocale "%s"

showTimeStamp :: TimeZone -> POSIXTime -> String
showTimeStamp zone = formatTime defaultTimeLocale rfc822DateFormat 
	. utcToZonedTime zone . posixSecondsToUTCTime