{- git-annex general metadata storage log - - A line of the log will look like "timestamp field [+-]value [...]" - - Note that unset values are preserved. Consider this case: - - We have: - - 100 foo +x - 200 foo -x - - An unmerged remote has: - - 150 foo +x - - After union merge, because the foo -x was preserved, we know that - after the other remote redundantly set foo +x, it was unset, - and so foo currently has no value. - - - Copyright 2014 Joey Hess <joey@kitenet.net> - - Licensed under the GNU GPL version 3 or higher. -} {-# OPTIONS_GHC -fno-warn-orphans #-} {-# LANGUAGE CPP #-} module Logs.MetaData ( getCurrentMetaData, addMetaData, addMetaData', currentMetaData, copyMetaData, ) where import Common.Annex import Types.MetaData import Annex.MetaData.StandardFields import qualified Annex.Branch import Logs import Logs.SingleValue import qualified Data.Set as S import qualified Data.Map as M import Data.Time.Clock.POSIX import Data.Time.Format import System.Locale instance SingleValueSerializable MetaData where serialize = Types.MetaData.serialize deserialize = Types.MetaData.deserialize getMetaDataLog :: Key -> Annex (Log MetaData) getMetaDataLog = readLog . metaDataLogFile {- Go through the log from oldest to newest, and combine it all - into a single MetaData representing the current state. - - Automatically generates a lastchanged metadata for each field that's - currently set, based on timestamps in the log. -} getCurrentMetaData :: Key -> Annex MetaData getCurrentMetaData k = do ls <- S.toAscList <$> getMetaDataLog k let loggedmeta = currentMetaData $ combineMetaData $ map value ls return $ currentMetaData $ unionMetaData loggedmeta (lastchanged ls loggedmeta) where lastchanged [] _ = emptyMetaData lastchanged ls (MetaData currentlyset) = let m = foldl' (flip M.union) M.empty (map genlastchanged ls) in MetaData $ -- Add a overall lastchanged using the oldest log -- item (log is in ascending order). M.insert lastChangedField (lastchangedval $ Prelude.last ls) $ M.mapKeys mkLastChangedField $ -- Only include fields that are currently set. m `M.intersection` currentlyset -- Makes each field have the timestamp as its value. genlastchanged l = let MetaData m = value l ts = lastchangedval l in M.map (const ts) m lastchangedval l = S.singleton $ toMetaValue $ showts $ changed l showts = formatTime defaultTimeLocale "%F@%H-%M-%S" . posixSecondsToUTCTime {- Adds in some metadata, which can override existing values, or unset - them, but otherwise leaves any existing metadata as-is. -} addMetaData :: Key -> MetaData -> Annex () addMetaData k metadata = addMetaData' k metadata =<< liftIO getPOSIXTime {- Reusing the same timestamp when making changes to the metadata - of multiple keys is a nice optimisation. The same metadata lines - will tend to be generated across the different log files, and so - git will be able to pack the data more efficiently. -} addMetaData' :: Key -> MetaData -> POSIXTime -> Annex () addMetaData' k d@(MetaData m) now | d == emptyMetaData = noop | otherwise = Annex.Branch.change (metaDataLogFile k) $ showLog . simplifyLog . S.insert (LogEntry now metadata) . parseLog where metadata = MetaData $ M.filterWithKey (\f _ -> not (isLastChangedField f)) m {- Simplify a log, removing historical values that are no longer - needed. - - This is not as simple as just making a single log line with the newest - state of all metadata. Consider this case: - - We have: - - 100 foo +x bar +y - 200 foo -x - - An unmerged remote has: - - 150 bar -y baz +w - - If what we have were simplified to "200 foo -x bar +y" then when the line - from the remote became available, it would be older than the simplified - line, and its change to bar would not take effect. That is wrong. - - Instead, simplify it to: - - 100 bar +y - 200 foo -x - - (Note that this ends up with the same number of lines as the - unsimplified version, so there's really no point in updating - the log to this version. Doing so would only add data to git, - with little benefit.) - - Now merging with the remote yields: - - 100 bar +y - 150 bar -y baz +w - 200 foo -x - - Simplifying again: - - 150 bar +z baz +w - 200 foo -x -} simplifyLog :: Log MetaData -> Log MetaData simplifyLog s = case sl of (newest:rest) -> let sl' = go [newest] (value newest) rest in if length sl' < length sl then S.fromList sl' else s _ -> s where #if MIN_VERSION_containers(0,5,0) sl = S.toDescList s #else sl = reverse (S.toAscList s) #endif go c _ [] = c go c newer (l:ls) | unique == emptyMetaData = go c newer ls | otherwise = go (l { value = unique } : c) (unionMetaData unique newer) ls where older = value l unique = older `differenceMetaData` newer {- Copies the metadata from the old key to the new key. - - The exact content of the metadata file is copied, so that the timestamps - remain the same, and because this is more space-efficient in the git - repository. - - Any metadata already attached to the new key is not preserved. -} copyMetaData :: Key -> Key -> Annex () copyMetaData oldkey newkey | oldkey == newkey = noop | otherwise = do l <- getMetaDataLog oldkey unless (S.null l) $ Annex.Branch.change (metaDataLogFile newkey) $ const $ showLog l