From 2d46038754c3e9c7c87be9395f1a95a6fc4bb0ef Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 9 Jan 2019 13:06:37 -0400 Subject: [PATCH] converting more log files to use Builder Probably not any particular speedup in this, since most of these logs are not written to often. Possibly chunk log writing is sped up, but writes to chunk logs are interleaved with expensive data transfers to remotes, so unlikely to be a noticiable speedup. --- Annex/Branch/Transitions.hs | 10 ++++++---- Annex/VectorClock.hs | 9 +++++++-- Logs/Chunk.hs | 2 +- Logs/Chunk/Pure.hs | 31 ++++++++++++++++--------------- Logs/Config.hs | 11 +++++++++-- Logs/Export.hs | 28 ++++++++++++++++++---------- Logs/MapLog.hs | 20 +++++++++++--------- Logs/PreferredContent/Raw.hs | 11 +++++++++-- Logs/Presence/Pure.hs | 3 +-- Logs/RemoteState.hs | 8 ++++++-- Logs/SingleValue/Pure.hs | 4 +--- Logs/UUIDBased.hs | 10 ++++++---- 12 files changed, 91 insertions(+), 56 deletions(-) diff --git a/Annex/Branch/Transitions.hs b/Annex/Branch/Transitions.hs index 5d46b2cc3c..a23ae279b0 100644 --- a/Annex/Branch/Transitions.hs +++ b/Annex/Branch/Transitions.hs @@ -46,10 +46,12 @@ dropDead f content trustmap = case getLogVariety f of | f == trustLog -> PreserveFile | otherwise -> ChangeFile $ encodeBL $ UUIDBased.showLog id $ dropDeadFromMapLog trustmap id $ UUIDBased.parseLog Just (decodeBL content) - Just NewUUIDBasedLog -> ChangeFile $ encodeBL $ - UUIDBased.showLogNew id $ dropDeadFromMapLog trustmap id $ UUIDBased.parseLogNew Just (decodeBL content) - Just (ChunkLog _) -> ChangeFile $ encodeBL $ - Chunk.showLog $ dropDeadFromMapLog trustmap fst $ Chunk.parseLog (decodeBL content) + Just NewUUIDBasedLog -> ChangeFile $ toLazyByteString $ + UUIDBased.buildLogNew (byteString . encodeBS) $ + dropDeadFromMapLog trustmap id $ + UUIDBased.parseLogNew Just (decodeBL content) + Just (ChunkLog _) -> ChangeFile $ toLazyByteString $ + Chunk.buildLog $ dropDeadFromMapLog trustmap fst $ Chunk.parseLog (decodeBL content) Just (PresenceLog _) -> let newlog = Presence.compactLog $ dropDeadFromPresenceLog trustmap $ Presence.parseLog content in if null newlog diff --git a/Annex/VectorClock.hs b/Annex/VectorClock.hs index 5e9e2cb89b..3a77f5f0eb 100644 --- a/Annex/VectorClock.hs +++ b/Annex/VectorClock.hs @@ -3,7 +3,7 @@ - We don't have a way yet to keep true distributed vector clocks. - The next best thing is a timestamp. - - - Copyright 2017 Joey Hess + - Copyright 2017-2019 Joey Hess - - Licensed under the GNU GPL version 3 or higher. -} @@ -11,12 +11,14 @@ module Annex.VectorClock where import Data.Time.Clock.POSIX +import Data.ByteString.Builder import Control.Applicative import Prelude import Utility.Env import Utility.TimeStamp import Utility.QuickCheck +import Utility.FileSystemEncoding import qualified Data.Attoparsec.ByteString.Lazy as A -- | Some very old logs did not have any time stamp at all; @@ -40,9 +42,12 @@ currentVectorClock = go =<< getEnv "GIT_ANNEX_VECTOR_CLOCK" Nothing -> VectorClock <$> getPOSIXTime formatVectorClock :: VectorClock -> String -formatVectorClock Unknown = "0" +formatVectorClock Unknown = "0" formatVectorClock (VectorClock t) = show t +buildVectorClock :: VectorClock -> Builder +buildVectorClock = byteString . encodeBS' . formatVectorClock + parseVectorClock :: String -> Maybe VectorClock parseVectorClock t = VectorClock <$> parsePOSIXTime t diff --git a/Logs/Chunk.hs b/Logs/Chunk.hs index 7926713915..11b49b6209 100644 --- a/Logs/Chunk.hs +++ b/Logs/Chunk.hs @@ -38,7 +38,7 @@ chunksStored u k chunkmethod chunkcount = do c <- liftIO currentVectorClock config <- Annex.getGitConfig Annex.Branch.change (chunkLogFile config k) $ - encodeBL . showLog . changeMapLog c (u, chunkmethod) chunkcount . parseLog . decodeBL + buildLog . changeMapLog c (u, chunkmethod) chunkcount . parseLog . decodeBL chunksRemoved :: UUID -> Key -> ChunkMethod -> Annex () chunksRemoved u k chunkmethod = chunksStored u k chunkmethod 0 diff --git a/Logs/Chunk/Pure.hs b/Logs/Chunk/Pure.hs index 7fbadb623e..fc78c7300b 100644 --- a/Logs/Chunk/Pure.hs +++ b/Logs/Chunk/Pure.hs @@ -1,6 +1,6 @@ {- Chunk logs, pure operations. - - - Copyright 2014 Joey Hess + - Copyright 2014, 2019 Joey Hess - - Licensed under the GNU GPL version 3 or higher. -} @@ -11,16 +11,19 @@ module Logs.Chunk.Pure , ChunkCount , ChunkLog , parseLog - , showLog + , buildLog ) where import Annex.Common import Logs.MapLog import Data.Int +import qualified Data.ByteString as S +import Data.ByteString.Builder + -- Currently chunks are all fixed size, but other chunking methods -- may be added. -data ChunkMethod = FixedSizeChunks ChunkSize | UnknownChunks String +data ChunkMethod = FixedSizeChunks ChunkSize | UnknownChunks S.ByteString deriving (Ord, Eq, Show) type ChunkSize = Int64 @@ -31,25 +34,23 @@ type ChunkCount = Integer type ChunkLog = MapLog (UUID, ChunkMethod) ChunkCount parseChunkMethod :: String -> ChunkMethod -parseChunkMethod s = maybe (UnknownChunks s) FixedSizeChunks (readish s) +parseChunkMethod s = maybe (UnknownChunks $ encodeBS s) FixedSizeChunks (readish s) -showChunkMethod :: ChunkMethod -> String -showChunkMethod (FixedSizeChunks sz) = show sz -showChunkMethod (UnknownChunks s) = s +buildChunkMethod :: ChunkMethod -> Builder +buildChunkMethod (FixedSizeChunks sz) = int64Dec sz +buildChunkMethod (UnknownChunks s) = byteString s parseLog :: String -> ChunkLog parseLog = parseMapLog fieldparser valueparser where fieldparser s = - let (u,m) = separate (== sep) s + let (u,m) = separate (== ':') s in Just (toUUID u, parseChunkMethod m) valueparser = readish -showLog :: ChunkLog -> String -showLog = showMapLog fieldshower valueshower +buildLog :: ChunkLog -> Builder +buildLog = buildMapLog fieldbuilder valuebuilder where - fieldshower (u, m) = fromUUID u ++ sep : showChunkMethod m - valueshower = show - -sep :: Char -sep = ':' + fieldbuilder (u, m) = byteString (fromUUID u) <> sep <> buildChunkMethod m + valuebuilder = integerDec + sep = charUtf8 ':' diff --git a/Logs/Config.hs b/Logs/Config.hs index a98ac64398..767cee48ac 100644 --- a/Logs/Config.hs +++ b/Logs/Config.hs @@ -1,6 +1,6 @@ {- git-annex repository-global config log - - - Copyright 2017 Joey Hess + - Copyright 2017-2019 Joey Hess - - Licensed under the GNU GPL version 3 or higher. -} @@ -20,6 +20,7 @@ import Logs.MapLog import qualified Annex.Branch import qualified Data.Map as M +import Data.ByteString.Builder type ConfigName = String type ConfigValue = String @@ -34,7 +35,7 @@ setGlobalConfig' :: ConfigName -> ConfigValue -> Annex () setGlobalConfig' name new = do c <- liftIO currentVectorClock Annex.Branch.change configLog $ - encodeBL . showMapLog id id . changeMapLog c name new . parseGlobalConfig . decodeBL + buildGlobalConfig . changeMapLog c name new . parseGlobalConfig . decodeBL unsetGlobalConfig :: ConfigName -> Annex () unsetGlobalConfig name = do @@ -46,6 +47,12 @@ unsetGlobalConfig name = do getGlobalConfig :: ConfigName -> Annex (Maybe ConfigValue) getGlobalConfig name = M.lookup name <$> loadGlobalConfig +buildGlobalConfig :: MapLog ConfigName ConfigValue -> Builder +buildGlobalConfig = buildMapLog fieldbuilder valuebuilder + where + fieldbuilder = byteString . encodeBS + valuebuilder = byteString . encodeBS + parseGlobalConfig :: String -> MapLog ConfigName ConfigValue parseGlobalConfig = parseMapLog Just Just diff --git a/Logs/Export.hs b/Logs/Export.hs index 7817ca04dc..e7e2cce109 100644 --- a/Logs/Export.hs +++ b/Logs/Export.hs @@ -18,6 +18,8 @@ import Logs import Logs.MapLog import Annex.UUID +import Data.ByteString.Builder + data Exported = Exported { exportedTreeish :: Git.Ref , incompleteExportedTreeish :: [Git.Ref] @@ -69,7 +71,7 @@ recordExport remoteuuid ec = do let ep = ExportParticipants { exportFrom = u, exportTo = remoteuuid } let exported = Exported (newTreeish ec) [] Annex.Branch.change exportLog $ - encodeBL . showExportLog + buildExportLog . changeMapLog c ep exported . M.mapWithKey (updateothers c u) . parseExportLog . decodeBL @@ -94,7 +96,7 @@ recordExportBeginning remoteuuid newtree = do <$> Annex.Branch.get exportLog let new = old { incompleteExportedTreeish = nub (newtree:incompleteExportedTreeish old) } Annex.Branch.change exportLog $ - encodeBL . showExportLog + buildExportLog . changeMapLog c ep new . parseExportLog . decodeBL Annex.Branch.graftTreeish newtree (asTopFilePath "export.tree") @@ -102,12 +104,14 @@ recordExportBeginning remoteuuid newtree = do parseExportLog :: String -> MapLog ExportParticipants Exported parseExportLog = parseMapLog parseExportParticipants parseExported -showExportLog :: MapLog ExportParticipants Exported -> String -showExportLog = showMapLog formatExportParticipants formatExported +buildExportLog :: MapLog ExportParticipants Exported -> Builder +buildExportLog = buildMapLog buildExportParticipants buildExported -formatExportParticipants :: ExportParticipants -> String -formatExportParticipants ep = - fromUUID (exportFrom ep) ++ ':' : fromUUID (exportTo ep) +buildExportParticipants :: ExportParticipants -> Builder +buildExportParticipants ep = byteString (fromUUID (exportFrom ep)) + <> sep <> byteString (fromUUID (exportTo ep)) + where + sep = charUtf8 ':' parseExportParticipants :: String -> Maybe ExportParticipants parseExportParticipants s = case separate (== ':') s of @@ -117,9 +121,13 @@ parseExportParticipants s = case separate (== ':') s of { exportFrom = toUUID f , exportTo = toUUID t } -formatExported :: Exported -> String -formatExported exported = unwords $ map Git.fromRef $ - exportedTreeish exported : incompleteExportedTreeish exported + +buildExported :: Exported -> Builder +buildExported exported = go (exportedTreeish exported : incompleteExportedTreeish exported) + where + go [] = mempty + go (r:rs) = rref r <> mconcat [ charUtf8 ' ' <> rref r' | r' <- rs ] + rref r = byteString (encodeBS' (Git.fromRef r)) parseExported :: String -> Maybe Exported parseExported s = case words s of diff --git a/Logs/MapLog.hs b/Logs/MapLog.hs index 1bc024e2cc..9e09290506 100644 --- a/Logs/MapLog.hs +++ b/Logs/MapLog.hs @@ -1,12 +1,10 @@ -{-# LANGUAGE CPP #-} - {- git-annex Map log - - This is used to store a Map, in a way that can be union merged. - - A line of the log will look like: "timestamp field value" - - - Copyright 2014 Joey Hess + - Copyright 2014, 2019 Joey Hess - - Licensed under the GNU GPL version 3 or higher. -} @@ -22,6 +20,7 @@ import Annex.VectorClock import Logs.Line import qualified Data.Map.Strict as M +import Data.ByteString.Builder data LogEntry v = LogEntry { changed :: VectorClock @@ -30,13 +29,16 @@ data LogEntry v = LogEntry type MapLog f v = M.Map f (LogEntry v) -showMapLog :: (f -> String) -> (v -> String) -> MapLog f v -> String -showMapLog fieldshower valueshower = unlines . map showpair . M.toList +buildMapLog :: (f -> Builder) -> (v -> Builder) -> MapLog f v -> Builder +buildMapLog fieldbuilder valuebuilder = mconcat . map genline . M.toList where - showpair (f, LogEntry (VectorClock c) v) = - unwords [show c, fieldshower f, valueshower v] - showpair (f, LogEntry Unknown v) = - unwords ["0", fieldshower f, valueshower v] + genline (f, LogEntry c v) = + buildVectorClock c <> sp + <> fieldbuilder f <> sp + <> valuebuilder v <> nl + sp = charUtf8 ' ' + nl = charUtf8 '\n' + parseMapLog :: Ord f => (String -> Maybe f) -> (String -> Maybe v) -> String -> MapLog f v parseMapLog fieldparser valueparser = M.fromListWith best . mapMaybe parse . splitLines diff --git a/Logs/PreferredContent/Raw.hs b/Logs/PreferredContent/Raw.hs index 730ec348f2..657a6b6a07 100644 --- a/Logs/PreferredContent/Raw.hs +++ b/Logs/PreferredContent/Raw.hs @@ -1,6 +1,6 @@ {- unparsed preferred content expressions - - - Copyright 2012-2014 Joey Hess + - Copyright 2012-2019 Joey Hess - - Licensed under the GNU GPL version 3 or higher. -} @@ -17,6 +17,7 @@ import Types.StandardGroups import Types.Group import qualified Data.Map as M +import Data.ByteString.Builder {- Changes the preferred content configuration of a remote. -} preferredContentSet :: UUID -> PreferredContentExpression -> Annex () @@ -43,11 +44,17 @@ groupPreferredContentSet :: Group -> PreferredContentExpression -> Annex () groupPreferredContentSet g val = do c <- liftIO currentVectorClock Annex.Branch.change groupPreferredContentLog $ - encodeBL . showMapLog id id + buildGroupPreferredContent . changeMapLog c g val . parseMapLog Just Just . decodeBL Annex.changeState $ \s -> s { Annex.preferredcontentmap = Nothing } +buildGroupPreferredContent :: MapLog Group PreferredContentExpression -> Builder +buildGroupPreferredContent = buildMapLog buildgroup buildexpr + where + buildgroup = byteString . encodeBS + buildexpr = byteString . encodeBS + preferredContentMapRaw :: Annex (M.Map UUID PreferredContentExpression) preferredContentMapRaw = simpleMap . parseLog Just . decodeBL <$> Annex.Branch.get preferredContentLog diff --git a/Logs/Presence/Pure.hs b/Logs/Presence/Pure.hs index 776ae42018..8bed7b6591 100644 --- a/Logs/Presence/Pure.hs +++ b/Logs/Presence/Pure.hs @@ -64,8 +64,7 @@ buildLog :: [LogLine] -> Builder buildLog = mconcat . map genline where genline (LogLine c s (LogInfo i)) = - byteString (encodeBS' (formatVectorClock c)) <> sp <> - genstatus s <> sp <> byteString i <> nl + buildVectorClock c <> sp <> genstatus s <> sp <> byteString i <> nl sp = charUtf8 ' ' nl = charUtf8 '\n' genstatus InfoPresent = charUtf8 '1' diff --git a/Logs/RemoteState.hs b/Logs/RemoteState.hs index cfdb0c980f..5b856b713c 100644 --- a/Logs/RemoteState.hs +++ b/Logs/RemoteState.hs @@ -1,6 +1,6 @@ {- Remote state logs. - - - Copyright 2014 Joey Hess + - Copyright 2014, 2019 Joey Hess - - Licensed under the GNU GPL version 3 or higher. -} @@ -17,6 +17,7 @@ import qualified Annex.Branch import qualified Annex import qualified Data.Map as M +import Data.ByteString.Builder type RemoteState = String @@ -25,7 +26,10 @@ setRemoteState u k s = do c <- liftIO currentVectorClock config <- Annex.getGitConfig Annex.Branch.change (remoteStateLogFile config k) $ - encodeBL . showLogNew id . changeLog c u s . parseLogNew Just . decodeBL + buildRemoteState . changeLog c u s . parseLogNew Just . decodeBL + +buildRemoteState :: Log RemoteState -> Builder +buildRemoteState = buildLogNew (byteString . encodeBS) getRemoteState :: UUID -> Key -> Annex (Maybe RemoteState) getRemoteState u k = do diff --git a/Logs/SingleValue/Pure.hs b/Logs/SingleValue/Pure.hs index 1a2c696594..c08ea4f6ca 100644 --- a/Logs/SingleValue/Pure.hs +++ b/Logs/SingleValue/Pure.hs @@ -33,9 +33,7 @@ buildLog :: (SingleValueSerializable v) => Log v -> Builder buildLog = mconcat . map genline . S.toList where genline (LogEntry c v) = - byteString (encodeBS' (formatVectorClock c)) <> sp - <> byteString (serialize v) - <> nl + buildVectorClock c <> sp <> byteString (serialize v) <> nl sp = charUtf8 ' ' nl = charUtf8 '\n' diff --git a/Logs/UUIDBased.hs b/Logs/UUIDBased.hs index 4f32c19c7c..f509c62dda 100644 --- a/Logs/UUIDBased.hs +++ b/Logs/UUIDBased.hs @@ -9,7 +9,7 @@ - - New uuid based logs instead use the form: "timestamp UUID INFO" - - - Copyright 2011-2013 Joey Hess + - Copyright 2011-2019 Joey Hess - - Licensed under the GNU GPL version 3 or higher. -} @@ -23,7 +23,7 @@ module Logs.UUIDBased ( parseLogNew, parseLogWithUUID, showLog, - showLogNew, + buildLogNew, changeLog, addLog, simpleMap, @@ -37,6 +37,8 @@ import Annex.VectorClock import Logs.MapLog import Logs.Line +import Data.ByteString.Builder + type Log v = MapLog UUID v showLog :: (v -> String) -> Log v -> String @@ -74,8 +76,8 @@ parseLogWithUUID parser = M.fromListWith best . mapMaybe parse . splitLines | ts == Unknown = drop 1 ws | otherwise = drop 1 $ beginning ws -showLogNew :: (v -> String) -> Log v -> String -showLogNew = showMapLog fromUUID +buildLogNew :: (v -> Builder) -> Log v -> Builder +buildLogNew = buildMapLog (byteString . fromUUID) parseLogNew :: (String -> Maybe v) -> String -> Log v parseLogNew = parseMapLog (Just . toUUID)