attoparsec parsers for all new-format uuid-based logs

There should be some speed gains here, especially for chunk and remote
state logs, which are queried once per key.

Now only old-format uuid-based logs still need to be converted to attoparsec.
This commit is contained in:
Joey Hess 2019-01-10 13:23:42 -04:00
parent 7e54c215b4
commit 66603d6f75
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
10 changed files with 88 additions and 56 deletions

View file

@ -24,6 +24,7 @@ import Types.MetaData
import qualified Data.Map as M import qualified Data.Map as M
import qualified Data.Set as S import qualified Data.Set as S
import qualified Data.ByteString.Lazy as L import qualified Data.ByteString.Lazy as L
import qualified Data.Attoparsec.ByteString.Lazy as A
import Data.ByteString.Builder import Data.ByteString.Builder
data FileTransition data FileTransition
@ -48,11 +49,11 @@ dropDead f content trustmap = case getLogVariety f of
UUIDBased.buildLog (byteString . encodeBS) $ UUIDBased.buildLog (byteString . encodeBS) $
dropDeadFromMapLog trustmap id $ UUIDBased.parseLog Just (decodeBL content) dropDeadFromMapLog trustmap id $ UUIDBased.parseLog Just (decodeBL content)
Just NewUUIDBasedLog -> ChangeFile $ Just NewUUIDBasedLog -> ChangeFile $
UUIDBased.buildLogNew (byteString . encodeBS) $ UUIDBased.buildLogNew byteString $
dropDeadFromMapLog trustmap id $ dropDeadFromMapLog trustmap id $
UUIDBased.parseLogNew Just (decodeBL content) UUIDBased.parseLogNew A.takeByteString content
Just (ChunkLog _) -> ChangeFile $ Just (ChunkLog _) -> ChangeFile $
Chunk.buildLog $ dropDeadFromMapLog trustmap fst $ Chunk.parseLog (decodeBL content) Chunk.buildLog $ dropDeadFromMapLog trustmap fst $ Chunk.parseLog content
Just (PresenceLog _) -> Just (PresenceLog _) ->
let newlog = Presence.compactLog $ dropDeadFromPresenceLog trustmap $ Presence.parseLog content let newlog = Presence.compactLog $ dropDeadFromPresenceLog trustmap $ Presence.parseLog content
in if null newlog in if null newlog

View file

@ -38,7 +38,7 @@ chunksStored u k chunkmethod chunkcount = do
c <- liftIO currentVectorClock c <- liftIO currentVectorClock
config <- Annex.getGitConfig config <- Annex.getGitConfig
Annex.Branch.change (chunkLogFile config k) $ Annex.Branch.change (chunkLogFile config k) $
buildLog . changeMapLog c (u, chunkmethod) chunkcount . parseLog . decodeBL buildLog . changeMapLog c (u, chunkmethod) chunkcount . parseLog
chunksRemoved :: UUID -> Key -> ChunkMethod -> Annex () chunksRemoved :: UUID -> Key -> ChunkMethod -> Annex ()
chunksRemoved u k chunkmethod = chunksStored u k chunkmethod 0 chunksRemoved u k chunkmethod = chunksStored u k chunkmethod 0
@ -46,7 +46,7 @@ chunksRemoved u k chunkmethod = chunksStored u k chunkmethod 0
getCurrentChunks :: UUID -> Key -> Annex [(ChunkMethod, ChunkCount)] getCurrentChunks :: UUID -> Key -> Annex [(ChunkMethod, ChunkCount)]
getCurrentChunks u k = do getCurrentChunks u k = do
config <- Annex.getGitConfig config <- Annex.getGitConfig
select . parseLog . decodeBL <$> Annex.Branch.get (chunkLogFile config k) select . parseLog <$> Annex.Branch.get (chunkLogFile config k)
where where
select = filter (\(_m, ct) -> ct > 0) select = filter (\(_m, ct) -> ct > 0)
. map (\((_ku, m), l) -> (m, value l)) . map (\((_ku, m), l) -> (m, value l))

View file

@ -19,6 +19,9 @@ import Logs.MapLog
import Data.Int import Data.Int
import qualified Data.ByteString as S import qualified Data.ByteString as S
import qualified Data.ByteString.Lazy as L
import qualified Data.Attoparsec.ByteString.Lazy as A
import qualified Data.Attoparsec.ByteString.Char8 as A8
import Data.ByteString.Builder import Data.ByteString.Builder
-- Currently chunks are all fixed size, but other chunking methods -- Currently chunks are all fixed size, but other chunking methods
@ -33,20 +36,13 @@ type ChunkCount = Integer
type ChunkLog = MapLog (UUID, ChunkMethod) ChunkCount type ChunkLog = MapLog (UUID, ChunkMethod) ChunkCount
parseChunkMethod :: String -> ChunkMethod
parseChunkMethod s = maybe (UnknownChunks $ encodeBS s) FixedSizeChunks (readish s)
buildChunkMethod :: ChunkMethod -> Builder buildChunkMethod :: ChunkMethod -> Builder
buildChunkMethod (FixedSizeChunks sz) = int64Dec sz buildChunkMethod (FixedSizeChunks sz) = int64Dec sz
buildChunkMethod (UnknownChunks s) = byteString s buildChunkMethod (UnknownChunks s) = byteString s
parseLog :: String -> ChunkLog chunkMethodParser :: A.Parser ChunkMethod
parseLog = parseMapLog fieldparser valueparser chunkMethodParser =
where (FixedSizeChunks <$> A8.decimal) <|> (UnknownChunks <$> A.takeByteString)
fieldparser s =
let (u,m) = separate (== ':') s
in Just (toUUID u, parseChunkMethod m)
valueparser = readish
buildLog :: ChunkLog -> Builder buildLog :: ChunkLog -> Builder
buildLog = buildMapLog fieldbuilder valuebuilder buildLog = buildMapLog fieldbuilder valuebuilder
@ -54,3 +50,13 @@ buildLog = buildMapLog fieldbuilder valuebuilder
fieldbuilder (u, m) = buildUUID u <> sep <> buildChunkMethod m fieldbuilder (u, m) = buildUUID u <> sep <> buildChunkMethod m
valuebuilder = integerDec valuebuilder = integerDec
sep = charUtf8 ':' sep = charUtf8 ':'
parseLog :: L.ByteString -> ChunkLog
parseLog = parseMapLog fieldparser valueparser
where
fieldparser = (,)
<$> (toUUID <$> A8.takeTill (== ':'))
<* A8.char ':'
<*> chunkMethodParser
<* A.endOfInput
valueparser = A8.decimal

View file

@ -20,6 +20,8 @@ import Logs.MapLog
import qualified Annex.Branch import qualified Annex.Branch
import qualified Data.Map as M import qualified Data.Map as M
import qualified Data.ByteString.Lazy as L
import qualified Data.Attoparsec.ByteString.Lazy as A
import Data.ByteString.Builder import Data.ByteString.Builder
type ConfigName = String type ConfigName = String
@ -35,7 +37,7 @@ setGlobalConfig' :: ConfigName -> ConfigValue -> Annex ()
setGlobalConfig' name new = do setGlobalConfig' name new = do
c <- liftIO currentVectorClock c <- liftIO currentVectorClock
Annex.Branch.change configLog $ Annex.Branch.change configLog $
buildGlobalConfig . changeMapLog c name new . parseGlobalConfig . decodeBL buildGlobalConfig . changeMapLog c name new . parseGlobalConfig
unsetGlobalConfig :: ConfigName -> Annex () unsetGlobalConfig :: ConfigName -> Annex ()
unsetGlobalConfig name = do unsetGlobalConfig name = do
@ -53,9 +55,11 @@ buildGlobalConfig = buildMapLog fieldbuilder valuebuilder
fieldbuilder = byteString . encodeBS fieldbuilder = byteString . encodeBS
valuebuilder = byteString . encodeBS valuebuilder = byteString . encodeBS
parseGlobalConfig :: String -> MapLog ConfigName ConfigValue parseGlobalConfig :: L.ByteString -> MapLog ConfigName ConfigValue
parseGlobalConfig = parseMapLog Just Just parseGlobalConfig = parseMapLog string string
where
string = decodeBS <$> A.takeByteString
loadGlobalConfig :: Annex (M.Map ConfigName ConfigValue) loadGlobalConfig :: Annex (M.Map ConfigName ConfigValue)
loadGlobalConfig = M.filter (not . null) . simpleMap . parseGlobalConfig . decodeBL loadGlobalConfig = M.filter (not . null) . simpleMap . parseGlobalConfig
<$> Annex.Branch.get configLog <$> Annex.Branch.get configLog

View file

@ -18,6 +18,9 @@ import Logs
import Logs.MapLog import Logs.MapLog
import Annex.UUID import Annex.UUID
import qualified Data.ByteString.Lazy as L
import qualified Data.Attoparsec.ByteString.Lazy as A
import qualified Data.Attoparsec.ByteString.Char8 as A8
import Data.ByteString.Builder import Data.ByteString.Builder
data Exported = Exported data Exported = Exported
@ -30,7 +33,7 @@ data ExportParticipants = ExportParticipants
{ exportFrom :: UUID { exportFrom :: UUID
, exportTo :: UUID , exportTo :: UUID
} }
deriving (Eq, Ord) deriving (Eq, Ord, Show)
data ExportChange = ExportChange data ExportChange = ExportChange
{ oldTreeish :: [Git.Ref] { oldTreeish :: [Git.Ref]
@ -44,7 +47,6 @@ data ExportChange = ExportChange
getExport :: UUID -> Annex [Exported] getExport :: UUID -> Annex [Exported]
getExport remoteuuid = nub . mapMaybe get . M.toList . simpleMap getExport remoteuuid = nub . mapMaybe get . M.toList . simpleMap
. parseExportLog . parseExportLog
. decodeBL
<$> Annex.Branch.get exportLog <$> Annex.Branch.get exportLog
where where
get (ep, exported) get (ep, exported)
@ -74,7 +76,7 @@ recordExport remoteuuid ec = do
buildExportLog buildExportLog
. changeMapLog c ep exported . changeMapLog c ep exported
. M.mapWithKey (updateothers c u) . M.mapWithKey (updateothers c u)
. parseExportLog . decodeBL . parseExportLog
where where
updateothers c u ep le@(LogEntry _ exported@(Exported { exportedTreeish = t })) updateothers c u ep le@(LogEntry _ exported@(Exported { exportedTreeish = t }))
| u == exportFrom ep || remoteuuid /= exportTo ep || t `notElem` oldTreeish ec = le | u == exportFrom ep || remoteuuid /= exportTo ep || t `notElem` oldTreeish ec = le
@ -92,17 +94,16 @@ recordExportBeginning remoteuuid newtree = do
old <- fromMaybe (Exported emptyTree []) old <- fromMaybe (Exported emptyTree [])
. M.lookup ep . simpleMap . M.lookup ep . simpleMap
. parseExportLog . parseExportLog
. decodeBL
<$> Annex.Branch.get exportLog <$> Annex.Branch.get exportLog
let new = old { incompleteExportedTreeish = nub (newtree:incompleteExportedTreeish old) } let new = old { incompleteExportedTreeish = nub (newtree:incompleteExportedTreeish old) }
Annex.Branch.change exportLog $ Annex.Branch.change exportLog $
buildExportLog buildExportLog
. changeMapLog c ep new . changeMapLog c ep new
. parseExportLog . decodeBL . parseExportLog
Annex.Branch.graftTreeish newtree (asTopFilePath "export.tree") Annex.Branch.graftTreeish newtree (asTopFilePath "export.tree")
parseExportLog :: String -> MapLog ExportParticipants Exported parseExportLog :: L.ByteString -> MapLog ExportParticipants Exported
parseExportLog = parseMapLog parseExportParticipants parseExported parseExportLog = parseMapLog exportParticipantsParser exportedParser
buildExportLog :: MapLog ExportParticipants Exported -> Builder buildExportLog :: MapLog ExportParticipants Exported -> Builder
buildExportLog = buildMapLog buildExportParticipants buildExported buildExportLog = buildMapLog buildExportParticipants buildExported
@ -113,14 +114,11 @@ buildExportParticipants ep =
where where
sep = charUtf8 ':' sep = charUtf8 ':'
parseExportParticipants :: String -> Maybe ExportParticipants exportParticipantsParser :: A.Parser ExportParticipants
parseExportParticipants s = case separate (== ':') s of exportParticipantsParser = ExportParticipants
("",_) -> Nothing <$> (toUUID <$> A8.takeWhile1 (/= ':'))
(_,"") -> Nothing <* A8.char ':'
(f,t) -> Just $ ExportParticipants <*> (toUUID <$> A8.takeWhile1 (const True))
{ exportFrom = toUUID f
, exportTo = toUUID t
}
buildExported :: Exported -> Builder buildExported :: Exported -> Builder
buildExported exported = go (exportedTreeish exported : incompleteExportedTreeish exported) buildExported exported = go (exportedTreeish exported : incompleteExportedTreeish exported)
@ -129,7 +127,8 @@ buildExported exported = go (exportedTreeish exported : incompleteExportedTreeis
go (r:rs) = rref r <> mconcat [ charUtf8 ' ' <> rref r' | r' <- rs ] go (r:rs) = rref r <> mconcat [ charUtf8 ' ' <> rref r' | r' <- rs ]
rref r = byteString (encodeBS' (Git.fromRef r)) rref r = byteString (encodeBS' (Git.fromRef r))
parseExported :: String -> Maybe Exported exportedParser :: A.Parser Exported
parseExported s = case words s of exportedParser = Exported <$> refparser <*> many refparser
(et:it) -> Just $ Exported (Git.Ref et) (map Git.Ref it) where
_ -> Nothing refparser = (Git.Ref . decodeBS <$> A8.takeWhile1 (/= ' ') )
<* ((const () <$> A8.char ' ') <|> A.endOfInput)

View file

@ -4,6 +4,8 @@
- -
- A line of the log will look like: "timestamp field value" - A line of the log will look like: "timestamp field value"
- -
- The field names cannot contain whitespace.
-
- Copyright 2014, 2019 Joey Hess <id@joeyh.name> - Copyright 2014, 2019 Joey Hess <id@joeyh.name>
- -
- Licensed under the GNU GPL version 3 or higher. - Licensed under the GNU GPL version 3 or higher.
@ -19,13 +21,16 @@ import Common
import Annex.VectorClock import Annex.VectorClock
import Logs.Line import Logs.Line
import qualified Data.ByteString.Lazy as L
import qualified Data.Map.Strict as M import qualified Data.Map.Strict as M
import qualified Data.Attoparsec.ByteString.Lazy as A
import qualified Data.Attoparsec.ByteString.Char8 as A8
import Data.ByteString.Builder import Data.ByteString.Builder
data LogEntry v = LogEntry data LogEntry v = LogEntry
{ changed :: VectorClock { changed :: VectorClock
, value :: v , value :: v
} deriving (Eq) } deriving (Eq, Show)
type MapLog f v = M.Map f (LogEntry v) type MapLog f v = M.Map f (LogEntry v)
@ -39,17 +44,23 @@ buildMapLog fieldbuilder valuebuilder = mconcat . map genline . M.toList
sp = charUtf8 ' ' sp = charUtf8 ' '
nl = charUtf8 '\n' nl = charUtf8 '\n'
parseMapLog :: Ord f => (String -> Maybe f) -> (String -> Maybe v) -> String -> MapLog f v parseMapLog :: Ord f => A.Parser f -> A.Parser v -> L.ByteString -> MapLog f v
parseMapLog fieldparser valueparser = M.fromListWith best . mapMaybe parse . splitLines parseMapLog fieldparser valueparser = fromMaybe M.empty . A.maybeResult
. A.parse (mapLogParser fieldparser valueparser)
mapLogParser :: Ord f => A.Parser f -> A.Parser v -> A.Parser (MapLog f v)
mapLogParser fieldparser valueparser = M.fromListWith best <$> parseLogLines go
where where
parse line = do go = do
let (sc, rest) = splitword line c <- vectorClockParser
(sf, sv) = splitword rest _ <- A8.char ' '
c <- parseVectorClock sc w <- A8.takeTill (== ' ')
f <- fieldparser sf f <- either fail return $
v <- valueparser sv A.parseOnly (fieldparser <* A.endOfInput) w
Just (f, LogEntry c v) _ <- A8.char ' '
splitword = separate (== ' ') v <- valueparser
A.endOfInput
return (f, LogEntry c v)
changeMapLog :: Ord f => VectorClock -> f -> v -> MapLog f v -> MapLog f v changeMapLog :: Ord f => VectorClock -> f -> v -> MapLog f v -> MapLog f v
changeMapLog c f v = M.insert f $ LogEntry c v changeMapLog c f v = M.insert f $ LogEntry c v

View file

@ -18,6 +18,7 @@ import Types.Group
import qualified Data.Map as M import qualified Data.Map as M
import qualified Data.ByteString.Lazy as L import qualified Data.ByteString.Lazy as L
import qualified Data.Attoparsec.ByteString.Lazy as A
import Data.ByteString.Builder import Data.ByteString.Builder
{- Changes the preferred content configuration of a remote. -} {- Changes the preferred content configuration of a remote. -}
@ -51,7 +52,10 @@ groupPreferredContentSet g val = do
Annex.changeState $ \s -> s { Annex.preferredcontentmap = Nothing } Annex.changeState $ \s -> s { Annex.preferredcontentmap = Nothing }
parseGroupPreferredContent :: L.ByteString -> MapLog Group String parseGroupPreferredContent :: L.ByteString -> MapLog Group String
parseGroupPreferredContent = parseMapLog (Just . toGroup) Just . decodeBL parseGroupPreferredContent = parseMapLog parsegroup parsestring
where
parsegroup = Group <$> A.takeByteString
parsestring = decodeBS <$> A.takeByteString
buildGroupPreferredContent :: MapLog Group PreferredContentExpression -> Builder buildGroupPreferredContent :: MapLog Group PreferredContentExpression -> Builder
buildGroupPreferredContent = buildMapLog buildgroup buildexpr buildGroupPreferredContent = buildMapLog buildgroup buildexpr

View file

@ -17,6 +17,8 @@ import qualified Annex.Branch
import qualified Annex import qualified Annex
import qualified Data.Map as M import qualified Data.Map as M
import qualified Data.ByteString.Lazy as L
import qualified Data.Attoparsec.ByteString.Lazy as A
import Data.ByteString.Builder import Data.ByteString.Builder
type RemoteState = String type RemoteState = String
@ -26,7 +28,7 @@ setRemoteState u k s = do
c <- liftIO currentVectorClock c <- liftIO currentVectorClock
config <- Annex.getGitConfig config <- Annex.getGitConfig
Annex.Branch.change (remoteStateLogFile config k) $ Annex.Branch.change (remoteStateLogFile config k) $
buildRemoteState . changeLog c u s . parseLogNew Just . decodeBL buildRemoteState . changeLog c u s . parseRemoteState
buildRemoteState :: Log RemoteState -> Builder buildRemoteState :: Log RemoteState -> Builder
buildRemoteState = buildLogNew (byteString . encodeBS) buildRemoteState = buildLogNew (byteString . encodeBS)
@ -34,7 +36,10 @@ buildRemoteState = buildLogNew (byteString . encodeBS)
getRemoteState :: UUID -> Key -> Annex (Maybe RemoteState) getRemoteState :: UUID -> Key -> Annex (Maybe RemoteState)
getRemoteState u k = do getRemoteState u k = do
config <- Annex.getGitConfig config <- Annex.getGitConfig
extract . parseLogNew Just . decodeBL extract . parseRemoteState
<$> Annex.Branch.get (remoteStateLogFile config k) <$> Annex.Branch.get (remoteStateLogFile config k)
where where
extract m = value <$> M.lookup u m extract m = value <$> M.lookup u m
parseRemoteState :: L.ByteString -> Log RemoteState
parseRemoteState = parseLogNew (decodeBS <$> A.takeByteString)

View file

@ -42,10 +42,10 @@ scheduleSet NoUUID _ = error "unknown UUID; cannot modify"
scheduleMap :: Annex (M.Map UUID [ScheduledActivity]) scheduleMap :: Annex (M.Map UUID [ScheduledActivity])
scheduleMap = simpleMap scheduleMap = simpleMap
. parseLogWithUUID parser . decodeBL . parseLog parser . decodeBL
<$> Annex.Branch.get scheduleLog <$> Annex.Branch.get scheduleLog
where where
parser _uuid = eitherToMaybe . parseScheduledActivities parser = eitherToMaybe . parseScheduledActivities
scheduleGet :: UUID -> Annex (S.Set ScheduledActivity) scheduleGet :: UUID -> Annex (S.Set ScheduledActivity)
scheduleGet u = do scheduleGet u = do

View file

@ -39,6 +39,8 @@ import Annex.VectorClock
import Logs.MapLog import Logs.MapLog
import Logs.Line import Logs.Line
import qualified Data.ByteString.Lazy as L
import qualified Data.Attoparsec.ByteString.Lazy as A
import Data.ByteString.Builder import Data.ByteString.Builder
type Log v = MapLog UUID v type Log v = MapLog UUID v
@ -84,8 +86,8 @@ parseLogWithUUID parser = M.fromListWith best . mapMaybe parse . splitLines
buildLogNew :: (v -> Builder) -> Log v -> Builder buildLogNew :: (v -> Builder) -> Log v -> Builder
buildLogNew = buildMapLog buildUUID buildLogNew = buildMapLog buildUUID
parseLogNew :: (String -> Maybe v) -> String -> Log v parseLogNew :: A.Parser v -> L.ByteString -> Log v
parseLogNew = parseMapLog (Just . toUUID) parseLogNew = parseMapLog (toUUID <$> A.takeByteString)
changeLog :: VectorClock -> UUID -> v -> Log v -> Log v changeLog :: VectorClock -> UUID -> v -> Log v -> Log v
changeLog = changeMapLog changeLog = changeMapLog