attoparsec parsers for all new-format uuid-based logs

There should be some speed gains here, especially for chunk and remote
state logs, which are queried once per key.

Now only old-format uuid-based logs still need to be converted to attoparsec.
This commit is contained in:
Joey Hess 2019-01-10 13:23:42 -04:00
parent 7e54c215b4
commit 66603d6f75
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
10 changed files with 88 additions and 56 deletions

View file

@ -24,6 +24,7 @@ import Types.MetaData
import qualified Data.Map as M
import qualified Data.Set as S
import qualified Data.ByteString.Lazy as L
import qualified Data.Attoparsec.ByteString.Lazy as A
import Data.ByteString.Builder
data FileTransition
@ -48,11 +49,11 @@ dropDead f content trustmap = case getLogVariety f of
UUIDBased.buildLog (byteString . encodeBS) $
dropDeadFromMapLog trustmap id $ UUIDBased.parseLog Just (decodeBL content)
Just NewUUIDBasedLog -> ChangeFile $
UUIDBased.buildLogNew (byteString . encodeBS) $
UUIDBased.buildLogNew byteString $
dropDeadFromMapLog trustmap id $
UUIDBased.parseLogNew Just (decodeBL content)
UUIDBased.parseLogNew A.takeByteString content
Just (ChunkLog _) -> ChangeFile $
Chunk.buildLog $ dropDeadFromMapLog trustmap fst $ Chunk.parseLog (decodeBL content)
Chunk.buildLog $ dropDeadFromMapLog trustmap fst $ Chunk.parseLog content
Just (PresenceLog _) ->
let newlog = Presence.compactLog $ dropDeadFromPresenceLog trustmap $ Presence.parseLog content
in if null newlog

View file

@ -38,7 +38,7 @@ chunksStored u k chunkmethod chunkcount = do
c <- liftIO currentVectorClock
config <- Annex.getGitConfig
Annex.Branch.change (chunkLogFile config k) $
buildLog . changeMapLog c (u, chunkmethod) chunkcount . parseLog . decodeBL
buildLog . changeMapLog c (u, chunkmethod) chunkcount . parseLog
chunksRemoved :: UUID -> Key -> ChunkMethod -> Annex ()
chunksRemoved u k chunkmethod = chunksStored u k chunkmethod 0
@ -46,7 +46,7 @@ chunksRemoved u k chunkmethod = chunksStored u k chunkmethod 0
getCurrentChunks :: UUID -> Key -> Annex [(ChunkMethod, ChunkCount)]
getCurrentChunks u k = do
config <- Annex.getGitConfig
select . parseLog . decodeBL <$> Annex.Branch.get (chunkLogFile config k)
select . parseLog <$> Annex.Branch.get (chunkLogFile config k)
where
select = filter (\(_m, ct) -> ct > 0)
. map (\((_ku, m), l) -> (m, value l))

View file

@ -19,6 +19,9 @@ import Logs.MapLog
import Data.Int
import qualified Data.ByteString as S
import qualified Data.ByteString.Lazy as L
import qualified Data.Attoparsec.ByteString.Lazy as A
import qualified Data.Attoparsec.ByteString.Char8 as A8
import Data.ByteString.Builder
-- Currently chunks are all fixed size, but other chunking methods
@ -33,20 +36,13 @@ type ChunkCount = Integer
type ChunkLog = MapLog (UUID, ChunkMethod) ChunkCount
parseChunkMethod :: String -> ChunkMethod
parseChunkMethod s = maybe (UnknownChunks $ encodeBS s) FixedSizeChunks (readish s)
buildChunkMethod :: ChunkMethod -> Builder
buildChunkMethod (FixedSizeChunks sz) = int64Dec sz
buildChunkMethod (UnknownChunks s) = byteString s
parseLog :: String -> ChunkLog
parseLog = parseMapLog fieldparser valueparser
where
fieldparser s =
let (u,m) = separate (== ':') s
in Just (toUUID u, parseChunkMethod m)
valueparser = readish
chunkMethodParser :: A.Parser ChunkMethod
chunkMethodParser =
(FixedSizeChunks <$> A8.decimal) <|> (UnknownChunks <$> A.takeByteString)
buildLog :: ChunkLog -> Builder
buildLog = buildMapLog fieldbuilder valuebuilder
@ -54,3 +50,13 @@ buildLog = buildMapLog fieldbuilder valuebuilder
fieldbuilder (u, m) = buildUUID u <> sep <> buildChunkMethod m
valuebuilder = integerDec
sep = charUtf8 ':'
parseLog :: L.ByteString -> ChunkLog
parseLog = parseMapLog fieldparser valueparser
where
fieldparser = (,)
<$> (toUUID <$> A8.takeTill (== ':'))
<* A8.char ':'
<*> chunkMethodParser
<* A.endOfInput
valueparser = A8.decimal

View file

@ -20,6 +20,8 @@ import Logs.MapLog
import qualified Annex.Branch
import qualified Data.Map as M
import qualified Data.ByteString.Lazy as L
import qualified Data.Attoparsec.ByteString.Lazy as A
import Data.ByteString.Builder
type ConfigName = String
@ -35,7 +37,7 @@ setGlobalConfig' :: ConfigName -> ConfigValue -> Annex ()
setGlobalConfig' name new = do
c <- liftIO currentVectorClock
Annex.Branch.change configLog $
buildGlobalConfig . changeMapLog c name new . parseGlobalConfig . decodeBL
buildGlobalConfig . changeMapLog c name new . parseGlobalConfig
unsetGlobalConfig :: ConfigName -> Annex ()
unsetGlobalConfig name = do
@ -53,9 +55,11 @@ buildGlobalConfig = buildMapLog fieldbuilder valuebuilder
fieldbuilder = byteString . encodeBS
valuebuilder = byteString . encodeBS
parseGlobalConfig :: String -> MapLog ConfigName ConfigValue
parseGlobalConfig = parseMapLog Just Just
parseGlobalConfig :: L.ByteString -> MapLog ConfigName ConfigValue
parseGlobalConfig = parseMapLog string string
where
string = decodeBS <$> A.takeByteString
loadGlobalConfig :: Annex (M.Map ConfigName ConfigValue)
loadGlobalConfig = M.filter (not . null) . simpleMap . parseGlobalConfig . decodeBL
loadGlobalConfig = M.filter (not . null) . simpleMap . parseGlobalConfig
<$> Annex.Branch.get configLog

View file

@ -18,6 +18,9 @@ import Logs
import Logs.MapLog
import Annex.UUID
import qualified Data.ByteString.Lazy as L
import qualified Data.Attoparsec.ByteString.Lazy as A
import qualified Data.Attoparsec.ByteString.Char8 as A8
import Data.ByteString.Builder
data Exported = Exported
@ -30,7 +33,7 @@ data ExportParticipants = ExportParticipants
{ exportFrom :: UUID
, exportTo :: UUID
}
deriving (Eq, Ord)
deriving (Eq, Ord, Show)
data ExportChange = ExportChange
{ oldTreeish :: [Git.Ref]
@ -44,7 +47,6 @@ data ExportChange = ExportChange
getExport :: UUID -> Annex [Exported]
getExport remoteuuid = nub . mapMaybe get . M.toList . simpleMap
. parseExportLog
. decodeBL
<$> Annex.Branch.get exportLog
where
get (ep, exported)
@ -74,7 +76,7 @@ recordExport remoteuuid ec = do
buildExportLog
. changeMapLog c ep exported
. M.mapWithKey (updateothers c u)
. parseExportLog . decodeBL
. parseExportLog
where
updateothers c u ep le@(LogEntry _ exported@(Exported { exportedTreeish = t }))
| u == exportFrom ep || remoteuuid /= exportTo ep || t `notElem` oldTreeish ec = le
@ -92,17 +94,16 @@ recordExportBeginning remoteuuid newtree = do
old <- fromMaybe (Exported emptyTree [])
. M.lookup ep . simpleMap
. parseExportLog
. decodeBL
<$> Annex.Branch.get exportLog
let new = old { incompleteExportedTreeish = nub (newtree:incompleteExportedTreeish old) }
Annex.Branch.change exportLog $
buildExportLog
. changeMapLog c ep new
. parseExportLog . decodeBL
. parseExportLog
Annex.Branch.graftTreeish newtree (asTopFilePath "export.tree")
parseExportLog :: String -> MapLog ExportParticipants Exported
parseExportLog = parseMapLog parseExportParticipants parseExported
parseExportLog :: L.ByteString -> MapLog ExportParticipants Exported
parseExportLog = parseMapLog exportParticipantsParser exportedParser
buildExportLog :: MapLog ExportParticipants Exported -> Builder
buildExportLog = buildMapLog buildExportParticipants buildExported
@ -113,14 +114,11 @@ buildExportParticipants ep =
where
sep = charUtf8 ':'
parseExportParticipants :: String -> Maybe ExportParticipants
parseExportParticipants s = case separate (== ':') s of
("",_) -> Nothing
(_,"") -> Nothing
(f,t) -> Just $ ExportParticipants
{ exportFrom = toUUID f
, exportTo = toUUID t
}
exportParticipantsParser :: A.Parser ExportParticipants
exportParticipantsParser = ExportParticipants
<$> (toUUID <$> A8.takeWhile1 (/= ':'))
<* A8.char ':'
<*> (toUUID <$> A8.takeWhile1 (const True))
buildExported :: Exported -> Builder
buildExported exported = go (exportedTreeish exported : incompleteExportedTreeish exported)
@ -129,7 +127,8 @@ buildExported exported = go (exportedTreeish exported : incompleteExportedTreeis
go (r:rs) = rref r <> mconcat [ charUtf8 ' ' <> rref r' | r' <- rs ]
rref r = byteString (encodeBS' (Git.fromRef r))
parseExported :: String -> Maybe Exported
parseExported s = case words s of
(et:it) -> Just $ Exported (Git.Ref et) (map Git.Ref it)
_ -> Nothing
exportedParser :: A.Parser Exported
exportedParser = Exported <$> refparser <*> many refparser
where
refparser = (Git.Ref . decodeBS <$> A8.takeWhile1 (/= ' ') )
<* ((const () <$> A8.char ' ') <|> A.endOfInput)

View file

@ -4,6 +4,8 @@
-
- A line of the log will look like: "timestamp field value"
-
- The field names cannot contain whitespace.
-
- Copyright 2014, 2019 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
@ -19,13 +21,16 @@ import Common
import Annex.VectorClock
import Logs.Line
import qualified Data.ByteString.Lazy as L
import qualified Data.Map.Strict as M
import qualified Data.Attoparsec.ByteString.Lazy as A
import qualified Data.Attoparsec.ByteString.Char8 as A8
import Data.ByteString.Builder
data LogEntry v = LogEntry
{ changed :: VectorClock
, value :: v
} deriving (Eq)
} deriving (Eq, Show)
type MapLog f v = M.Map f (LogEntry v)
@ -39,17 +44,23 @@ buildMapLog fieldbuilder valuebuilder = mconcat . map genline . M.toList
sp = charUtf8 ' '
nl = charUtf8 '\n'
parseMapLog :: Ord f => (String -> Maybe f) -> (String -> Maybe v) -> String -> MapLog f v
parseMapLog fieldparser valueparser = M.fromListWith best . mapMaybe parse . splitLines
parseMapLog :: Ord f => A.Parser f -> A.Parser v -> L.ByteString -> MapLog f v
parseMapLog fieldparser valueparser = fromMaybe M.empty . A.maybeResult
. A.parse (mapLogParser fieldparser valueparser)
mapLogParser :: Ord f => A.Parser f -> A.Parser v -> A.Parser (MapLog f v)
mapLogParser fieldparser valueparser = M.fromListWith best <$> parseLogLines go
where
parse line = do
let (sc, rest) = splitword line
(sf, sv) = splitword rest
c <- parseVectorClock sc
f <- fieldparser sf
v <- valueparser sv
Just (f, LogEntry c v)
splitword = separate (== ' ')
go = do
c <- vectorClockParser
_ <- A8.char ' '
w <- A8.takeTill (== ' ')
f <- either fail return $
A.parseOnly (fieldparser <* A.endOfInput) w
_ <- A8.char ' '
v <- valueparser
A.endOfInput
return (f, LogEntry c v)
changeMapLog :: Ord f => VectorClock -> f -> v -> MapLog f v -> MapLog f v
changeMapLog c f v = M.insert f $ LogEntry c v

View file

@ -18,6 +18,7 @@ import Types.Group
import qualified Data.Map as M
import qualified Data.ByteString.Lazy as L
import qualified Data.Attoparsec.ByteString.Lazy as A
import Data.ByteString.Builder
{- Changes the preferred content configuration of a remote. -}
@ -51,7 +52,10 @@ groupPreferredContentSet g val = do
Annex.changeState $ \s -> s { Annex.preferredcontentmap = Nothing }
parseGroupPreferredContent :: L.ByteString -> MapLog Group String
parseGroupPreferredContent = parseMapLog (Just . toGroup) Just . decodeBL
parseGroupPreferredContent = parseMapLog parsegroup parsestring
where
parsegroup = Group <$> A.takeByteString
parsestring = decodeBS <$> A.takeByteString
buildGroupPreferredContent :: MapLog Group PreferredContentExpression -> Builder
buildGroupPreferredContent = buildMapLog buildgroup buildexpr

View file

@ -17,6 +17,8 @@ import qualified Annex.Branch
import qualified Annex
import qualified Data.Map as M
import qualified Data.ByteString.Lazy as L
import qualified Data.Attoparsec.ByteString.Lazy as A
import Data.ByteString.Builder
type RemoteState = String
@ -26,7 +28,7 @@ setRemoteState u k s = do
c <- liftIO currentVectorClock
config <- Annex.getGitConfig
Annex.Branch.change (remoteStateLogFile config k) $
buildRemoteState . changeLog c u s . parseLogNew Just . decodeBL
buildRemoteState . changeLog c u s . parseRemoteState
buildRemoteState :: Log RemoteState -> Builder
buildRemoteState = buildLogNew (byteString . encodeBS)
@ -34,7 +36,10 @@ buildRemoteState = buildLogNew (byteString . encodeBS)
getRemoteState :: UUID -> Key -> Annex (Maybe RemoteState)
getRemoteState u k = do
config <- Annex.getGitConfig
extract . parseLogNew Just . decodeBL
extract . parseRemoteState
<$> Annex.Branch.get (remoteStateLogFile config k)
where
extract m = value <$> M.lookup u m
parseRemoteState :: L.ByteString -> Log RemoteState
parseRemoteState = parseLogNew (decodeBS <$> A.takeByteString)

View file

@ -42,10 +42,10 @@ scheduleSet NoUUID _ = error "unknown UUID; cannot modify"
scheduleMap :: Annex (M.Map UUID [ScheduledActivity])
scheduleMap = simpleMap
. parseLogWithUUID parser . decodeBL
. parseLog parser . decodeBL
<$> Annex.Branch.get scheduleLog
where
parser _uuid = eitherToMaybe . parseScheduledActivities
parser = eitherToMaybe . parseScheduledActivities
scheduleGet :: UUID -> Annex (S.Set ScheduledActivity)
scheduleGet u = do

View file

@ -39,6 +39,8 @@ import Annex.VectorClock
import Logs.MapLog
import Logs.Line
import qualified Data.ByteString.Lazy as L
import qualified Data.Attoparsec.ByteString.Lazy as A
import Data.ByteString.Builder
type Log v = MapLog UUID v
@ -84,8 +86,8 @@ parseLogWithUUID parser = M.fromListWith best . mapMaybe parse . splitLines
buildLogNew :: (v -> Builder) -> Log v -> Builder
buildLogNew = buildMapLog buildUUID
parseLogNew :: (String -> Maybe v) -> String -> Log v
parseLogNew = parseMapLog (Just . toUUID)
parseLogNew :: A.Parser v -> L.ByteString -> Log v
parseLogNew = parseMapLog (toUUID <$> A.takeByteString)
changeLog :: VectorClock -> UUID -> v -> Log v -> Log v
changeLog = changeMapLog