git-annex/Logs/UUID.hs
Joey Hess 9eb10caa27
Some optimisations to string splitting code.
Turns out that Data.List.Utils.split is slow and makes a lot of
allocations. Here's a much simpler single character splitter that behaves
the same (even in wacky corner cases) while running in half the time and
75% the allocations.

As well as being an optimisation, this helps move toward eliminating use of
missingh.

(Data.List.Split.splitOn is nearly as slow as Data.List.Utils.split and
allocates even more.)

I have not benchmarked the effect on git-annex, but would not be surprised
to see some parsing of eg, large streams from git commands run twice as
fast, and possibly in less memory.

This commit was sponsored by Boyd Stephen Smith Jr. on Patreon.
2017-01-31 19:06:22 -04:00

96 lines
2.8 KiB
Haskell

{- git-annex uuids
-
- Each git repository used by git-annex has an annex.uuid setting that
- uniquely identifies that repository.
-
- UUIDs of remotes are cached in git config, using keys named
- remote.<name>.annex-uuid
-
- uuid.log stores a list of known uuids, and their descriptions.
-
- Copyright 2010-2012 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Logs.UUID (
uuidLog,
describeUUID,
recordUUID,
uuidMap,
uuidMapLoad
) where
import qualified Data.Map as M
import Data.Time.Clock.POSIX
import Types.UUID
import Annex.Common
import qualified Annex
import qualified Annex.Branch
import Logs
import Logs.UUIDBased
import qualified Annex.UUID
{- Records a description for a uuid in the log. -}
describeUUID :: UUID -> String -> Annex ()
describeUUID uuid desc = do
ts <- liftIO getPOSIXTime
Annex.Branch.change uuidLog $
showLog id . changeLog ts uuid desc . fixBadUUID . parseLog Just
{- Temporarily here to fix badly formatted uuid logs generated by
- versions 3.20111105 and 3.20111025.
-
- Those logs contain entries with the UUID and description flipped.
- Due to parsing, if the description is multiword, only the first
- will be taken to be the UUID. So, if the UUID of an entry does
- not look like a UUID, and the last word of the description does,
- flip them back.
-}
fixBadUUID :: Log String -> Log String
fixBadUUID = M.fromList . map fixup . M.toList
where
fixup (k, v)
| isbad = (fixeduuid, LogEntry (Date $ newertime v) fixedvalue)
| otherwise = (k, v)
where
kuuid = fromUUID k
isbad = not (isuuid kuuid) && not (null ws) && isuuid lastword
ws = words $ value v
lastword = Prelude.last ws
fixeduuid = toUUID lastword
fixedvalue = unwords $ kuuid: Prelude.init ws
-- For the fixed line to take precidence, it should be
-- slightly newer, but only slightly.
newertime (LogEntry (Date d) _) = d + minimumPOSIXTimeSlice
newertime (LogEntry Unknown _) = minimumPOSIXTimeSlice
minimumPOSIXTimeSlice = 0.000001
isuuid s = length s == 36 && length (splitc '-' s) == 5
{- Records the uuid in the log, if it's not already there. -}
recordUUID :: UUID -> Annex ()
recordUUID u = go . M.lookup u =<< uuidMap
where
go (Just "") = set
go Nothing = set
go _ = noop
set = describeUUID u ""
{- The map is cached for speed. -}
uuidMap :: Annex UUIDMap
uuidMap = maybe uuidMapLoad return =<< Annex.getState Annex.uuidmap
{- Read the uuidLog into a simple Map.
-
- The UUID of the current repository is included explicitly, since
- it may not have been described and so otherwise would not appear. -}
uuidMapLoad :: Annex UUIDMap
uuidMapLoad = do
m <- (simpleMap . parseLog Just) <$> Annex.Branch.get uuidLog
u <- Annex.UUID.getUUID
let m' = M.insertWith' preferold u "" m
Annex.changeState $ \s -> s { Annex.uuidmap = Just m' }
return m'
where
preferold = flip const