implement chunk logs

Slightly tricky as they are not normal UUIDBased logs, but are instead maps
from (uuid, chunksize) to chunkcount.
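
For illustration, the data the new log tracks boils down to a map like the following (a standalone sketch with a String stand-in for git-annex's UUID type; the real definitions, which also carry timestamps for union merging, are in Logs/Chunk/Pure.hs below):

import qualified Data.Map as M
import Data.Int (Int64)

type UUID = String      -- simplified stand-in for git-annex's UUID type
type ChunkSize = Int64
type ChunkCount = Integer

-- One entry per (remote, chunk size) pair; a count of 0 means the
-- chunks were removed from that remote.
type ChunkMap = M.Map (UUID, ChunkSize) ChunkCount

example :: ChunkMap
example = M.fromList
    [ (("e605dca6-446a-11e0-8b2a-002170d25c55", 10240), 9)
    , (("e605dca6-446a-11e0-8b2a-002170d25c55", 102400), 1)
    ]

main :: IO ()
main = print (M.toList example)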

This commit was sponsored by Frank Thomas.
Joey Hess 2014-07-24 16:23:36 -04:00
parent bbdb2c04d5
commit e2c44bf656
8 changed files with 134 additions and 39 deletions

Annex/Branch/Transitions.hs

@@ -12,8 +12,9 @@ module Annex.Branch.Transitions (
 import Logs
 import Logs.Transitions
-import Logs.UUIDBased as UUIDBased
-import Logs.Presence.Pure as Presence
+import qualified Logs.UUIDBased as UUIDBased
+import qualified Logs.Presence.Pure as Presence
+import qualified Logs.Chunk.Pure as Chunk
 import Types.TrustLevel
 import Types.UUID
@@ -37,9 +38,11 @@ dropDead f content trustmap = case getLogVariety f of
         -- because git remotes may still exist, and they need
         -- to still know it's dead.
         | f == trustLog -> PreserveFile
-        | otherwise -> ChangeFile $ UUIDBased.showLog id $ dropDeadFromUUIDBasedLog trustmap $ UUIDBased.parseLog Just content
+        | otherwise -> ChangeFile $ UUIDBased.showLog id $ dropDeadFromMapLog trustmap id $ UUIDBased.parseLog Just content
     Just NewUUIDBasedLog -> ChangeFile $
-        UUIDBased.showLogNew id $ dropDeadFromUUIDBasedLog trustmap $ UUIDBased.parseLogNew Just content
+        UUIDBased.showLogNew id $ dropDeadFromMapLog trustmap id $ UUIDBased.parseLogNew Just content
+    Just (ChunkLog _) -> ChangeFile $
+        Chunk.showLog $ dropDeadFromMapLog trustmap fst $ Chunk.parseLog content
     Just (PresenceLog _) ->
         let newlog = Presence.compactLog $ dropDeadFromPresenceLog trustmap $ Presence.parseLog content
         in if null newlog
@@ -48,8 +51,8 @@ dropDead f content trustmap = case getLogVariety f of
     Just OtherLog -> PreserveFile
     Nothing -> PreserveFile
 
-dropDeadFromUUIDBasedLog :: TrustMap -> UUIDBased.Log String -> UUIDBased.Log String
-dropDeadFromUUIDBasedLog trustmap = M.filterWithKey $ notDead trustmap . const
+dropDeadFromMapLog :: TrustMap -> (k -> UUID) -> M.Map k v -> M.Map k v
+dropDeadFromMapLog trustmap getuuid = M.filterWithKey $ \k _v -> notDead trustmap getuuid k
 
 {- Presence logs can contain UUIDs or other values. Any line that matches
  - a dead uuid is dropped; any other values are passed through. -}
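
To see what the generalized dropDeadFromMapLog does, here is a minimal standalone sketch, assuming a simplified TrustMap and a notDead helper along the lines of the one used in this module:

import qualified Data.Map as M

type UUID = String
data TrustLevel = Trusted | SemiTrusted | UnTrusted | DeadTrusted
    deriving (Eq, Show)
type TrustMap = M.Map UUID TrustLevel

-- Anything not explicitly marked DeadTrusted counts as alive.
notDead :: TrustMap -> (v -> UUID) -> v -> Bool
notDead trustmap a v = M.findWithDefault SemiTrusted (a v) trustmap /= DeadTrusted

-- Keep only map entries whose key belongs to a non-dead UUID.
dropDeadFromMapLog :: TrustMap -> (k -> UUID) -> M.Map k v -> M.Map k v
dropDeadFromMapLog trustmap getuuid = M.filterWithKey $ \k _v -> notDead trustmap getuuid k

main :: IO ()
main = print $ dropDeadFromMapLog trustmap fst m
  where
    trustmap = M.fromList [("dead-uuid", DeadTrusted)]
    m = M.fromList
        [ (("dead-uuid", 10240 :: Integer), 9 :: Integer)
        , (("live-uuid", 10240), 2)
        ]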

Logs.hs

@@ -14,6 +14,7 @@ import Types.Key
 data LogVariety
     = UUIDBasedLog
     | NewUUIDBasedLog
+    | ChunkLog Key
     | PresenceLog Key
     | OtherLog
     deriving (Show)
@@ -24,6 +25,7 @@ getLogVariety :: FilePath -> Maybe LogVariety
 getLogVariety f
     | f `elem` topLevelUUIDBasedLogs = Just UUIDBasedLog
     | isRemoteStateLog f = Just NewUUIDBasedLog
+    | isChunkLog f = ChunkLog <$> chunkLogFileKey f
     | isMetaDataLog f || f `elem` otherLogs = Just OtherLog
     | otherwise = PresenceLog <$> firstJust (presenceLogs f)
@@ -133,6 +135,25 @@ remoteStateLogExt = ".log.rmt"
 isRemoteStateLog :: FilePath -> Bool
 isRemoteStateLog path = remoteStateLogExt `isSuffixOf` path
 
+{- The filename of the chunk log for a given key. -}
+chunkLogFile :: Key -> FilePath
+chunkLogFile key = hashDirLower key </> keyFile key ++ chunkLogExt
+
+chunkLogFileKey :: FilePath -> Maybe Key
+chunkLogFileKey path
+    | ext == chunkLogExt = fileKey base
+    | otherwise = Nothing
+  where
+    file = takeFileName path
+    (base, ext) = splitAt (length file - extlen) file
+    extlen = length chunkLogExt
+
+chunkLogExt :: String
+chunkLogExt = ".log.cnk"
+
+isChunkLog :: FilePath -> Bool
+isChunkLog path = chunkLogExt `isSuffixOf` path
+
 {- The filename of the metadata log for a given key. -}
 metaDataLogFile :: Key -> FilePath
 metaDataLogFile key = hashDirLower key </> keyFile key ++ metaDataLogExt
@@ -146,20 +167,23 @@ isMetaDataLog path = metaDataLogExt `isSuffixOf` path
 prop_logs_sane :: Key -> Bool
 prop_logs_sane dummykey = and
     [ isNothing (getLogVariety "unknown")
-    , expect isUUIDBasedLog (getLogVariety uuidLog)
-    , expect isPresenceLog (getLogVariety $ locationLogFile dummykey)
-    , expect isPresenceLog (getLogVariety $ urlLogFile dummykey)
-    , expect isNewUUIDBasedLog (getLogVariety $ remoteStateLogFile dummykey)
-    , expect isOtherLog (getLogVariety $ metaDataLogFile dummykey)
-    , expect isOtherLog (getLogVariety $ numcopiesLog)
+    , expect gotUUIDBasedLog (getLogVariety uuidLog)
+    , expect gotPresenceLog (getLogVariety $ locationLogFile dummykey)
+    , expect gotPresenceLog (getLogVariety $ urlLogFile dummykey)
+    , expect gotNewUUIDBasedLog (getLogVariety $ remoteStateLogFile dummykey)
+    , expect gotChunkLog (getLogVariety $ chunkLogFile dummykey)
+    , expect gotOtherLog (getLogVariety $ metaDataLogFile dummykey)
+    , expect gotOtherLog (getLogVariety $ numcopiesLog)
     ]
   where
     expect = maybe False
-    isUUIDBasedLog UUIDBasedLog = True
-    isUUIDBasedLog _ = False
-    isNewUUIDBasedLog NewUUIDBasedLog = True
-    isNewUUIDBasedLog _ = False
-    isPresenceLog (PresenceLog k) = k == dummykey
-    isPresenceLog _ = False
-    isOtherLog OtherLog = True
-    isOtherLog _ = False
+    gotUUIDBasedLog UUIDBasedLog = True
+    gotUUIDBasedLog _ = False
+    gotNewUUIDBasedLog NewUUIDBasedLog = True
+    gotNewUUIDBasedLog _ = False
+    gotChunkLog (ChunkLog k) = k == dummykey
+    gotChunkLog _ = False
+    gotPresenceLog (PresenceLog k) = k == dummykey
+    gotPresenceLog _ = False
+    gotOtherLog OtherLog = True
+    gotOtherLog _ = False
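
The chunkLogFile/chunkLogFileKey pair above relies on git-annex's keyFile and fileKey escaping. Reduced to just the extension handling, the round trip looks roughly like this (a sketch using a plain String in place of Key):

import System.FilePath (takeFileName)

chunkLogExt :: String
chunkLogExt = ".log.cnk"

-- Simplified: append the chunk log extension to a (pre-escaped) key name.
chunkLogFile :: String -> FilePath
chunkLogFile key = key ++ chunkLogExt

-- Simplified inverse: recover the key name when the extension matches.
chunkLogFileKey :: FilePath -> Maybe String
chunkLogFileKey path
    | ext == chunkLogExt = Just base
    | otherwise = Nothing
  where
    file = takeFileName path
    (base, ext) = splitAt (length file - length chunkLogExt) file

main :: IO ()
main = do
    print (chunkLogFileKey (chunkLogFile "SHA256-s12345--xxxxxxx"))  -- Just "SHA256-s12345--xxxxxxx"
    print (chunkLogFileKey "aaa/bbb/SHA256-s12345--xxxxxxx.log.met") -- Nothing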

Logs/Chunk.hs (new file)

@@ -0,0 +1,44 @@
{- Chunk logs.
 -
 - An object can be stored in chunked form on a remote; these logs keep
 - track of the chunk size used, and the number of chunks.
 -
 - It's possible for a single object to be stored multiple times on the
 - same remote using different chunk sizes. So, while this is a MapLog, it
 - is not a normal UUIDBased log. Instead, it's a map from UUID and chunk
 - size to number of chunks.
 -
 - Format: "timestamp uuid:chunksize chunkcount"
 -
 - Copyright 2014 Joey Hess <joey@kitenet.net>
 -
 - Licensed under the GNU GPL version 3 or higher.
 -}

module Logs.Chunk where

import Common.Annex
import Logs
import Logs.MapLog
import qualified Annex.Branch
import Logs.Chunk.Pure

import qualified Data.Map as M
import Data.Time.Clock.POSIX

chunksStored :: UUID -> Key -> ChunkSize -> ChunkCount -> Annex ()
chunksStored u k chunksize chunkcount = do
    ts <- liftIO getPOSIXTime
    Annex.Branch.change (chunkLogFile k) $
        showLog . changeMapLog ts (u, chunksize) chunkcount . parseLog

chunksRemoved :: UUID -> Key -> ChunkSize -> Annex ()
chunksRemoved u k chunksize = chunksStored u k chunksize 0

getCurrentChunks :: UUID -> Key -> Annex [(ChunkSize, ChunkCount)]
getCurrentChunks u k = select . parseLog <$> Annex.Branch.get (chunkLogFile k)
  where
    select = filter (\(_sz, ct) -> ct > 0)
        . map (\((_ku, sz), l) -> (sz, value l))
        . M.toList
        . M.filterWithKey (\(ku, _sz) _ -> ku == u)
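
The interesting part of getCurrentChunks is the pure selection over the map. Isolated from the Annex monad, and with the MapLog timestamps already stripped (the real code reads the count out of each entry with `value`), it behaves like this sketch:

import qualified Data.Map as M
import Data.Int (Int64)

type UUID = String
type ChunkSize = Int64
type ChunkCount = Integer

-- Return the (chunk size, chunk count) pairs currently recorded for one
-- remote, skipping entries whose count has been zeroed by chunksRemoved.
currentChunks :: UUID -> M.Map (UUID, ChunkSize) ChunkCount -> [(ChunkSize, ChunkCount)]
currentChunks u = filter (\(_sz, ct) -> ct > 0)
    . map (\((_ku, sz), ct) -> (sz, ct))
    . M.toList
    . M.filterWithKey (\(ku, _sz) _ -> ku == u)

main :: IO ()
main = print $ currentChunks "u1" $ M.fromList
    [ (("u1", 10240), 9)
    , (("u1", 102400), 0)   -- removed; filtered out
    , (("u2", 10240), 3)    -- different remote; filtered out
    ]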

Logs/Chunk/Pure.hs (new file)

@@ -0,0 +1,32 @@
{- Chunk logs, pure operations.
 -
 - Copyright 2014 Joey Hess <joey@kitenet.net>
 -
 - Licensed under the GNU GPL version 3 or higher.
 -}

module Logs.Chunk.Pure where

import Common.Annex
import Logs.MapLog

import Data.Int

type ChunkSize = Int64

type ChunkCount = Integer

type ChunkLog = MapLog (UUID, ChunkSize) ChunkCount

parseLog :: String -> ChunkLog
parseLog = parseMapLog fieldparser valueparser
  where
    fieldparser s =
        let (u,sz) = separate (== ':') s
        in (,) <$> pure (toUUID u) <*> readish sz
    valueparser = readish

showLog :: ChunkLog -> String
showLog = showMapLog fieldshower valueshower
  where
    fieldshower (u, sz) = fromUUID u ++ ':' : show sz
    valueshower = show
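
A standalone sketch of the line format described in the header comment ("timestamp uuid:chunksize chunkcount"), leaving out the union-merge machinery that Logs.MapLog provides:

import Data.Int (Int64)
import Text.Read (readMaybe)

type ChunkSize = Int64
type ChunkCount = Integer

-- Parse one log line of the form "timestamp uuid:chunksize chunkcount".
parseLine :: String -> Maybe (String, (String, ChunkSize), ChunkCount)
parseLine l = case words l of
    [ts, field, count] -> do
        let (u, szstr) = break (== ':') field
        sz <- readMaybe (drop 1 szstr)
        n <- readMaybe count
        return (ts, (u, sz), n)
    _ -> Nothing

-- Render a line back out, matching the fieldshower above.
showLine :: String -> (String, ChunkSize) -> ChunkCount -> String
showLine ts (u, sz) n = unwords [ts, u ++ ':' : show sz, show n]

main :: IO ()
main = do
    let l = "1287290776.765152s e605dca6-446a-11e0-8b2a-002170d25c55:10240 9"
    print (parseLine l)
    putStrLn (showLine "1287290776.765152s" ("e605dca6-446a-11e0-8b2a-002170d25c55", 10240) 9)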

Logs/UUIDBased.hs

@ -1,6 +1,6 @@
{- git-annex uuid-based logs {- git-annex uuid-based logs
- -
- This is used to store information about a UUID in a way that can - This is used to store information about UUIDs in a way that can
- be union merged. - be union merged.
- -
- A line of the log will look like: "UUID[ INFO[ timestamp=foo]]" - A line of the log will look like: "UUID[ INFO[ timestamp=foo]]"

Remote/Helper/Chunked.hs

@@ -9,12 +9,11 @@ module Remote.Helper.Chunked where
 import Utility.DataUnits
 import Types.Remote
+import Logs.Chunk.Pure (ChunkSize)
 
 import qualified Data.Map as M
 import Data.Int
 
-type ChunkSize = Int64
-
 data ChunkConfig
     = NoChunks
     | UnpaddedChunks ChunkSize
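
For context on UnpaddedChunks: the number of chunks an object needs is a ceiling division of the object size by the chunk size. A small sketch (not code from this commit, and using only the ChunkConfig constructors visible in this hunk):

import Data.Int (Int64)

type ChunkSize = Int64

data ChunkConfig
    = NoChunks
    | UnpaddedChunks ChunkSize
    deriving (Show)

-- Expected number of chunks for an object of the given size, assuming
-- unpadded chunks (the last chunk may be short).
expectedChunks :: ChunkConfig -> Int64 -> Integer
expectedChunks NoChunks _ = 1
expectedChunks (UnpaddedChunks sz) objectsize
    | sz <= 0 = 1
    | otherwise = fromIntegral $ (objectsize + sz - 1) `div` sz

main :: IO ()
main = do
    print (expectedChunks (UnpaddedChunks 10240) 92160)  -- 9 chunks
    print (expectedChunks NoChunks 92160)                -- 1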

doc/design/assistant/chunks.mdwn

@@ -160,17 +160,12 @@ the git-annex branch.
 The location log does not record locations of individual chunk keys
 (too space-inefficient).
 Instead, look at git-annex:aaa/bbb/SHA256-s12345--xxxxxxx.log.cnk to get
-the chunk count and size for a key. File format would be:
-
-	ts uuid chunksize chunkcount
-
-Where a chunkcount of 0 means that the object is no longer present in the
-remote using the specified chunk size.
-
-Note that a given remote uuid might have multiple lines, if a key was
-stored on it twice using different chunk sizes. Also note that even when
-this file exists for a key, the object may be stored non-chunked on the
-remote too.
+the chunk count and size for a key.
+
+Note that a given remote uuid might have multiple chunk sizes logged, if a
+key was stored on it twice using different chunk sizes. Also note that even
+when this file exists for a key, the object may be stored non-chunked on
+the remote too.
 
 `hasKey` would check if any one (chunksize, chunkcount) is satisfied by
 the files on the remote. It would also check if the non-chunked key is
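
A sketch of the check described above, assuming the (chunksize, chunkcount) pairs come from the chunk log for the remote and using a hypothetical predicate for probing whether an individual chunk key is present:

import Data.Int (Int64)

type ChunkSize = Int64
type ChunkCount = Integer

-- True if, for any one logged (chunksize, chunkcount), all of those chunk
-- keys are present on the remote (checked by the supplied predicate).
anyChunksSatisfied
    :: (ChunkSize -> Integer -> Bool)  -- ^ is chunk number n of the given size present?
    -> [(ChunkSize, ChunkCount)]       -- ^ (chunksize, chunkcount) pairs from the chunk log
    -> Bool
anyChunksSatisfied present = any satisfied
  where
    satisfied (sz, n) = n > 0 && all (present sz) [1 .. n]

main :: IO ()
main = print $ anyChunksSatisfied (\_sz n -> n <= 9) [(10240, 9), (102400, 20)]
    -- True: the 9-chunk store is fully present even though the 20-chunk one is not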

doc/internals.mdwn

@@ -224,16 +224,14 @@ are indicated by prefixing them with "!"
 These log files are used when objects are stored in chunked form on
 remotes. They record the size(s) of the chunks, and the number of chunks.
 
-For example, this logs that a remote has an object stored using 9 chunks
-of 1 mb size:
+For example, this logs that a remote has an object stored using both
+9 chunks of 1 mb size, and 1 chunk of 10 mb size.
 
-	1287290776.765152s e605dca6-446a-11e0-8b2a-002170d25c55 10240 9
+	1287290776.765152s e605dca6-446a-11e0-8b2a-002170d25c55:10240 9
+	1287290776.765153s e605dca6-446a-11e0-8b2a-002170d25c55:102400 1
 
 (When those chunks are removed from the remote, the 9 is changed to 0.)
 
-For future expansion, additional fields may be present following the
-number of chunks.
-
 ## `schedule.log`
 
 Used to record scheduled events, such as periodic fscks.