implement chunk logs

Slightly tricky as they are not normal UUIDBased logs, but are instead maps
from (uuid, chunksize) to chunkcount.

This commit was sponsored by Frank Thomas.
This commit is contained in:
Joey Hess 2014-07-24 16:23:36 -04:00
parent bbdb2c04d5
commit e2c44bf656
8 changed files with 134 additions and 39 deletions

View file

@ -12,8 +12,9 @@ module Annex.Branch.Transitions (
import Logs
import Logs.Transitions
import Logs.UUIDBased as UUIDBased
import Logs.Presence.Pure as Presence
import qualified Logs.UUIDBased as UUIDBased
import qualified Logs.Presence.Pure as Presence
import qualified Logs.Chunk.Pure as Chunk
import Types.TrustLevel
import Types.UUID
@ -37,9 +38,11 @@ dropDead f content trustmap = case getLogVariety f of
-- because git remotes may still exist, and they need
-- to still know it's dead.
| f == trustLog -> PreserveFile
| otherwise -> ChangeFile $ UUIDBased.showLog id $ dropDeadFromUUIDBasedLog trustmap $ UUIDBased.parseLog Just content
| otherwise -> ChangeFile $ UUIDBased.showLog id $ dropDeadFromMapLog trustmap id $ UUIDBased.parseLog Just content
Just NewUUIDBasedLog -> ChangeFile $
UUIDBased.showLogNew id $ dropDeadFromUUIDBasedLog trustmap $ UUIDBased.parseLogNew Just content
UUIDBased.showLogNew id $ dropDeadFromMapLog trustmap id $ UUIDBased.parseLogNew Just content
Just (ChunkLog _) -> ChangeFile $
Chunk.showLog $ dropDeadFromMapLog trustmap fst $ Chunk.parseLog content
Just (PresenceLog _) ->
let newlog = Presence.compactLog $ dropDeadFromPresenceLog trustmap $ Presence.parseLog content
in if null newlog
@ -48,8 +51,8 @@ dropDead f content trustmap = case getLogVariety f of
Just OtherLog -> PreserveFile
Nothing -> PreserveFile
dropDeadFromUUIDBasedLog :: TrustMap -> UUIDBased.Log String -> UUIDBased.Log String
dropDeadFromUUIDBasedLog trustmap = M.filterWithKey $ notDead trustmap . const
dropDeadFromMapLog :: TrustMap -> (k -> UUID) -> M.Map k v -> M.Map k v
dropDeadFromMapLog trustmap getuuid = M.filterWithKey $ \k _v -> notDead trustmap getuuid k
{- Presence logs can contain UUIDs or other values. Any line that matches
- a dead uuid is dropped; any other values are passed through. -}

52
Logs.hs
View file

@ -14,6 +14,7 @@ import Types.Key
data LogVariety
= UUIDBasedLog
| NewUUIDBasedLog
| ChunkLog Key
| PresenceLog Key
| OtherLog
deriving (Show)
@ -24,6 +25,7 @@ getLogVariety :: FilePath -> Maybe LogVariety
getLogVariety f
| f `elem` topLevelUUIDBasedLogs = Just UUIDBasedLog
| isRemoteStateLog f = Just NewUUIDBasedLog
| isChunkLog f = ChunkLog <$> chunkLogFileKey f
| isMetaDataLog f || f `elem` otherLogs = Just OtherLog
| otherwise = PresenceLog <$> firstJust (presenceLogs f)
@ -133,6 +135,25 @@ remoteStateLogExt = ".log.rmt"
isRemoteStateLog :: FilePath -> Bool
isRemoteStateLog path = remoteStateLogExt `isSuffixOf` path
{- The filename of the chunk log for a given key. -}
chunkLogFile :: Key -> FilePath
chunkLogFile key = hashDirLower key </> keyFile key ++ chunkLogExt
chunkLogFileKey :: FilePath -> Maybe Key
chunkLogFileKey path
| ext == chunkLogExt = fileKey base
| otherwise = Nothing
where
file = takeFileName path
(base, ext) = splitAt (length file - extlen) file
extlen = length chunkLogExt
chunkLogExt :: String
chunkLogExt = ".log.cnk"
isChunkLog :: FilePath -> Bool
isChunkLog path = chunkLogExt `isSuffixOf` path
{- The filename of the metadata log for a given key. -}
metaDataLogFile :: Key -> FilePath
metaDataLogFile key = hashDirLower key </> keyFile key ++ metaDataLogExt
@ -146,20 +167,23 @@ isMetaDataLog path = metaDataLogExt `isSuffixOf` path
prop_logs_sane :: Key -> Bool
prop_logs_sane dummykey = and
[ isNothing (getLogVariety "unknown")
, expect isUUIDBasedLog (getLogVariety uuidLog)
, expect isPresenceLog (getLogVariety $ locationLogFile dummykey)
, expect isPresenceLog (getLogVariety $ urlLogFile dummykey)
, expect isNewUUIDBasedLog (getLogVariety $ remoteStateLogFile dummykey)
, expect isOtherLog (getLogVariety $ metaDataLogFile dummykey)
, expect isOtherLog (getLogVariety $ numcopiesLog)
, expect gotUUIDBasedLog (getLogVariety uuidLog)
, expect gotPresenceLog (getLogVariety $ locationLogFile dummykey)
, expect gotPresenceLog (getLogVariety $ urlLogFile dummykey)
, expect gotNewUUIDBasedLog (getLogVariety $ remoteStateLogFile dummykey)
, expect gotChunkLog (getLogVariety $ chunkLogFile dummykey)
, expect gotOtherLog (getLogVariety $ metaDataLogFile dummykey)
, expect gotOtherLog (getLogVariety $ numcopiesLog)
]
where
expect = maybe False
isUUIDBasedLog UUIDBasedLog = True
isUUIDBasedLog _ = False
isNewUUIDBasedLog NewUUIDBasedLog = True
isNewUUIDBasedLog _ = False
isPresenceLog (PresenceLog k) = k == dummykey
isPresenceLog _ = False
isOtherLog OtherLog = True
isOtherLog _ = False
gotUUIDBasedLog UUIDBasedLog = True
gotUUIDBasedLog _ = False
gotNewUUIDBasedLog NewUUIDBasedLog = True
gotNewUUIDBasedLog _ = False
gotChunkLog (ChunkLog k) = k == dummykey
gotChunkLog _ = False
gotPresenceLog (PresenceLog k) = k == dummykey
gotPresenceLog _ = False
gotOtherLog OtherLog = True
gotOtherLog _ = False

44
Logs/Chunk.hs Normal file
View file

@ -0,0 +1,44 @@
{- Chunk logs.
-
- An object can be stored in chunked for on a remote; these logs keep
- track of the chunk size used, and the number of chunks.
-
- It's possible for a single object to be stored multiple times on the
- same remote using different chunk sizes. So, while this is a MapLog, it
- is not a normal UUIDBased log. Intead, it's a map from UUID and chunk
- size to number of chunks.
-
- Format: "timestamp uuid:chunksize chunkcount"
-
- Copyright 2014 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Logs.Chunk where
import Common.Annex
import Logs
import Logs.MapLog
import qualified Annex.Branch
import Logs.Chunk.Pure
import qualified Data.Map as M
import Data.Time.Clock.POSIX
chunksStored :: UUID -> Key -> ChunkSize -> ChunkCount -> Annex ()
chunksStored u k chunksize chunkcount = do
ts <- liftIO getPOSIXTime
Annex.Branch.change (chunkLogFile k) $
showLog . changeMapLog ts (u, chunksize) chunkcount . parseLog
chunksRemoved :: UUID -> Key -> ChunkSize -> Annex ()
chunksRemoved u k chunksize = chunksStored u k chunksize 0
getCurrentChunks :: UUID -> Key -> Annex [(ChunkSize, ChunkCount)]
getCurrentChunks u k = select . parseLog <$> Annex.Branch.get (chunkLogFile k)
where
select = filter (\(_sz, ct) -> ct > 0)
. map (\((_ku, sz), l) -> (sz, value l))
. M.toList
. M.filterWithKey (\(ku, _sz) _ -> ku == u)

32
Logs/Chunk/Pure.hs Normal file
View file

@ -0,0 +1,32 @@
{- Chunk logs, pure operations.
-
- Copyright 2014 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Logs.Chunk.Pure where
import Common.Annex
import Logs.MapLog
import Data.Int
type ChunkSize = Int64
type ChunkCount = Integer
type ChunkLog = MapLog (UUID, ChunkSize) ChunkCount
parseLog :: String -> ChunkLog
parseLog = parseMapLog fieldparser valueparser
where
fieldparser s =
let (u,sz) = separate (== ':') s
in (,) <$> pure (toUUID u) <*> readish sz
valueparser = readish
showLog :: ChunkLog -> String
showLog = showMapLog fieldshower valueshower
where
fieldshower (u, sz) = fromUUID u ++ ':' : show sz
valueshower = show

View file

@ -1,6 +1,6 @@
{- git-annex uuid-based logs
-
- This is used to store information about a UUID in a way that can
- This is used to store information about UUIDs in a way that can
- be union merged.
-
- A line of the log will look like: "UUID[ INFO[ timestamp=foo]]"

View file

@ -9,12 +9,11 @@ module Remote.Helper.Chunked where
import Utility.DataUnits
import Types.Remote
import Logs.Chunk.Pure (ChunkSize)
import qualified Data.Map as M
import Data.Int
type ChunkSize = Int64
data ChunkConfig
= NoChunks
| UnpaddedChunks ChunkSize

View file

@ -160,17 +160,12 @@ the git-annex branch.
The location log does not record locations of individual chunk keys
(too space-inneficient).
Instead, look at git-annex:aaa/bbb/SHA256-s12345--xxxxxxx.log.cnk to get
the chunk count and size for a key. File format would be:
the chunk count and size for a key.
ts uuid chunksize chunkcount
Where a chunkcount of 0 means that the object is not longer present in the
remote using the specified chunk size.
Note that a given remote uuid might have multiple lines, if a key was
stored on it twice using different chunk sizes. Also note that even when
this file exists for a key, the object may be stored non-chunked on the
remote too.
Note that a given remote uuid might have multiple chunk sizes logged, if a
key was stored on it twice using different chunk sizes. Also note that even
when this file exists for a key, the object may be stored non-chunked on
the remote too.
`hasKey` would check if any one (chunksize, chunkcount) is satisfied by
the files on the remote. It would also check if the non-chunked key is

View file

@ -224,16 +224,14 @@ are indicated by prefixing them with "!"
These log files are used when objects are stored in chunked form on
remotes. They record the size(s) of the chunks, and the number of chunks.
For example, this logs that a remote has an object stored using 9 chunks
of 1 mb size:
For example, this logs that a remote has an object stored using both
9 chunks of 1 mb size, and 1 chunk of 10 mb size.
1287290776.765152s e605dca6-446a-11e0-8b2a-002170d25c55 10240 9
1287290776.765152s e605dca6-446a-11e0-8b2a-002170d25c55:10240 9
1287290776.765153s e605dca6-446a-11e0-8b2a-002170d25c55:102400 1
(When those chunks are removed from the remote, the 9 is changed to 0.)
For future expansion, additional fields may be present following the
number of chunks.
## `schedule.log`
Used to record scheduled events, such as periodic fscks.