implement chunk logs
Slightly tricky as they are not normal UUIDBased logs, but are instead maps from (uuid, chunksize) to chunkcount. This commit was sponsored by Frank Thomas.
This commit is contained in:
parent
bbdb2c04d5
commit
e2c44bf656
8 changed files with 134 additions and 39 deletions
|
@ -12,8 +12,9 @@ module Annex.Branch.Transitions (
|
|||
|
||||
import Logs
|
||||
import Logs.Transitions
|
||||
import Logs.UUIDBased as UUIDBased
|
||||
import Logs.Presence.Pure as Presence
|
||||
import qualified Logs.UUIDBased as UUIDBased
|
||||
import qualified Logs.Presence.Pure as Presence
|
||||
import qualified Logs.Chunk.Pure as Chunk
|
||||
import Types.TrustLevel
|
||||
import Types.UUID
|
||||
|
||||
|
@ -37,9 +38,11 @@ dropDead f content trustmap = case getLogVariety f of
|
|||
-- because git remotes may still exist, and they need
|
||||
-- to still know it's dead.
|
||||
| f == trustLog -> PreserveFile
|
||||
| otherwise -> ChangeFile $ UUIDBased.showLog id $ dropDeadFromUUIDBasedLog trustmap $ UUIDBased.parseLog Just content
|
||||
| otherwise -> ChangeFile $ UUIDBased.showLog id $ dropDeadFromMapLog trustmap id $ UUIDBased.parseLog Just content
|
||||
Just NewUUIDBasedLog -> ChangeFile $
|
||||
UUIDBased.showLogNew id $ dropDeadFromUUIDBasedLog trustmap $ UUIDBased.parseLogNew Just content
|
||||
UUIDBased.showLogNew id $ dropDeadFromMapLog trustmap id $ UUIDBased.parseLogNew Just content
|
||||
Just (ChunkLog _) -> ChangeFile $
|
||||
Chunk.showLog $ dropDeadFromMapLog trustmap fst $ Chunk.parseLog content
|
||||
Just (PresenceLog _) ->
|
||||
let newlog = Presence.compactLog $ dropDeadFromPresenceLog trustmap $ Presence.parseLog content
|
||||
in if null newlog
|
||||
|
@ -48,8 +51,8 @@ dropDead f content trustmap = case getLogVariety f of
|
|||
Just OtherLog -> PreserveFile
|
||||
Nothing -> PreserveFile
|
||||
|
||||
dropDeadFromUUIDBasedLog :: TrustMap -> UUIDBased.Log String -> UUIDBased.Log String
|
||||
dropDeadFromUUIDBasedLog trustmap = M.filterWithKey $ notDead trustmap . const
|
||||
{- Filters a map-based log, keeping only the entries for which notDead
 - holds of the UUID that getuuid extracts from the entry's key. The
 - values stored in the map are not examined. -}
dropDeadFromMapLog :: TrustMap -> (k -> UUID) -> M.Map k v -> M.Map k v
dropDeadFromMapLog trustmap getuuid = M.filterWithKey keep
  where
    keep key _ = notDead trustmap getuuid key
|
||||
|
||||
{- Presence logs can contain UUIDs or other values. Any line that matches
|
||||
- a dead uuid is dropped; any other values are passed through. -}
|
||||
|
|
52
Logs.hs
52
Logs.hs
|
@ -14,6 +14,7 @@ import Types.Key
|
|||
{- The kinds of log file that getLogVariety distinguishes. The ChunkLog
 - and PresenceLog constructors carry a Key. -}
data LogVariety
    = UUIDBasedLog
    | NewUUIDBasedLog
    | ChunkLog Key
    | PresenceLog Key
    | OtherLog
    deriving (Show)
|
||||
|
@ -24,6 +25,7 @@ getLogVariety :: FilePath -> Maybe LogVariety
|
|||
getLogVariety f
|
||||
| f `elem` topLevelUUIDBasedLogs = Just UUIDBasedLog
|
||||
| isRemoteStateLog f = Just NewUUIDBasedLog
|
||||
| isChunkLog f = ChunkLog <$> chunkLogFileKey f
|
||||
| isMetaDataLog f || f `elem` otherLogs = Just OtherLog
|
||||
| otherwise = PresenceLog <$> firstJust (presenceLogs f)
|
||||
|
||||
|
@ -133,6 +135,25 @@ remoteStateLogExt = ".log.rmt"
|
|||
{- Checks whether a path ends with remoteStateLogExt. -}
isRemoteStateLog :: FilePath -> Bool
isRemoteStateLog = isSuffixOf remoteStateLogExt
|
||||
|
||||
{- Path of the chunk log for a key: the directory given by hashDirLower,
 - followed by the key's file name with chunkLogExt appended. -}
chunkLogFile :: Key -> FilePath
chunkLogFile key = hashDirLower key </> logname
  where
    logname = keyFile key ++ chunkLogExt
|
||||
|
||||
{- Recovers the key from the path of a chunk log. Only the final path
 - component is examined. When it does not end in chunkLogExt the result
 - is Nothing; otherwise the extension is removed and the remainder is
 - handed to fileKey. -}
chunkLogFileKey :: FilePath -> Maybe Key
chunkLogFileKey path
    | chunkLogExt `isSuffixOf` file = fileKey (take baselen file)
    | otherwise = Nothing
  where
    file = takeFileName path
    -- Number of characters left once the extension is dropped.
    baselen = length file - length chunkLogExt
|
||||
|
||||
{- Extension that chunkLogFile appends to a key's file name, and that
 - isChunkLog and chunkLogFileKey look for. -}
chunkLogExt :: String
chunkLogExt = ".log.cnk"
|
||||
|
||||
{- Checks whether a path ends with chunkLogExt. -}
isChunkLog :: FilePath -> Bool
isChunkLog = isSuffixOf chunkLogExt
|
||||
|
||||
{- Path of the metadata log for a key: the directory given by
 - hashDirLower, followed by the key's file name with metaDataLogExt
 - appended. -}
metaDataLogFile :: Key -> FilePath
metaDataLogFile key = hashDirLower key </> logname
  where
    logname = keyFile key ++ metaDataLogExt
|
||||
|
@ -146,20 +167,23 @@ isMetaDataLog path = metaDataLogExt `isSuffixOf` path
|
|||
prop_logs_sane :: Key -> Bool
|
||||
prop_logs_sane dummykey = and
|
||||
[ isNothing (getLogVariety "unknown")
|
||||
, expect isUUIDBasedLog (getLogVariety uuidLog)
|
||||
, expect isPresenceLog (getLogVariety $ locationLogFile dummykey)
|
||||
, expect isPresenceLog (getLogVariety $ urlLogFile dummykey)
|
||||
, expect isNewUUIDBasedLog (getLogVariety $ remoteStateLogFile dummykey)
|
||||
, expect isOtherLog (getLogVariety $ metaDataLogFile dummykey)
|
||||
, expect isOtherLog (getLogVariety $ numcopiesLog)
|
||||
, expect gotUUIDBasedLog (getLogVariety uuidLog)
|
||||
, expect gotPresenceLog (getLogVariety $ locationLogFile dummykey)
|
||||
, expect gotPresenceLog (getLogVariety $ urlLogFile dummykey)
|
||||
, expect gotNewUUIDBasedLog (getLogVariety $ remoteStateLogFile dummykey)
|
||||
, expect gotChunkLog (getLogVariety $ chunkLogFile dummykey)
|
||||
, expect gotOtherLog (getLogVariety $ metaDataLogFile dummykey)
|
||||
, expect gotOtherLog (getLogVariety $ numcopiesLog)
|
||||
]
|
||||
where
|
||||
expect = maybe False
|
||||
isUUIDBasedLog UUIDBasedLog = True
|
||||
isUUIDBasedLog _ = False
|
||||
isNewUUIDBasedLog NewUUIDBasedLog = True
|
||||
isNewUUIDBasedLog _ = False
|
||||
isPresenceLog (PresenceLog k) = k == dummykey
|
||||
isPresenceLog _ = False
|
||||
isOtherLog OtherLog = True
|
||||
isOtherLog _ = False
|
||||
gotUUIDBasedLog UUIDBasedLog = True
|
||||
gotUUIDBasedLog _ = False
|
||||
gotNewUUIDBasedLog NewUUIDBasedLog = True
|
||||
gotNewUUIDBasedLog _ = False
|
||||
gotChunkLog (ChunkLog k) = k == dummykey
|
||||
gotChunkLog _ = False
|
||||
gotPresenceLog (PresenceLog k) = k == dummykey
|
||||
gotPresenceLog _ = False
|
||||
gotOtherLog OtherLog = True
|
||||
gotOtherLog _ = False
|
||||
|
|
44
Logs/Chunk.hs
Normal file
44
Logs/Chunk.hs
Normal file
|
@ -0,0 +1,44 @@
|
|||
{- Chunk logs.
|
||||
-
|
||||
- An object can be stored in chunked form on a remote; these logs keep
|
||||
- track of the chunk size used, and the number of chunks.
|
||||
-
|
||||
- It's possible for a single object to be stored multiple times on the
|
||||
- same remote using different chunk sizes. So, while this is a MapLog, it
|
||||
- is not a normal UUIDBased log. Instead, it's a map from UUID and chunk
|
||||
- size to number of chunks.
|
||||
-
|
||||
- Format: "timestamp uuid:chunksize chunkcount"
|
||||
-
|
||||
- Copyright 2014 Joey Hess <joey@kitenet.net>
|
||||
-
|
||||
- Licensed under the GNU GPL version 3 or higher.
|
||||
-}
|
||||
|
||||
module Logs.Chunk where
|
||||
|
||||
import Common.Annex
|
||||
import Logs
|
||||
import Logs.MapLog
|
||||
import qualified Annex.Branch
|
||||
import Logs.Chunk.Pure
|
||||
|
||||
import qualified Data.Map as M
|
||||
import Data.Time.Clock.POSIX
|
||||
|
||||
{- Records in the key's chunk log that the given UUID has the key stored
 - as chunkcount chunks of size chunksize. The entry is stamped with the
 - current POSIX time. -}
chunksStored :: UUID -> Key -> ChunkSize -> ChunkCount -> Annex ()
chunksStored u k chunksize chunkcount = do
    now <- liftIO getPOSIXTime
    -- Parse the existing log, set the (uuid, chunksize) entry, re-serialize.
    let update = showLog . changeMapLog now (u, chunksize) chunkcount . parseLog
    Annex.Branch.change (chunkLogFile k) update
|
||||
|
||||
{- Records that the key is no longer stored for the given UUID using the
 - given chunk size, by logging a chunk count of 0. -}
chunksRemoved :: UUID -> Key -> ChunkSize -> Annex ()
chunksRemoved u k chunksize = chunksStored u k chunksize nochunks
  where
    nochunks = 0
|
||||
|
||||
{- Lists the (chunk size, chunk count) pairs logged for the given UUID in
 - the key's chunk log. Entries whose chunk count is not greater than 0
 - are omitted. -}
getCurrentChunks :: UUID -> Key -> Annex [(ChunkSize, ChunkCount)]
getCurrentChunks u k = select . parseLog <$> Annex.Branch.get (chunkLogFile k)
  where
    select m =
        [ (sz, ct)
        | ((ku, sz), l) <- M.toList m
        , ku == u
        , let ct = value l
        , ct > 0
        ]
|
32
Logs/Chunk/Pure.hs
Normal file
32
Logs/Chunk/Pure.hs
Normal file
|
@ -0,0 +1,32 @@
|
|||
{- Chunk logs, pure operations.
|
||||
-
|
||||
- Copyright 2014 Joey Hess <joey@kitenet.net>
|
||||
-
|
||||
- Licensed under the GNU GPL version 3 or higher.
|
||||
-}
|
||||
|
||||
module Logs.Chunk.Pure where
|
||||
|
||||
import Common.Annex
|
||||
import Logs.MapLog
|
||||
import Data.Int
|
||||
|
||||
{- Size of a chunk. -}
type ChunkSize = Int64

{- Number of chunks. -}
type ChunkCount = Integer

{- A MapLog from a (UUID, chunk size) pair to a chunk count. -}
type ChunkLog = MapLog (UUID, ChunkSize) ChunkCount
|
||||
|
||||
{- Parses the contents of a chunk log. A field has the form
 - "uuid:chunksize"; the field parser yields Nothing when the chunk size
 - cannot be read, and the value parser yields Nothing when the chunk
 - count cannot be read. -}
parseLog :: String -> ChunkLog
parseLog = parseMapLog parsefield readish
  where
    parsefield s = (,) (toUUID u) <$> readish sz
      where
        (u, sz) = separate (== ':') s
|
||||
|
||||
{- Serializes a chunk log, writing each field as "uuid:chunksize" and
 - each value as the shown chunk count. -}
showLog :: ChunkLog -> String
showLog = showMapLog showfield show
  where
    showfield (u, sz) = concat [fromUUID u, ":", show sz]
|
|
@ -1,6 +1,6 @@
|
|||
{- git-annex uuid-based logs
|
||||
-
|
||||
- This is used to store information about a UUID in a way that can
|
||||
- This is used to store information about UUIDs in a way that can
|
||||
- be union merged.
|
||||
-
|
||||
- A line of the log will look like: "UUID[ INFO[ timestamp=foo]]"
|
||||
|
|
|
@ -9,12 +9,11 @@ module Remote.Helper.Chunked where
|
|||
|
||||
import Utility.DataUnits
|
||||
import Types.Remote
|
||||
import Logs.Chunk.Pure (ChunkSize)
|
||||
|
||||
import qualified Data.Map as M
|
||||
import Data.Int
|
||||
|
||||
type ChunkSize = Int64
|
||||
|
||||
data ChunkConfig
|
||||
= NoChunks
|
||||
| UnpaddedChunks ChunkSize
|
||||
|
|
|
@ -160,17 +160,12 @@ the git-annex branch.
|
|||
The location log does not record locations of individual chunk keys
|
||||
(too space-inefficient).
|
||||
Instead, look at git-annex:aaa/bbb/SHA256-s12345--xxxxxxx.log.cnk to get
|
||||
the chunk count and size for a key. File format would be:
|
||||
the chunk count and size for a key.
|
||||
|
||||
ts uuid chunksize chunkcount
|
||||
|
||||
Where a chunkcount of 0 means that the object is not longer present in the
|
||||
remote using the specified chunk size.
|
||||
|
||||
Note that a given remote uuid might have multiple lines, if a key was
|
||||
stored on it twice using different chunk sizes. Also note that even when
|
||||
this file exists for a key, the object may be stored non-chunked on the
|
||||
remote too.
|
||||
Note that a given remote uuid might have multiple chunk sizes logged, if a
|
||||
key was stored on it twice using different chunk sizes. Also note that even
|
||||
when this file exists for a key, the object may be stored non-chunked on
|
||||
the remote too.
|
||||
|
||||
`hasKey` would check if any one (chunksize, chunkcount) is satisfied by
|
||||
the files on the remote. It would also check if the non-chunked key is
|
||||
|
|
|
@ -224,16 +224,14 @@ are indicated by prefixing them with "!"
|
|||
These log files are used when objects are stored in chunked form on
|
||||
remotes. They record the size(s) of the chunks, and the number of chunks.
|
||||
|
||||
For example, this logs that a remote has an object stored using 9 chunks
|
||||
of 1 mb size:
|
||||
For example, this logs that a remote has an object stored using both
|
||||
9 chunks of 1 mb size, and 1 chunk of 10 mb size.
|
||||
|
||||
1287290776.765152s e605dca6-446a-11e0-8b2a-002170d25c55 10240 9
|
||||
1287290776.765152s e605dca6-446a-11e0-8b2a-002170d25c55:10240 9
|
||||
1287290776.765153s e605dca6-446a-11e0-8b2a-002170d25c55:102400 1
|
||||
|
||||
(When those chunks are removed from the remote, the 9 is changed to 0.)
|
||||
|
||||
For future expansion, additional fields may be present following the
|
||||
number of chunks.
|
||||
|
||||
## `schedule.log`
|
||||
|
||||
Used to record scheduled events, such as periodic fscks.
|
||||
|
|
Loading…
Reference in a new issue