storing ContentIdentifier in the git-annex branch

This commit is contained in:
Joey Hess 2019-02-20 15:36:09 -04:00
parent 0442842622
commit e8bfc3640b
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
5 changed files with 84 additions and 4 deletions

View file

@ -10,7 +10,7 @@ Copyright: © 2012-2017 Joey Hess <id@joeyh.name>
© 2014 Sören Brunk
License: AGPL-3+
Files: Annex/AdjustedBranch.hs Annex/AdjustedBranch/Name.hs Annex/CurrentBranch.hs Annex/Version.hs Benchmark.hs Logs/File.hs Logs/Line.hs Logs/Smudge.hs Remote/Git.hs Remote/Helper/Ssh.hs Remote/Adb.hs Remote/External.hs Remote/Extermal/Types.hs Types/AdjustedBranch.hs Types/RepoVersion.hs Upgrade/V6.hs
Files: Annex/AdjustedBranch.hs Annex/AdjustedBranch/Name.hs Annex/CurrentBranch.hs Annex/Version.hs Benchmark.hs Logs/File.hs Logs/Line.hs Logs/Smudge.hs Logs/ContentIdentifier/Pure.hs Remote/Git.hs Remote/Helper/Ssh.hs Remote/Adb.hs Remote/External.hs Remote/External/Types.hs Types/AdjustedBranch.hs Types/RepoVersion.hs Upgrade/V6.hs
Copyright: © 2011-2019 Joey Hess <id@joeyh.name>
License: AGPL-3+

16
Logs.hs
View file

@ -1,6 +1,6 @@
{- git-annex log file names
-
- Copyright 2013-2018 Joey Hess <id@joeyh.name>
- Copyright 2013-2019 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
@ -25,7 +25,7 @@ data LogVariety
getLogVariety :: FilePath -> Maybe LogVariety
getLogVariety f
| f `elem` topLevelUUIDBasedLogs = Just UUIDBasedLog
| isRemoteStateLog f = Just NewUUIDBasedLog
| isRemoteStateLog f || isRemoteContentIdentifierLog f = Just NewUUIDBasedLog
| isChunkLog f = ChunkLog <$> chunkLogFileKey f
| isRemoteMetaDataLog f = Just RemoteMetaDataLog
| isMetaDataLog f || f `elem` otherLogs = Just OtherLog
@ -54,7 +54,7 @@ presenceLogs f =
, locationLogFileKey f
]
{- Logs that are neither UUID based nor presence logs. -}
{- Top-level logs that are neither UUID based nor presence logs. -}
otherLogs :: [FilePath]
otherLogs =
[ numcopiesLog
@ -197,3 +197,13 @@ remoteMetaDataLogExt = ".log.rmet"
{- True when the path ends with the remote metadata log extension. -}
isRemoteMetaDataLog :: FilePath -> Bool
isRemoteMetaDataLog = isSuffixOf remoteMetaDataLogExt
{- Path of the log file holding the remote content identifiers of a key:
 - the directory given by branchHashDir for the key, containing a file
 - named by keyFile with the remote content identifier extension appended. -}
remoteContentIdentifierLogFile :: GitConfig -> Key -> FilePath
remoteContentIdentifierLogFile config key = dir </> file
  where
    dir = branchHashDir config key
    file = keyFile key ++ remoteContentIdentifierExt
{- Extension appended to a key's file name to form its remote content
 - identifier log. -}
remoteContentIdentifierExt :: String
remoteContentIdentifierExt = ".log.cid"

{- True when the path ends with the remote content identifier log
 - extension. -}
isRemoteContentIdentifierLog :: FilePath -> Bool
isRemoteContentIdentifierLog = isSuffixOf remoteContentIdentifierExt

View file

@ -0,0 +1,57 @@
{- Remote content identifier logs, pure operations.
-
- Copyright 2019 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE OverloadedStrings #-}
module Logs.ContentIdentifier.Pure
( ContentIdentifierLog
, parseLog
, buildLog
) where
import Annex.Common
import Logs.MapLog
import Data.Int
import Types.Remote (ContentIdentifier(..))
import Utility.Base64
import qualified Data.ByteString as S
import qualified Data.ByteString.Char8 as S8
import qualified Data.ByteString.Lazy as L
import qualified Data.Attoparsec.ByteString.Lazy as A
import qualified Data.Attoparsec.ByteString.Char8 as A8
import Data.ByteString.Builder
{- A log associating each UUID with a list of content identifiers.
 - (The UUID is presumably that of a remote, going by the module's
 - description.) -}
type ContentIdentifierLog = MapLog UUID [ContentIdentifier]
{- Serializes a ContentIdentifierLog. The content identifiers of an
 - entry are written one after another, separated by single spaces.
 -
 - A content identifier is written as "!" followed by the output of
 - toB64' whenever writing it verbatim could not be read back by
 - parseLog:
 -
 - * it contains a space (the separator), '\r' or '\n';
 - * it starts with "!", which would be mistaken for the encoding marker;
 - * it is empty. parseLog reads each identifier with takeWhile1, which
 -   needs at least one byte, so an empty identifier written verbatim
 -   would not parse. Encoded, it becomes a lone "!".
 -
 - An empty list of content identifiers still produces an empty value.
 -}
buildLog :: ContentIdentifierLog -> Builder
buildLog = buildMapLog buildUUID valuebuilder
  where
    valuebuilder [] = mempty
    valuebuilder [c] = buildcid c
    valuebuilder (c:cs) = buildcid c <> charUtf8 ' ' <> valuebuilder cs

    buildcid (ContentIdentifier c)
        | needsencoding c = charUtf8 '!' <> byteString (toB64' c)
        | otherwise = byteString c

    -- Anything that would be ambiguous or unreadable when written as-is.
    needsencoding c = S.null c
        || S8.any (`elem` [' ', '\r', '\n']) c
        || "!" `S8.isPrefixOf` c
{- Parses a ContentIdentifierLog.
 -
 - The value of an entry is one or more tokens separated by single
 - spaces. A token starting with "!" has that marker dropped and the rest
 - passed to fromB64Maybe'; any other token is used as-is. Tokens that
 - fromB64Maybe' rejects are left out of the result. The content
 - identifiers are returned in the order they appear in the log.
 -}
parseLog :: L.ByteString -> ContentIdentifierLog
parseLog = parseMapLog uuidparser cidsparser
  where
    uuidparser = toUUID <$> A.takeByteString

    -- Tokens are accumulated newest-first, so reverse at the end.
    cidsparser = reverse . catMaybes <$> collect []

    collect acc = do
        token <- A8.takeWhile1 (/= ' ')
        let acc' = decode token : acc
        done <- A8.atEnd
        if done
            then return acc'
            else A8.char ' ' >> collect acc'

    decode token
        | "!" `S8.isPrefixOf` token =
            ContentIdentifier <$> fromB64Maybe' (S.drop 1 token)
        | otherwise = Just (ContentIdentifier token)

View file

@ -281,6 +281,18 @@ For example:
1287290776.765152s 26339d22-446b-11e0-9101-002170d25c55:x +1
1291237510.141453s 26339d22-446b-11e0-9101-002170d25c55:x -1 26339d22-446b-11e0-9101-002170d25c55:x +2
## `aaa/bbb/*.log.cid`
These log files store per-remote content identifiers for keys.
A given key may have any number of content identifiers.
The format is a timestamp, followed by the uuid of the remote,
followed by the content identifiers, separated by spaces. If a content
identifier contains a space, \r, or \n, or begins with "!", it will be
base64 encoded. Base64 encoded values are indicated by prefixing them with "!".
1287290776.765152s e605dca6-446a-11e0-8b2a-002170d25c55 5248916 5250378
## `aaa/bbb/*.log.cnk`
These log files are used when objects are stored in chunked form on

View file

@ -869,6 +869,7 @@ Executable git-annex
Logs.Chunk
Logs.Chunk.Pure
Logs.Config
Logs.ContentIdentifier.Pure
Logs.Difference
Logs.Difference.Pure
Logs.Export