follow-on changes from MetaData type changes

Including writing and parsing the metadata log files with
bytestring-builder and attoparsec.
This commit is contained in:
Joey Hess 2019-01-07 15:51:05 -04:00
parent 16c798b5ef
commit cb375977a6
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
14 changed files with 102 additions and 81 deletions

View file

@ -56,10 +56,10 @@ dropDead f content trustmap = case getLogVariety f of
then RemoveFile
else ChangeFile $ toLazyByteString $ Presence.buildLog newlog
Just RemoteMetaDataLog ->
let newlog = dropDeadFromRemoteMetaDataLog trustmap $ MetaData.simplifyLog $ MetaData.parseLog (decodeBL content)
let newlog = dropDeadFromRemoteMetaDataLog trustmap $ MetaData.simplifyLog $ MetaData.parseLog content
in if S.null newlog
then RemoveFile
else ChangeFile $ encodeBL $ MetaData.showLog newlog
else ChangeFile $ toLazyByteString $ MetaData.buildLog newlog
Just OtherLog -> PreserveFile
Nothing -> PreserveFile

View file

@ -22,6 +22,7 @@ import Annex.CatFile
import Utility.Glob
import qualified Data.Set as S
import qualified Data.Text as T
import Data.Time.Calendar
import Data.Time.Clock
import Data.Time.Clock.POSIX
@ -71,30 +72,30 @@ genMetaData key file status = do
- only changes to add the date fields. -}
dateMetaData :: UTCTime -> MetaData -> MetaData
dateMetaData mtime old = modMeta old $
(SetMeta yearMetaField $ S.singleton $ toMetaValue $ show y)
(SetMeta yearMetaField $ S.singleton $ toMetaValue $ encodeBS' $ show y)
`ComposeModMeta`
(SetMeta monthMetaField $ S.singleton $ toMetaValue $ show m)
(SetMeta monthMetaField $ S.singleton $ toMetaValue $ encodeBS' $ show m)
`ComposeModMeta`
(SetMeta dayMetaField $ S.singleton $ toMetaValue $ show d)
(SetMeta dayMetaField $ S.singleton $ toMetaValue $ encodeBS' $ show d)
where
(y, m, d) = toGregorian $ utctDay mtime
{- Parses field=value, field+=value, field-=value, field?=value -}
parseModMeta :: String -> Either String ModMeta
parseModMeta p = case lastMaybe f of
Just '+' -> AddMeta <$> mkMetaField f' <*> v
Just '-' -> DelMeta <$> mkMetaField f' <*> (Just <$> v)
Just '?' -> MaybeSetMeta <$> mkMetaField f' <*> v
_ -> SetMeta <$> mkMetaField f <*> (S.singleton <$> v)
Just '+' -> AddMeta <$> mkMetaField (T.pack f') <*> v
Just '-' -> DelMeta <$> mkMetaField (T.pack f') <*> (Just <$> v)
Just '?' -> MaybeSetMeta <$> mkMetaField (T.pack f') <*> v
_ -> SetMeta <$> mkMetaField (T.pack f) <*> (S.singleton <$> v)
where
(f, sv) = separate (== '=') p
f' = beginning f
v = pure (toMetaValue sv)
v = pure (toMetaValue (encodeBS sv))
{- Parses field=value, field<value, field<=value, field>value, field>=value -}
parseMetaDataMatcher :: String -> Either String (MetaField, MetaValue -> Bool)
parseMetaDataMatcher p = (,)
<$> mkMetaField f
<$> mkMetaField (T.pack f)
<*> pure matcher
where
(f, op_v) = break (`elem` "=<>") p
@ -107,8 +108,8 @@ parseMetaDataMatcher p = (,)
_ -> checkglob ""
checkglob v =
let cglob = compileGlob v CaseInsensative
in matchGlob cglob . fromMetaValue
checkcmp cmp v v' = case (doubleval v, doubleval (fromMetaValue v')) of
in matchGlob cglob . decodeBS . fromMetaValue
checkcmp cmp v v' = case (doubleval v, doubleval (decodeBS (fromMetaValue v'))) of
(Just d, Just d') -> d' `cmp` d
_ -> False
doubleval v = readish v :: Maybe Double

View file

@ -5,6 +5,8 @@
- Licensed under the GNU GPL version 3 or higher.
-}
{-# LANGUAGE OverloadedStrings #-}
module Annex.MetaData.StandardFields (
tagMetaField,
yearMetaField,
@ -18,7 +20,7 @@ module Annex.MetaData.StandardFields (
import Types.MetaData
import Data.List
import qualified Data.Text as T
tagMetaField :: MetaField
tagMetaField = mkMetaFieldUnchecked "tag"
@ -43,17 +45,17 @@ lastChangedField :: MetaField
lastChangedField = mkMetaFieldUnchecked lastchanged
mkLastChangedField :: MetaField -> MetaField
mkLastChangedField f = mkMetaFieldUnchecked (fromMetaField f ++ lastchangedSuffix)
mkLastChangedField f = mkMetaFieldUnchecked (fromMetaField f <> lastchangedSuffix)
isLastChangedField :: MetaField -> Bool
isLastChangedField f
| f == lastChangedField = True
| otherwise = lastchanged `isSuffixOf` s && s /= lastchangedSuffix
| otherwise = lastchanged `T.isSuffixOf` s && s /= lastchangedSuffix
where
s = fromMetaField f
lastchanged :: String
lastchanged :: T.Text
lastchanged = "lastchanged"
lastchangedSuffix :: String
lastchangedSuffix :: T.Text
lastchangedSuffix = "-lastchanged"

View file

@ -31,6 +31,8 @@ import Utility.Glob
import Types.Command
import CmdLine.Action
import qualified Data.Text as T
import qualified Data.ByteString as B
import qualified Data.Set as S
import qualified Data.Map as M
import "mtl" Control.Monad.Writer
@ -68,18 +70,18 @@ parseViewParam s = case separate (== '=') s of
)
(field, wanted)
| end field == "!" ->
( mkMetaFieldUnchecked (beginning field)
( mkMetaFieldUnchecked (T.pack (beginning field))
, mkExcludeValues wanted
)
| otherwise ->
( mkMetaFieldUnchecked field
( mkMetaFieldUnchecked (T.pack field)
, mkFilterValues wanted
)
where
mkFilterValues v
| any (`elem` v) "*?" = FilterGlob v
| otherwise = FilterValues $ S.singleton $ toMetaValue v
mkExcludeValues = ExcludeValues . S.singleton . toMetaValue
| otherwise = FilterValues $ S.singleton $ toMetaValue $ encodeBS v
mkExcludeValues = ExcludeValues . S.singleton . toMetaValue . encodeBS
data ViewChange = Unchanged | Narrowing | Widening
deriving (Ord, Eq, Show)
@ -156,7 +158,7 @@ combineViewFilter old@(ExcludeValues olds) (ExcludeValues news)
combineViewFilter (FilterValues _) newglob@(FilterGlob _) =
(newglob, Widening)
combineViewFilter (FilterGlob oldglob) new@(FilterValues s)
| all (matchGlob (compileGlob oldglob CaseInsensative) . fromMetaValue) (S.toList s) = (new, Narrowing)
| all (matchGlob (compileGlob oldglob CaseInsensative) . decodeBS . fromMetaValue) (S.toList s) = (new, Narrowing)
| otherwise = (new, Widening)
combineViewFilter (FilterGlob old) newglob@(FilterGlob new)
| old == new = (newglob, Unchanged)
@ -211,7 +213,7 @@ viewComponentMatcher viewcomponent = \metadata ->
FilterGlob glob ->
let cglob = compileGlob glob CaseInsensative
in \values -> setmatches $
S.filter (matchGlob cglob . fromMetaValue) values
S.filter (matchGlob cglob . decodeBS . fromMetaValue) values
ExcludeValues excludes -> \values ->
if S.null (S.intersection values excludes)
then Just []
@ -231,7 +233,7 @@ pseudoBackslash :: String
pseudoBackslash = "\56546\56469\56498"
toViewPath :: MetaValue -> FilePath
toViewPath = escapeslash [] . fromMetaValue
toViewPath = escapeslash [] . decodeBS . fromMetaValue
where
escapeslash s ('/':cs) = escapeslash (pseudoSlash:s) cs
escapeslash s ('\\':cs) = escapeslash (pseudoBackslash:s) cs
@ -243,7 +245,7 @@ toViewPath = escapeslash [] . fromMetaValue
escapeslash s cs = concat (reverse (cs:s))
fromViewPath :: FilePath -> MetaValue
fromViewPath = toMetaValue . deescapeslash []
fromViewPath = toMetaValue . encodeBS . deescapeslash []
where
deescapeslash s ('%':escapedc:cs) = deescapeslash ([escapedc]:s) cs
deescapeslash s (c1:c2:c3:cs)
@ -285,7 +287,7 @@ prop_view_roundtrips f metadata visible = null f || viewTooLarge view ||
all hasfields (viewedFiles view viewedFileFromReference f metadata)
where
view = View (Git.Ref "master") $
map (\(mf, mv) -> ViewComponent mf (FilterValues $ S.filter (not . null . fromMetaValue) mv) visible)
map (\(mf, mv) -> ViewComponent mf (FilterValues $ S.filter (not . B.null . fromMetaValue) mv) visible)
(fromMetaData metadata)
visiblefields = sort (map viewField $ filter viewVisible (viewComponents view))
hasfields fv = sort (map fst (fromMetaData (fromView view fv))) == visiblefields
@ -300,9 +302,9 @@ getDirMetaData :: FilePath -> MetaData
getDirMetaData d = MetaData $ M.fromList $ zip fields values
where
dirs = splitDirectories d
fields = map (mkMetaFieldUnchecked . addTrailingPathSeparator . joinPath)
fields = map (mkMetaFieldUnchecked . T.pack . addTrailingPathSeparator . joinPath)
(inits dirs)
values = map (S.singleton . toMetaValue . fromMaybe "" . headMaybe)
values = map (S.singleton . toMetaValue . encodeBS . fromMaybe "" . headMaybe)
(tails dirs)
getWorkTreeMetaData :: FilePath -> MetaData

View file

@ -6,6 +6,7 @@
-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE OverloadedStrings #-}
module Command.ImportFeed where
@ -124,7 +125,8 @@ getCache opttemplate = ifM (Annex.getState Annex.force)
knownItems :: (Key, URLString) -> Annex ([ItemId], URLString)
knownItems (k, u) = do
itemids <- S.toList . S.filter (/= noneValue) . S.map fromMetaValue
itemids <- S.toList . S.filter (/= noneValue)
. S.map (decodeBS . fromMetaValue)
. currentMetaDataValues itemIdField
<$> getCurrentMetaData k
return (itemids, u)
@ -322,14 +324,14 @@ extractMetaData i = case getItemPublishDate (item i) :: Maybe (Maybe UTCTime) of
Just (Just d) -> unionMetaData meta (dateMetaData d meta)
_ -> meta
where
tometa (k, v) = (mkMetaFieldUnchecked k, S.singleton (toMetaValue v))
tometa (k, v) = (mkMetaFieldUnchecked (T.pack k), S.singleton (toMetaValue (encodeBS v)))
meta = MetaData $ M.fromList $ map tometa $ extractFields i
minimalMetaData :: ToDownload -> MetaData
minimalMetaData i = case getItemId (item i) of
(Nothing) -> emptyMetaData
(Just (_, itemid)) -> MetaData $ M.singleton itemIdField
(S.singleton $ toMetaValue $ fromFeed itemid)
(S.singleton $ toMetaValue $ encodeBS $ fromFeed itemid)
{- Extract fields from the feed and item, that are both used as metadata,
- and to generate the filename. -}

View file

@ -20,6 +20,7 @@ import Limit
import qualified Data.Set as S
import qualified Data.Map as M
import qualified Data.Text as T
import qualified Data.ByteString.Char8 as B8
import qualified Data.ByteString.Lazy.UTF8 as BU
import Control.Concurrent
@ -45,7 +46,7 @@ optParser desc = MetaDataOptions
<*> optional parseKeyOptions
<*> parseBatchOption
where
getopt = option (eitherReader mkMetaField)
getopt = option (eitherReader (mkMetaField . T.pack))
( long "get" <> short 'g' <> metavar paramField
<> help "get single metadata field"
)
@ -61,7 +62,7 @@ optParser desc = MetaDataOptions
( long "untag" <> short 'u' <> metavar "TAG"
<> help "remove a tag"
))
<|> option (eitherReader (\f -> DelMeta <$> mkMetaField f <*> pure Nothing))
<|> option (eitherReader (\f -> DelMeta <$> mkMetaField (T.pack f) <*> pure Nothing))
( long "remove" <> short 'r' <> metavar "FIELD"
<> help "remove all values of a field"
)
@ -101,7 +102,7 @@ startKeys c o (k, ai) = case getSet o of
Get f -> do
l <- S.toList . currentMetaDataValues f <$> getCurrentMetaData k
liftIO $ forM_ l $
putStrLn . fromMetaValue
B8.putStrLn . fromMetaValue
stop
_ -> do
showStartKey "metadata" k ai
@ -126,7 +127,7 @@ cleanup k = do
return True
where
unwrapmeta (f, v) = (fromMetaField f, map fromMetaValue (S.toList v))
showmeta (f, vs) = map ((f ++ "=") ++) vs
showmeta (f, vs) = map ((T.unpack f ++ "=") ++) (map decodeBS vs)
-- Metadata serialized to JSON in the field named "fields" of
-- a larger object.

View file

@ -29,6 +29,7 @@ import qualified Git.Index as Git
import qualified Git.LsFiles as Git
import qualified Data.Set as S
import qualified Data.Text as T
cmd :: Command
cmd = command "pre-commit" SectionPlumbing
@ -111,7 +112,7 @@ showMetaDataChange :: MetaData -> Annex ()
showMetaDataChange = showLongNote . unlines . concatMap showmeta . fromMetaData
where
showmeta (f, vs) = map (showmetavalue f) $ S.toList vs
showmetavalue f v = fromMetaField f ++ showset v ++ "=" ++ fromMetaValue v
showmetavalue f v = T.unpack (fromMetaField f) <> showset v <> "=" <> decodeBS (fromMetaValue v)
showset v
| isSet v = "+"
| otherwise = "-"

View file

@ -78,7 +78,7 @@ getCurrentMetaData' getlogfile k = do
let MetaData m = value l
ts = lastchangedval l
in M.map (const ts) m
lastchangedval l = S.singleton $ toMetaValue $ showts $
lastchangedval l = S.singleton $ toMetaValue $ encodeBS $ showts $
case changed l of
VectorClock t -> t
Unknown -> 0
@ -110,9 +110,9 @@ addMetaDataClocked' getlogfile k d@(MetaData m) c
| otherwise = do
config <- Annex.getGitConfig
Annex.Branch.change (getlogfile config k) $
encodeBL . showLog . simplifyLog
buildLog . simplifyLog
. S.insert (LogEntry c metadata)
. parseLog . decodeBL
. parseLog
where
metadata = MetaData $ M.filterWithKey (\f _ -> not (isLastChangedField f)) m
@ -145,8 +145,8 @@ copyMetaData oldkey newkey
else do
config <- Annex.getGitConfig
Annex.Branch.change (metaDataLogFile config newkey) $
const $ encodeBL $ showLog l
const $ buildLog l
return True
readLog :: FilePath -> Annex (Log MetaData)
readLog = parseLog . decodeBL <$$> Annex.Branch.get
readLog = parseLog <$$> Annex.Branch.get

View file

@ -11,7 +11,7 @@ module Logs.MetaData.Pure (
Log,
LogEntry(..),
parseLog,
showLog,
buildLog,
logToCurrentMetaData,
simplifyLog,
filterRemoteMetaData,

View file

@ -20,8 +20,8 @@ import Logs
import Logs.SingleValue
instance SingleValueSerializable NumCopies where
serialize (NumCopies n) = show n
deserialize = NumCopies <$$> readish
serialize (NumCopies n) = encodeBS (show n)
deserialize = NumCopies <$$> readish . decodeBS
setGlobalNumCopies :: NumCopies -> Annex ()
setGlobalNumCopies new = do

View file

@ -26,7 +26,7 @@ import Annex.VectorClock
import qualified Data.Set as S
readLog :: (Ord v, SingleValueSerializable v) => FilePath -> Annex (Log v)
readLog = parseLog . decodeBL <$$> Annex.Branch.get
readLog = parseLog <$$> Annex.Branch.get
getLog :: (Ord v, SingleValueSerializable v) => FilePath -> Annex (Maybe v)
getLog = newestValue <$$> readLog
@ -35,4 +35,4 @@ setLog :: (SingleValueSerializable v) => FilePath -> v -> Annex ()
setLog f v = do
c <- liftIO currentVectorClock
let ent = LogEntry c v
Annex.Branch.change f $ \_old -> encodeBL (showLog (S.singleton ent))
Annex.Branch.change f $ \_old -> buildLog (S.singleton ent)

View file

@ -1,6 +1,6 @@
{- git-annex single-value log, pure operations
-
- Copyright 2014 Joey Hess <id@joeyh.name>
- Copyright 2014-2019 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
@ -12,10 +12,15 @@ import Logs.Line
import Annex.VectorClock
import qualified Data.Set as S
import qualified Data.ByteString as B
import qualified Data.ByteString.Lazy as L
import qualified Data.Attoparsec.ByteString.Lazy as A
import Data.Attoparsec.ByteString.Char8 (char)
import Data.ByteString.Builder
class SingleValueSerializable v where
serialize :: v -> String
deserialize :: String -> Maybe v
serialize :: v -> B.ByteString
deserialize :: B.ByteString -> Maybe v
data LogEntry v = LogEntry
{ changed :: VectorClock
@ -24,20 +29,27 @@ data LogEntry v = LogEntry
type Log v = S.Set (LogEntry v)
showLog :: (SingleValueSerializable v) => Log v -> String
showLog = unlines . map showline . S.toList
buildLog :: (SingleValueSerializable v) => Log v -> Builder
buildLog = mconcat . map genline . S.toList
where
showline (LogEntry c v) = unwords [formatVectorClock c, serialize v]
genline (LogEntry c v) =
byteString (encodeBS' (formatVectorClock c)) <> sp
<> byteString (serialize v)
<> nl
sp = charUtf8 ' '
nl = charUtf8 '\n'
parseLog :: (Ord v, SingleValueSerializable v) => String -> Log v
parseLog = S.fromList . mapMaybe parse . splitLines
parseLog :: (Ord v, SingleValueSerializable v) => L.ByteString -> Log v
parseLog = S.fromList . fromMaybe []
. A.maybeResult . A.parse (logParser <* A.endOfInput)
logParser :: SingleValueSerializable v => A.Parser [LogEntry v]
logParser = parseLogLines $ LogEntry
<$> vectorClockParser
<* char ' '
<*> (parsevalue =<< A.takeByteString)
where
parse line = do
let (sc, s) = splitword line
c <- parseVectorClock sc
v <- deserialize s
Just (LogEntry c v)
splitword = separate (== ' ')
parsevalue = maybe (fail "log line parse failure") return . deserialize
newestValue :: Log v -> Maybe v
newestValue s

View file

@ -28,6 +28,7 @@ import qualified Git.Ref
import Git.Types
import Logs.File
import qualified Data.Text as T
import qualified Data.Set as S
import Data.Char
@ -74,15 +75,15 @@ branchView view
branchcomp c
| viewVisible c = branchcomp' c
| otherwise = "(" ++ branchcomp' c ++ ")"
branchcomp' (ViewComponent metafield viewfilter _) =concat
[ forcelegal (fromMetaField metafield)
branchcomp' (ViewComponent metafield viewfilter _) = concat
[ forcelegal (T.unpack (fromMetaField metafield))
, branchvals viewfilter
]
branchvals (FilterValues set) = '=' : branchset set
branchvals (FilterGlob glob) = '=' : forcelegal glob
branchvals (ExcludeValues set) = "!=" ++ branchset set
branchset = intercalate ","
. map (forcelegal . fromMetaValue)
. map (forcelegal . decodeBS . fromMetaValue)
. S.toList
forcelegal s
| Git.Ref.legal True s = s

View file

@ -19,6 +19,7 @@ import qualified Data.Text as T
import qualified Data.Text.Encoding as T
import qualified Data.ByteString.Lazy as L
import qualified Data.ByteString as BS
import qualified Data.ByteString.Char8 as B8
import qualified Data.Map as M
import qualified Data.Set as S
import qualified System.FilePath.Posix as Posix
@ -277,7 +278,7 @@ retrieveHelper info h loc f p = liftIO $ runResourceT $ do
let req = case loc of
Left o -> S3.getObject (bucket info) o
Right (S3VersionID o vid) -> (S3.getObject (bucket info) o)
{ S3.goVersionId = Just (T.pack vid) }
{ S3.goVersionId = Just vid }
S3.GetObjectResponse { S3.gorResponse = rsp } <- sendS3Handle' h req
Url.sinkResponseFile p zeroBytesProcessed f WriteMode rsp
@ -327,7 +328,7 @@ checkKeyHelper info h loc = do
req = case loc of
Left o -> S3.headObject (bucket info) o
Right (S3VersionID o vid) -> (S3.headObject (bucket info) o)
{ S3.hoVersionId = Just (T.pack vid) }
{ S3.hoVersionId = Just vid }
#if ! MIN_VERSION_aws(0,10,0)
{- Catch exception headObject returns when an object is not present
@ -775,36 +776,34 @@ getPublicUrlMaker info = case publicurl info of
_ -> Nothing
data S3VersionID = S3VersionID S3.Object String
data S3VersionID = S3VersionID S3.Object T.Text
deriving (Show)
-- smart constructor
mkS3VersionID :: S3.Object -> Maybe T.Text -> Maybe S3VersionID
mkS3VersionID o = mkS3VersionID' o . fmap T.unpack
mkS3VersionID' :: S3.Object -> Maybe String -> Maybe S3VersionID
mkS3VersionID' o (Just s)
| null s = Nothing
mkS3VersionID o (Just t)
| T.null t = Nothing
-- AWS documentation says a version ID is at most 1024 bytes long.
-- Since they are stored in the git-annex branch, prevent them from
-- being very much larger than that.
| length s < 2048 = Just (S3VersionID o s)
| T.length t < 2048 = Just (S3VersionID o t)
| otherwise = Nothing
mkS3VersionID' _ Nothing = Nothing
mkS3VersionID _ Nothing = Nothing
-- Format for storage in per-remote metadata.
--
-- A S3 version ID is "url ready" so does not contain '#' and so we'll use
-- that to separate it from the object id. (Could use a space, but spaces
-- in metadata values lead to an inefficient encoding.)
formatS3VersionID :: S3VersionID -> String
formatS3VersionID (S3VersionID o v) = v ++ '#' : T.unpack o
formatS3VersionID :: S3VersionID -> BS.ByteString
formatS3VersionID (S3VersionID o v) = T.encodeUtf8 v <> "#" <> T.encodeUtf8 o
-- Parse from value stored in per-remote metadata.
parseS3VersionID :: String -> Maybe S3VersionID
parseS3VersionID s =
let (v, o) = separate (== '#') s
in mkS3VersionID' (T.pack o) (Just v)
parseS3VersionID :: BS.ByteString -> Maybe S3VersionID
parseS3VersionID b = do
let (v, rest) = B8.break (== '#') b
o <- eitherToMaybe $ T.decodeUtf8' $ BS.drop 1 rest
mkS3VersionID o (eitherToMaybe $ T.decodeUtf8' v)
setS3VersionID :: S3Info -> UUID -> Key -> Maybe S3VersionID -> Annex ()
setS3VersionID info u k vid
@ -843,7 +842,7 @@ s3VersionIDPublicUrl :: (S3Info -> BucketObject -> URLString) -> S3Info -> S3Ver
s3VersionIDPublicUrl mk info (S3VersionID obj vid) = mk info $ concat
[ T.unpack obj
, "?versionId="
, vid -- version ID is "url ready" so no escaping needed
, T.unpack vid -- version ID is "url ready" so no escaping needed
]
getS3VersionIDPublicUrls :: (S3Info -> BucketObject -> URLString) -> S3Info -> UUID -> Key -> Annex [URLString]