From fc3fd0cfe0e33abe5730b1c3e593804c98b75644 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Sat, 5 Jan 2019 08:06:47 -0400 Subject: [PATCH] do union merge on bytestrings My concern with using bytestring for this is that the file needs to be split into lines, and the encoding is not known. It's safe to split a utf-8 encoded file on the \n byte; only newlines get encoded to that byte in utf-8. And this code already assumes utf-8 or ASCII encoding, because it used the filesystem encoding. --- Git/UnionMerge.hs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/Git/UnionMerge.hs b/Git/UnionMerge.hs index d3f596c634..3a8b997daf 100644 --- a/Git/UnionMerge.hs +++ b/Git/UnionMerge.hs @@ -11,6 +11,7 @@ module Git.UnionMerge ( ) where import qualified Data.ByteString.Lazy as L +import qualified Data.ByteString.Lazy.Char8 as L8 import qualified Data.Set as S import Common @@ -85,26 +86,25 @@ mergeFile info file hashhandle h = case filter (/= nullSha) [Ref asha, Ref bsha] [] -> return Nothing (sha:[]) -> use sha shas -> use - =<< either return (hashBlob hashhandle . encodeBS . unlines) + =<< either return (hashBlob hashhandle . L8.unlines) =<< calcMerge . zip shas <$> mapM getcontents shas where [_colonmode, _bmode, asha, bsha, _status] = words info use sha = return $ Just $ updateIndexLine sha TreeFile $ asTopFilePath file - -- We don't know how the file is encoded, but need to - -- split it into lines to union merge. Using the - -- FileSystemEncoding for this is a hack, but ensures there - -- are no decoding errors. - getcontents s = lines . encodeW8NUL . L.unpack <$> catObject h s + -- Get file and split into lines to union merge. + -- The encoding of the file is assumed to be either ASCII or utf-8; + -- in either case it's safe to split on \n + getcontents s = L8.lines <$> catObject h s {- Calculates a union merge between a list of refs, with contents. 
- - When possible, reuses the content of an existing ref, rather than - generating new content. -} -calcMerge :: [(Ref, [String])] -> Either Ref [String] +calcMerge :: [(Ref, [L8.ByteString])] -> Either Ref [L8.ByteString] calcMerge shacontents - | null reuseable = Right $ new + | null reuseable = Right new | otherwise = Left $ fst $ Prelude.head reuseable where reuseable = filter (\c -> sorteduniq (snd c) == new) shacontents