From 985dd38847452d522b9eac84b3331ded3d17df8e Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 25 Oct 2023 14:44:57 -0400 Subject: [PATCH] add --- doc/todo/RawFilePath_conversion.mdwn | 75 ++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) create mode 100644 doc/todo/RawFilePath_conversion.mdwn diff --git a/doc/todo/RawFilePath_conversion.mdwn b/doc/todo/RawFilePath_conversion.mdwn new file mode 100644 index 0000000000..04a56c4796 --- /dev/null +++ b/doc/todo/RawFilePath_conversion.mdwn @@ -0,0 +1,75 @@ +For a long time (since 2019) git-annex has been progressively converting from +FilePath to RawFilePath (aka ByteString). + +The reason is mostly performance, also a simpler representation of +filepaths that doesn't need encoding hacks to support non-UTF8 values. + +Some commands like `git-annex find` use RawFilePath end-to-end. +But this conversion is not yet complete. This is a todo to keep track of the +status. + +* The Abstract FilePath proposal (AFPP) has been implemented, and so a number of + libraries like unix and directory now have versions that operate on + OSPath. That could be used in git-annex eg for things like + getDirectoryContents, when built against those versions. + (But OSPath uses ShortByteString, while RawFilePath is ByteString, so + conversion still entails a copy.) +* withFile remains to be converted, and is used in several important code + paths, including Annex.Journal and Annex.Link. + There is a RawFilePath version in the rawfilepath library, but that is + not currently a git-annex dependency. (withFile is in base, and base is + unlikely to convert to AFPP soon) + +[[!tag confirmed]] + +---- + +The following patch can be useful to find points where conversions are +done. Especially useful to identify cases where a value is converted +`FilePath -> RawFilePath -> FilePath`. + + diff --git a/Utility/FileSystemEncoding.hs b/Utility/FileSystemEncoding.hs + index 2a1dc81bc1..03e6986f6e 100644 + --- a/Utility/FileSystemEncoding.hs + +++ b/Utility/FileSystemEncoding.hs + @@ -84,6 +84,9 @@ encodeBL = L.fromStrict . encodeBS + encodeBL = L8.fromString + #endif + + +debugConversions :: String -> IO () + +debugConversions s = hPutStrLn stderr ("conversion: " ++ s) + + + decodeBS :: S.ByteString -> FilePath + #ifndef mingw32_HOST_OS + -- This does the same thing as System.FilePath.ByteString.decodeFilePath, + @@ -92,6 +95,7 @@ decodeBS :: S.ByteString -> FilePath + -- something other than a unix filepath. + {-# NOINLINE decodeBS #-} + decodeBS b = unsafePerformIO $ do + + debugConversions (show b) + enc <- Encoding.getFileSystemEncoding + S.useAsCStringLen b (GHC.peekCStringLen enc) + #else + @@ -106,17 +110,19 @@ encodeBS :: FilePath -> S.ByteString + -- something other than a unix filepath. + {-# NOINLINE encodeBS #-} + encodeBS f = unsafePerformIO $ do + + debugConversions f + enc <- Encoding.getFileSystemEncoding + - GHC.newCStringLen enc f >>= unsafePackMallocCStringLen + + b <- GHC.newCStringLen enc f >>= unsafePackMallocCStringLen + + return b + #else + encodeBS = S8.fromString + #endif + + fromRawFilePath :: RawFilePath -> FilePath + -fromRawFilePath = decodeFilePath + +fromRawFilePath = decodeBS -- decodeFilePath + + toRawFilePath :: FilePath -> RawFilePath + -toRawFilePath = encodeFilePath + +toRawFilePath = encodeBS -- encodeFilePath + + {- Truncates a FilePath to the given number of bytes (or less), + - as represented on disk.