use attoparsec parser for String parsing, 10x speedup

This is not as efficient as using ByteStrings throughout, but converting the String to a ByteString and running the attoparsec parser on it is still significantly faster than the old String-based parser.

benchmarking parse/old
time                 9.657 μs   (9.600 μs .. 9.732 μs)
                     1.000 R²   (0.999 R² .. 1.000 R²)
mean                 9.703 μs   (9.645 μs .. 9.785 μs)
std dev              231.6 ns   (161.5 ns .. 323.7 ns)
variance introduced by outliers: 25% (moderately inflated)

benchmarking parse/new
time                 834.6 ns   (797.1 ns .. 886.9 ns)
                     0.987 R²   (0.976 R² .. 0.999 R²)
mean                 816.4 ns   (802.7 ns .. 845.1 ns)
std dev              62.39 ns   (37.66 ns .. 108.4 ns)
variance introduced by outliers: 82% (severely inflated)

There is a small behavior change from the old parsePOSIXTime, which accepted any amount of trailing whitespace after the timestamp. That behavior was not documented, and it does not seem that anything relied on it.
2019-01-02 13:13:17 -04:00 · 2019-01-02 13:13:17 -04:00 · 3ba6e9bb96
commit 3ba6e9bb96
parent 3c74dcd4e1
3 changed files with 6 additions and 20 deletions
--- a/1
+++ b/1
@ -12,6 +12,7 @@ git-annex (7.20181212) UNRELEASED; urgency=medium
    used so it will also work with v7 unlocked pointer files.
  * Fix doubled progress display when downloading an url when -J is used.
  * importfeed: Better error message when downloading the feed fails.
+  * Optimised timestamp parser is 10x faster.

 -- Joey Hess <id@joeyh.name>  Tue, 18 Dec 2018 12:24:52 -0400

--- a/Utility/InodeCache.hs
+++ b/Utility/InodeCache.hs
@ -156,7 +156,7 @@ readInodeCache s = case words s of
 	(inode:size:mtime:mtimedecimal:_) -> do
 		i <- readish inode
 		sz <- readish size
-		t <- parsePOSIXTime' mtime mtimedecimal
+		t <- parsePOSIXTime $ mtime ++ '.' : mtimedecimal
 		return $ InodeCache $ InodeCachePrim i sz (MTimeHighRes t)
 	_ -> Nothing

--- a/Utility/TimeStamp.hs
+++ b/Utility/TimeStamp.hs
@ -9,14 +9,14 @@

 module Utility.TimeStamp where

-import Utility.PartialPrelude
-import Utility.Misc
+import Utility.Data

 import Data.Time.Clock.POSIX
 import Data.Time
 import Data.Ratio
 import Control.Applicative
 import qualified Data.ByteString as B
+import qualified Data.ByteString.Char8 as B8
 import qualified Data.Attoparsec.ByteString as A
 import Data.Attoparsec.ByteString.Char8 (char, decimal, signed, isDigit_w8)
 #if ! MIN_VERSION_time(1,5,0)
@ -41,24 +41,9 @@ parserPOSIXTime = mkPOSIXTime
 			A.parseOnly (decimal <* A.endOfInput) b
 		return (d, len)

-{- Parses how POSIXTime shows itself: "1431286201.113452s"
- - Also handles the format with no fractional seconds. -}
 parsePOSIXTime :: String -> Maybe POSIXTime
-parsePOSIXTime = uncurry parsePOSIXTime' . separate (== '.')
-
-{- Parses the integral and decimal part of a POSIXTime -}
-parsePOSIXTime' :: String -> String -> Maybe POSIXTime
-parsePOSIXTime' sn sd = do
-	n <- readi sn
-	let sd' = takeWhile (/= 's') sd
-	if null sd'
-		then return (fromIntegral n)
-		else do
-			d <- readi sd'
-			return $ mkPOSIXTime n (d, length sd')
-  where
-	readi :: String -> Maybe Integer
-	readi = readish
+parsePOSIXTime s = eitherToMaybe $ 
+	A.parseOnly (parserPOSIXTime <* A.endOfInput) (B8.pack s)

 {- This implementation allows for higher precision in a POSIXTime than
 - supported by the system's Double, and avoids the complications of