assistant: generate better commits for renames

This commit is contained in:
Joey Hess 2013-03-10 21:36:13 -04:00
parent b2c7ee5551
commit 2762ab03b4
9 changed files with 130 additions and 68 deletions

View file

@ -12,6 +12,7 @@ import Assistant.Types.Changes
import Utility.TSet
import Data.Time.Clock
import Control.Concurrent.STM
{- Handlers call this when they made a change that needs to get committed. -}
madeChange :: FilePath -> ChangeInfo -> Assistant (Maybe Change)
@ -27,13 +28,17 @@ pendingAddChange f = Just <$> (PendingAddChange <$> liftIO getCurrentTime <*> pu
{- Gets all unhandled changes.
- Blocks until at least one change is made. -}
getChanges :: Assistant [Change]
getChanges = getTSet <<~ changeChan
getChanges = (atomically . getTSet) <<~ changeChan
{- Gets all unhandled changes, without blocking. -}
getAnyChanges :: Assistant [Change]
getAnyChanges = (atomically . readTSet) <<~ changeChan
{- Puts unhandled changes back into the channel.
- Note: Original order is not preserved. -}
refillChanges :: [Change] -> Assistant ()
refillChanges cs = flip putTSet cs <<~ changeChan
refillChanges cs = (atomically . flip putTSet cs) <<~ changeChan
{- Records a change in the channel. -}
recordChange :: Change -> Assistant ()
recordChange c = flip putTSet1 c <<~ changeChan
recordChange c = (atomically . flip putTSet1 c) <<~ changeChan

View file

@ -9,19 +9,20 @@ module Assistant.Commits where
import Assistant.Common
import Assistant.Types.Commits
import Utility.TSet
import Control.Concurrent.STM
{- Gets all unhandled commits.
- Blocks until at least one commit is made. -}
getCommits :: Assistant [Commit]
getCommits = getTSet <<~ commitChan
getCommits = (atomically . getTSet) <<~ commitChan
{- Puts unhandled commits back into the channel.
- Note: Original order is not preserved. -}
refillCommits :: [Commit] -> Assistant ()
refillCommits cs = flip putTSet cs <<~ commitChan
refillCommits cs = (atomically . flip putTSet cs) <<~ commitChan
{- Records a commit in the channel. -}
recordCommit :: Assistant ()
recordCommit = flip putTSet1 Commit <<~ commitChan
recordCommit = (atomically . flip putTSet1 Commit) <<~ commitChan

View file

@ -38,6 +38,7 @@ import Data.Time.Clock
import Data.Tuple.Utils
import qualified Data.Set as S
import Data.Either
import Control.Concurrent
{- This thread makes git commits at appropriate times. -}
commitThread :: NamedThread
@ -45,34 +46,81 @@ commitThread = namedThread "Committer" $ do
delayadd <- liftAnnex $
maybe delayaddDefault (return . Just . Seconds)
=<< annexDelayAdd <$> Annex.getGitConfig
runEvery (Seconds 1) <~> do
-- We already waited one second as a simple rate limiter.
-- Next, wait until at least one change is available for
-- processing.
changes <- getChanges
-- Now see if now's a good time to commit.
time <- liftIO getCurrentTime
if shouldCommit time changes
waitChangeTime $ \(changes, time) -> do
readychanges <- handleAdds delayadd changes
if shouldCommit time readychanges
then do
readychanges <- handleAdds delayadd changes
if shouldCommit time readychanges
then do
debug
[ "committing"
, show (length readychanges)
, "changes"
]
void $ alertWhile commitAlert $
liftAnnex commitStaged
recordCommit
mapM_ checkChangeContent readychanges
else refill readychanges
else refill changes
debug
[ "committing"
, show (length readychanges)
, "changes"
]
void $ alertWhile commitAlert $
liftAnnex commitStaged
recordCommit
mapM_ checkChangeContent readychanges
else refill readychanges
refill :: [Change] -> Assistant ()
refill [] = noop
refill cs = do
debug ["delaying commit of", show (length cs), "changes"]
refillChanges cs
{- Wait for one or more changes to arrive to be committed. -}
waitChangeTime :: (([Change], UTCTime) -> Assistant ()) -> Assistant ()
waitChangeTime a = runEvery (Seconds 1) <~> do
-- We already waited one second as a simple rate limiter.
-- Next, wait until at least one change is available for
-- processing.
changes <- getChanges
-- See if now's a good time to commit.
now <- liftIO getCurrentTime
case (shouldCommit now changes, lonelychange changes) of
(True, False) -> a (changes, now)
(True, True) -> do
-- Wait for other, related changes to arrive.
liftIO $ humanImperceptibleDelay
-- Don't block, but are there any?
morechanges <- getAnyChanges
let allchanges = changes++morechanges
a (allchanges, now)
_ -> refill changes
where
refill [] = noop
refill cs = do
debug ["delaying commit of", show (length cs), "changes"]
refillChanges cs
{- Did we perhaps only get one of the AddChange and RmChange pair
- that make up a rename? -}
lonelychange [(PendingAddChange _ _)] = True
lonelychange [(Change { changeInfo = i })] | i == RmChange = True
lonelychange _ = False
{- An amount of time that is hopefully imperceptably short for humans,
- while long enough for a computer to get some work done.
- Note that 0.001 is a little too short for rename change batching to
- work. -}
humanImperceptibleInterval :: NominalDiffTime
humanImperceptibleInterval = 0.01
humanImperceptibleDelay :: IO ()
humanImperceptibleDelay = threadDelay $
truncate $ humanImperceptibleInterval * fromIntegral oneSecond
{- Decide if now is a good time to make a commit.
- Note that the list of changes has an undefined order.
-
- Current strategy: If there have been 10 changes within the past second,
- a batch activity is taking place, so wait for later.
-}
shouldCommit :: UTCTime -> [Change] -> Bool
shouldCommit now changes
| len == 0 = False
| len > 10000 = True -- avoid bloating queue too much
| length recentchanges < 10 = True
| otherwise = False -- batch activity
where
len = length changes
thissecond c = timeDelta c <= 1
recentchanges = filter thissecond changes
timeDelta c = now `diffUTCTime` changeTime c
commitStaged :: Annex Bool
commitStaged = do
@ -105,22 +153,6 @@ commitStaged = do
| otherwise = Param "--allow-empty-message"
: Param "-m" : Param "" : ps
{- Decide if now is a good time to make a commit.
- Note that the list of changes has an undefined order.
-
- Current strategy: If there have been 10 changes within the past second,
- a batch activity is taking place, so wait for later.
-}
shouldCommit :: UTCTime -> [Change] -> Bool
shouldCommit now changes
| len == 0 = False
| len > 10000 = True -- avoid bloating queue too much
| length (filter thisSecond changes) < 10 = True
| otherwise = False -- batch activity
where
len = length changes
thisSecond c = now `diffUTCTime` changeTime c <= 1
{- OSX needs a short delay after a file is added before locking it down,
- when using a non-direct mode repository, as pasting a file seems to
- try to set file permissions or otherwise access the file after closing

View file

@ -12,6 +12,7 @@ import Types.Key
import Utility.TSet
import Data.Time.Clock
import Control.Concurrent.STM
data ChangeInfo = AddChange Key | LinkChange (Maybe Key) | RmChange | RmDirChange
deriving (Show, Eq)
@ -40,7 +41,7 @@ data Change
deriving (Show)
newChangeChan :: IO ChangeChan
newChangeChan = newTSet
newChangeChan = atomically newTSet
isPendingAddChange :: Change -> Bool
isPendingAddChange (PendingAddChange {}) = True

View file

@ -9,9 +9,11 @@ module Assistant.Types.Commits where
import Utility.TSet
import Control.Concurrent.STM
type CommitChan = TSet Commit
data Commit = Commit
newCommitChan :: IO CommitChan
newCommitChan = newTSet
newCommitChan = atomically newTSet

View file

@ -1,6 +1,6 @@
{- Transactional sets
-
- Copyright 2012 Joey Hess <joey@kitenet.net>
- Copyright 2012, 2013 Joey Hess <joey@kitenet.net>
-}
module Utility.TSet where
@ -11,18 +11,20 @@ import Control.Concurrent.STM
type TSet = TChan
runTSet :: STM a -> IO a
runTSet = atomically
newTSet :: IO (TSet a)
newTSet = atomically newTChan
newTSet :: STM (TSet a)
newTSet = newTChan
{- Gets the contents of the TSet. Blocks until at least one item is
- present. -}
getTSet :: TSet a -> IO [a]
getTSet tset = runTSet $ do
getTSet :: TSet a -> STM [a]
getTSet tset = do
c <- readTChan tset
go [c]
l <- readTSet tset
return $ c:l
{- Gets anything currently in the TSet, without blocking. -}
readTSet :: TSet a -> STM [a]
readTSet tset = go []
where
go l = do
v <- tryReadTChan tset
@ -31,9 +33,9 @@ getTSet tset = runTSet $ do
Just c -> go (c:l)
{- Puts items into a TSet. -}
putTSet :: TSet a -> [a] -> IO ()
putTSet tset vs = runTSet $ mapM_ (writeTChan tset) vs
putTSet :: TSet a -> [a] -> STM ()
putTSet tset vs = mapM_ (writeTChan tset) vs
{- Put a single item into a TSet. -}
putTSet1 :: TSet a -> a -> IO ()
putTSet1 tset v = void $ runTSet $ writeTChan tset v
putTSet1 :: TSet a -> a -> STM ()
putTSet1 tset v = void $ writeTChan tset v

View file

@ -1,6 +1,6 @@
{- thread scheduling
-
- Copyright 2012 Joey Hess <joey@kitenet.net>
- Copyright 2012, 2013 Joey Hess <joey@kitenet.net>
- Copyright 2011 Bas van Dijk & Roel van Dijk
-
- Licensed under the GNU GPL version 3 or higher.
@ -14,6 +14,7 @@ import Common
import Control.Concurrent
import System.Posix.Signals
import Data.Time.Clock
#ifndef __ANDROID__
import System.Posix.Terminal
#endif
@ -21,6 +22,8 @@ import System.Posix.Terminal
newtype Seconds = Seconds { fromSeconds :: Int }
deriving (Eq, Ord, Show)
type Microseconds = Integer
{- Runs an action repeatedly forever, sleeping at least the specified number
- of seconds in between. -}
runEvery :: Seconds -> IO a -> IO a
@ -30,8 +33,6 @@ runEvery n a = forever $ do
threadDelaySeconds :: Seconds -> IO ()
threadDelaySeconds (Seconds n) = unboundDelay (fromIntegral n * oneSecond)
where
oneSecond = 1000000 -- microseconds
{- Like threadDelay, but not bounded by an Int.
-
@ -42,7 +43,7 @@ threadDelaySeconds (Seconds n) = unboundDelay (fromIntegral n * oneSecond)
- Taken from the unbounded-delay package to avoid a dependency for 4 lines
- of code.
-}
unboundDelay :: Integer -> IO ()
unboundDelay :: Microseconds -> IO ()
unboundDelay time = do
let maxWait = min time $ toInteger (maxBound :: Int)
threadDelay $ fromInteger maxWait
@ -61,3 +62,6 @@ waitForTermination = do
where
check sig lock = void $
installHandler sig (CatchOnce $ putMVar lock ()) Nothing
oneSecond :: Microseconds
oneSecond = 1000000

1
debian/changelog vendored
View file

@ -42,6 +42,7 @@ git-annex (4.20130228) UNRELEASED; urgency=low
(See https://github.com/yesodweb/wai/issues/146)
* bugfix: drop --from an unavailable remote no longer updates the location
log, incorrectly, to say the remote does not have the key.
* assistant: Generate better commits for renames.
-- Joey Hess <joeyh@debian.org> Wed, 27 Feb 2013 23:20:40 -0400

View file

@ -13,3 +13,17 @@ Fixed some problems around USB drives. One was a real jaw-dropping
bug: "git annex drop --from usbdrive" when the drive was not
connected still updated the location log to indicate it did not have
the file anymore! (Thank goodness for fsck..)
I've noticed that moving around files in direct mode repos is inneficient,
because the assistant re-checksums the "new" file. One way to avoid
that would be to have a lookup table from (inode, size, mtime) to
key, but I don't have one, and would like to avoid adding one.
Instead, I have a cunning plan to deal with this heuristically. If the
assistant can notice a file was removed and another file added at the same
time, it can compare the (inode, size, mtime) to see if it's a rename, and
avoid the checksum overhead.
The first step to getting there was to make the assistant better at
batching together delete+add events into a single rename commit. I'm happy
to say I've accomplished that, with no perceptable delay to commits.