assistant: generate better commits for renames

This commit is contained in:
Joey Hess 2013-03-10 21:36:13 -04:00
parent b2c7ee5551
commit 2762ab03b4
9 changed files with 130 additions and 68 deletions

View file

@ -12,6 +12,7 @@ import Assistant.Types.Changes
import Utility.TSet import Utility.TSet
import Data.Time.Clock import Data.Time.Clock
import Control.Concurrent.STM
{- Handlers call this when they made a change that needs to get committed. -} {- Handlers call this when they made a change that needs to get committed. -}
madeChange :: FilePath -> ChangeInfo -> Assistant (Maybe Change) madeChange :: FilePath -> ChangeInfo -> Assistant (Maybe Change)
@ -27,13 +28,17 @@ pendingAddChange f = Just <$> (PendingAddChange <$> liftIO getCurrentTime <*> pu
{- Gets all unhandled changes. {- Gets all unhandled changes.
- Blocks until at least one change is made. -} - Blocks until at least one change is made. -}
getChanges :: Assistant [Change] getChanges :: Assistant [Change]
getChanges = getTSet <<~ changeChan getChanges = (atomically . getTSet) <<~ changeChan
{- Gets all unhandled changes, without blocking. -}
getAnyChanges :: Assistant [Change]
getAnyChanges = (atomically . readTSet) <<~ changeChan
{- Puts unhandled changes back into the channel. {- Puts unhandled changes back into the channel.
- Note: Original order is not preserved. -} - Note: Original order is not preserved. -}
refillChanges :: [Change] -> Assistant () refillChanges :: [Change] -> Assistant ()
refillChanges cs = flip putTSet cs <<~ changeChan refillChanges cs = (atomically . flip putTSet cs) <<~ changeChan
{- Records a change in the channel. -} {- Records a change in the channel. -}
recordChange :: Change -> Assistant () recordChange :: Change -> Assistant ()
recordChange c = flip putTSet1 c <<~ changeChan recordChange c = (atomically . flip putTSet1 c) <<~ changeChan

View file

@ -9,19 +9,20 @@ module Assistant.Commits where
import Assistant.Common import Assistant.Common
import Assistant.Types.Commits import Assistant.Types.Commits
import Utility.TSet import Utility.TSet
import Control.Concurrent.STM
{- Gets all unhandled commits. {- Gets all unhandled commits.
- Blocks until at least one commit is made. -} - Blocks until at least one commit is made. -}
getCommits :: Assistant [Commit] getCommits :: Assistant [Commit]
getCommits = getTSet <<~ commitChan getCommits = (atomically . getTSet) <<~ commitChan
{- Puts unhandled commits back into the channel. {- Puts unhandled commits back into the channel.
- Note: Original order is not preserved. -} - Note: Original order is not preserved. -}
refillCommits :: [Commit] -> Assistant () refillCommits :: [Commit] -> Assistant ()
refillCommits cs = flip putTSet cs <<~ commitChan refillCommits cs = (atomically . flip putTSet cs) <<~ commitChan
{- Records a commit in the channel. -} {- Records a commit in the channel. -}
recordCommit :: Assistant () recordCommit :: Assistant ()
recordCommit = flip putTSet1 Commit <<~ commitChan recordCommit = (atomically . flip putTSet1 Commit) <<~ commitChan

View file

@ -38,6 +38,7 @@ import Data.Time.Clock
import Data.Tuple.Utils import Data.Tuple.Utils
import qualified Data.Set as S import qualified Data.Set as S
import Data.Either import Data.Either
import Control.Concurrent
{- This thread makes git commits at appropriate times. -} {- This thread makes git commits at appropriate times. -}
commitThread :: NamedThread commitThread :: NamedThread
@ -45,34 +46,81 @@ commitThread = namedThread "Committer" $ do
delayadd <- liftAnnex $ delayadd <- liftAnnex $
maybe delayaddDefault (return . Just . Seconds) maybe delayaddDefault (return . Just . Seconds)
=<< annexDelayAdd <$> Annex.getGitConfig =<< annexDelayAdd <$> Annex.getGitConfig
runEvery (Seconds 1) <~> do waitChangeTime $ \(changes, time) -> do
-- We already waited one second as a simple rate limiter. readychanges <- handleAdds delayadd changes
-- Next, wait until at least one change is available for if shouldCommit time readychanges
-- processing.
changes <- getChanges
-- Now see if now's a good time to commit.
time <- liftIO getCurrentTime
if shouldCommit time changes
then do then do
readychanges <- handleAdds delayadd changes debug
if shouldCommit time readychanges [ "committing"
then do , show (length readychanges)
debug , "changes"
[ "committing" ]
, show (length readychanges) void $ alertWhile commitAlert $
, "changes" liftAnnex commitStaged
] recordCommit
void $ alertWhile commitAlert $ mapM_ checkChangeContent readychanges
liftAnnex commitStaged else refill readychanges
recordCommit
mapM_ checkChangeContent readychanges refill :: [Change] -> Assistant ()
else refill readychanges refill [] = noop
else refill changes refill cs = do
debug ["delaying commit of", show (length cs), "changes"]
refillChanges cs
{- Wait for one or more changes to arrive to be committed. -}
waitChangeTime :: (([Change], UTCTime) -> Assistant ()) -> Assistant ()
waitChangeTime a = runEvery (Seconds 1) <~> do
-- We already waited one second as a simple rate limiter.
-- Next, wait until at least one change is available for
-- processing.
changes <- getChanges
-- See if now's a good time to commit.
now <- liftIO getCurrentTime
case (shouldCommit now changes, lonelychange changes) of
(True, False) -> a (changes, now)
(True, True) -> do
-- Wait for other, related changes to arrive.
liftIO $ humanImperceptibleDelay
-- Don't block, but are there any?
morechanges <- getAnyChanges
let allchanges = changes++morechanges
a (allchanges, now)
_ -> refill changes
where where
refill [] = noop {- Did we perhaps only get one of the AddChange and RmChange pair
refill cs = do - that make up a rename? -}
debug ["delaying commit of", show (length cs), "changes"] lonelychange [(PendingAddChange _ _)] = True
refillChanges cs lonelychange [(Change { changeInfo = i })] | i == RmChange = True
lonelychange _ = False
{- An amount of time that is hopefully imperceptably short for humans,
- while long enough for a computer to get some work done.
- Note that 0.001 is a little too short for rename change batching to
- work. -}
humanImperceptibleInterval :: NominalDiffTime
humanImperceptibleInterval = 0.01
humanImperceptibleDelay :: IO ()
humanImperceptibleDelay = threadDelay $
truncate $ humanImperceptibleInterval * fromIntegral oneSecond
{- Decide if now is a good time to make a commit.
- Note that the list of changes has an undefined order.
-
- Current strategy: If there have been 10 changes within the past second,
- a batch activity is taking place, so wait for later.
-}
shouldCommit :: UTCTime -> [Change] -> Bool
shouldCommit now changes
| len == 0 = False
| len > 10000 = True -- avoid bloating queue too much
| length recentchanges < 10 = True
| otherwise = False -- batch activity
where
len = length changes
thissecond c = timeDelta c <= 1
recentchanges = filter thissecond changes
timeDelta c = now `diffUTCTime` changeTime c
commitStaged :: Annex Bool commitStaged :: Annex Bool
commitStaged = do commitStaged = do
@ -105,22 +153,6 @@ commitStaged = do
| otherwise = Param "--allow-empty-message" | otherwise = Param "--allow-empty-message"
: Param "-m" : Param "" : ps : Param "-m" : Param "" : ps
{- Decide if now is a good time to make a commit.
- Note that the list of changes has an undefined order.
-
- Current strategy: If there have been 10 changes within the past second,
- a batch activity is taking place, so wait for later.
-}
shouldCommit :: UTCTime -> [Change] -> Bool
shouldCommit now changes
| len == 0 = False
| len > 10000 = True -- avoid bloating queue too much
| length (filter thisSecond changes) < 10 = True
| otherwise = False -- batch activity
where
len = length changes
thisSecond c = now `diffUTCTime` changeTime c <= 1
{- OSX needs a short delay after a file is added before locking it down, {- OSX needs a short delay after a file is added before locking it down,
- when using a non-direct mode repository, as pasting a file seems to - when using a non-direct mode repository, as pasting a file seems to
- try to set file permissions or otherwise access the file after closing - try to set file permissions or otherwise access the file after closing

View file

@ -12,6 +12,7 @@ import Types.Key
import Utility.TSet import Utility.TSet
import Data.Time.Clock import Data.Time.Clock
import Control.Concurrent.STM
data ChangeInfo = AddChange Key | LinkChange (Maybe Key) | RmChange | RmDirChange data ChangeInfo = AddChange Key | LinkChange (Maybe Key) | RmChange | RmDirChange
deriving (Show, Eq) deriving (Show, Eq)
@ -40,7 +41,7 @@ data Change
deriving (Show) deriving (Show)
newChangeChan :: IO ChangeChan newChangeChan :: IO ChangeChan
newChangeChan = newTSet newChangeChan = atomically newTSet
isPendingAddChange :: Change -> Bool isPendingAddChange :: Change -> Bool
isPendingAddChange (PendingAddChange {}) = True isPendingAddChange (PendingAddChange {}) = True

View file

@ -9,9 +9,11 @@ module Assistant.Types.Commits where
import Utility.TSet import Utility.TSet
import Control.Concurrent.STM
type CommitChan = TSet Commit type CommitChan = TSet Commit
data Commit = Commit data Commit = Commit
newCommitChan :: IO CommitChan newCommitChan :: IO CommitChan
newCommitChan = newTSet newCommitChan = atomically newTSet

View file

@ -1,6 +1,6 @@
{- Transactional sets {- Transactional sets
- -
- Copyright 2012 Joey Hess <joey@kitenet.net> - Copyright 2012, 2013 Joey Hess <joey@kitenet.net>
-} -}
module Utility.TSet where module Utility.TSet where
@ -11,18 +11,20 @@ import Control.Concurrent.STM
type TSet = TChan type TSet = TChan
runTSet :: STM a -> IO a newTSet :: STM (TSet a)
runTSet = atomically newTSet = newTChan
newTSet :: IO (TSet a)
newTSet = atomically newTChan
{- Gets the contents of the TSet. Blocks until at least one item is {- Gets the contents of the TSet. Blocks until at least one item is
- present. -} - present. -}
getTSet :: TSet a -> IO [a] getTSet :: TSet a -> STM [a]
getTSet tset = runTSet $ do getTSet tset = do
c <- readTChan tset c <- readTChan tset
go [c] l <- readTSet tset
return $ c:l
{- Gets anything currently in the TSet, without blocking. -}
readTSet :: TSet a -> STM [a]
readTSet tset = go []
where where
go l = do go l = do
v <- tryReadTChan tset v <- tryReadTChan tset
@ -31,9 +33,9 @@ getTSet tset = runTSet $ do
Just c -> go (c:l) Just c -> go (c:l)
{- Puts items into a TSet. -} {- Puts items into a TSet. -}
putTSet :: TSet a -> [a] -> IO () putTSet :: TSet a -> [a] -> STM ()
putTSet tset vs = runTSet $ mapM_ (writeTChan tset) vs putTSet tset vs = mapM_ (writeTChan tset) vs
{- Put a single item into a TSet. -} {- Put a single item into a TSet. -}
putTSet1 :: TSet a -> a -> IO () putTSet1 :: TSet a -> a -> STM ()
putTSet1 tset v = void $ runTSet $ writeTChan tset v putTSet1 tset v = void $ writeTChan tset v

View file

@ -1,6 +1,6 @@
{- thread scheduling {- thread scheduling
- -
- Copyright 2012 Joey Hess <joey@kitenet.net> - Copyright 2012, 2013 Joey Hess <joey@kitenet.net>
- Copyright 2011 Bas van Dijk & Roel van Dijk - Copyright 2011 Bas van Dijk & Roel van Dijk
- -
- Licensed under the GNU GPL version 3 or higher. - Licensed under the GNU GPL version 3 or higher.
@ -14,6 +14,7 @@ import Common
import Control.Concurrent import Control.Concurrent
import System.Posix.Signals import System.Posix.Signals
import Data.Time.Clock
#ifndef __ANDROID__ #ifndef __ANDROID__
import System.Posix.Terminal import System.Posix.Terminal
#endif #endif
@ -21,6 +22,8 @@ import System.Posix.Terminal
newtype Seconds = Seconds { fromSeconds :: Int } newtype Seconds = Seconds { fromSeconds :: Int }
deriving (Eq, Ord, Show) deriving (Eq, Ord, Show)
type Microseconds = Integer
{- Runs an action repeatedly forever, sleeping at least the specified number {- Runs an action repeatedly forever, sleeping at least the specified number
- of seconds in between. -} - of seconds in between. -}
runEvery :: Seconds -> IO a -> IO a runEvery :: Seconds -> IO a -> IO a
@ -30,8 +33,6 @@ runEvery n a = forever $ do
threadDelaySeconds :: Seconds -> IO () threadDelaySeconds :: Seconds -> IO ()
threadDelaySeconds (Seconds n) = unboundDelay (fromIntegral n * oneSecond) threadDelaySeconds (Seconds n) = unboundDelay (fromIntegral n * oneSecond)
where
oneSecond = 1000000 -- microseconds
{- Like threadDelay, but not bounded by an Int. {- Like threadDelay, but not bounded by an Int.
- -
@ -42,7 +43,7 @@ threadDelaySeconds (Seconds n) = unboundDelay (fromIntegral n * oneSecond)
- Taken from the unbounded-delay package to avoid a dependency for 4 lines - Taken from the unbounded-delay package to avoid a dependency for 4 lines
- of code. - of code.
-} -}
unboundDelay :: Integer -> IO () unboundDelay :: Microseconds -> IO ()
unboundDelay time = do unboundDelay time = do
let maxWait = min time $ toInteger (maxBound :: Int) let maxWait = min time $ toInteger (maxBound :: Int)
threadDelay $ fromInteger maxWait threadDelay $ fromInteger maxWait
@ -61,3 +62,6 @@ waitForTermination = do
where where
check sig lock = void $ check sig lock = void $
installHandler sig (CatchOnce $ putMVar lock ()) Nothing installHandler sig (CatchOnce $ putMVar lock ()) Nothing
oneSecond :: Microseconds
oneSecond = 1000000

1
debian/changelog vendored
View file

@ -42,6 +42,7 @@ git-annex (4.20130228) UNRELEASED; urgency=low
(See https://github.com/yesodweb/wai/issues/146) (See https://github.com/yesodweb/wai/issues/146)
* bugfix: drop --from an unavailable remote no longer updates the location * bugfix: drop --from an unavailable remote no longer updates the location
log, incorrectly, to say the remote does not have the key. log, incorrectly, to say the remote does not have the key.
* assistant: Generate better commits for renames.
-- Joey Hess <joeyh@debian.org> Wed, 27 Feb 2013 23:20:40 -0400 -- Joey Hess <joeyh@debian.org> Wed, 27 Feb 2013 23:20:40 -0400

View file

@ -13,3 +13,17 @@ Fixed some problems around USB drives. One was a real jaw-dropping
bug: "git annex drop --from usbdrive" when the drive was not bug: "git annex drop --from usbdrive" when the drive was not
connected still updated the location log to indicate it did not have connected still updated the location log to indicate it did not have
the file anymore! (Thank goodness for fsck..) the file anymore! (Thank goodness for fsck..)
I've noticed that moving around files in direct mode repos is inneficient,
because the assistant re-checksums the "new" file. One way to avoid
that would be to have a lookup table from (inode, size, mtime) to
key, but I don't have one, and would like to avoid adding one.
Instead, I have a cunning plan to deal with this heuristically. If the
assistant can notice a file was removed and another file added at the same
time, it can compare the (inode, size, mtime) to see if it's a rename, and
avoid the checksum overhead.
The first step to getting there was to make the assistant better at
batching together delete+add events into a single rename commit. I'm happy
to say I've accomplished that, with no perceptable delay to commits.