assistant: generate better commits for renames
This commit is contained in:
parent
b2c7ee5551
commit
2762ab03b4
9 changed files with 130 additions and 68 deletions
|
@ -12,6 +12,7 @@ import Assistant.Types.Changes
|
||||||
import Utility.TSet
|
import Utility.TSet
|
||||||
|
|
||||||
import Data.Time.Clock
|
import Data.Time.Clock
|
||||||
|
import Control.Concurrent.STM
|
||||||
|
|
||||||
{- Handlers call this when they made a change that needs to get committed. -}
|
{- Handlers call this when they made a change that needs to get committed. -}
|
||||||
madeChange :: FilePath -> ChangeInfo -> Assistant (Maybe Change)
|
madeChange :: FilePath -> ChangeInfo -> Assistant (Maybe Change)
|
||||||
|
@ -27,13 +28,17 @@ pendingAddChange f = Just <$> (PendingAddChange <$> liftIO getCurrentTime <*> pu
|
||||||
{- Gets all unhandled changes.
|
{- Gets all unhandled changes.
|
||||||
- Blocks until at least one change is made. -}
|
- Blocks until at least one change is made. -}
|
||||||
getChanges :: Assistant [Change]
|
getChanges :: Assistant [Change]
|
||||||
getChanges = getTSet <<~ changeChan
|
getChanges = (atomically . getTSet) <<~ changeChan
|
||||||
|
|
||||||
|
{- Gets all unhandled changes, without blocking. -}
|
||||||
|
getAnyChanges :: Assistant [Change]
|
||||||
|
getAnyChanges = (atomically . readTSet) <<~ changeChan
|
||||||
|
|
||||||
{- Puts unhandled changes back into the channel.
|
{- Puts unhandled changes back into the channel.
|
||||||
- Note: Original order is not preserved. -}
|
- Note: Original order is not preserved. -}
|
||||||
refillChanges :: [Change] -> Assistant ()
|
refillChanges :: [Change] -> Assistant ()
|
||||||
refillChanges cs = flip putTSet cs <<~ changeChan
|
refillChanges cs = (atomically . flip putTSet cs) <<~ changeChan
|
||||||
|
|
||||||
{- Records a change in the channel. -}
|
{- Records a change in the channel. -}
|
||||||
recordChange :: Change -> Assistant ()
|
recordChange :: Change -> Assistant ()
|
||||||
recordChange c = flip putTSet1 c <<~ changeChan
|
recordChange c = (atomically . flip putTSet1 c) <<~ changeChan
|
||||||
|
|
|
@ -9,19 +9,20 @@ module Assistant.Commits where
|
||||||
|
|
||||||
import Assistant.Common
|
import Assistant.Common
|
||||||
import Assistant.Types.Commits
|
import Assistant.Types.Commits
|
||||||
|
|
||||||
import Utility.TSet
|
import Utility.TSet
|
||||||
|
|
||||||
|
import Control.Concurrent.STM
|
||||||
|
|
||||||
{- Gets all unhandled commits.
|
{- Gets all unhandled commits.
|
||||||
- Blocks until at least one commit is made. -}
|
- Blocks until at least one commit is made. -}
|
||||||
getCommits :: Assistant [Commit]
|
getCommits :: Assistant [Commit]
|
||||||
getCommits = getTSet <<~ commitChan
|
getCommits = (atomically . getTSet) <<~ commitChan
|
||||||
|
|
||||||
{- Puts unhandled commits back into the channel.
|
{- Puts unhandled commits back into the channel.
|
||||||
- Note: Original order is not preserved. -}
|
- Note: Original order is not preserved. -}
|
||||||
refillCommits :: [Commit] -> Assistant ()
|
refillCommits :: [Commit] -> Assistant ()
|
||||||
refillCommits cs = flip putTSet cs <<~ commitChan
|
refillCommits cs = (atomically . flip putTSet cs) <<~ commitChan
|
||||||
|
|
||||||
{- Records a commit in the channel. -}
|
{- Records a commit in the channel. -}
|
||||||
recordCommit :: Assistant ()
|
recordCommit :: Assistant ()
|
||||||
recordCommit = flip putTSet1 Commit <<~ commitChan
|
recordCommit = (atomically . flip putTSet1 Commit) <<~ commitChan
|
||||||
|
|
|
@ -38,6 +38,7 @@ import Data.Time.Clock
|
||||||
import Data.Tuple.Utils
|
import Data.Tuple.Utils
|
||||||
import qualified Data.Set as S
|
import qualified Data.Set as S
|
||||||
import Data.Either
|
import Data.Either
|
||||||
|
import Control.Concurrent
|
||||||
|
|
||||||
{- This thread makes git commits at appropriate times. -}
|
{- This thread makes git commits at appropriate times. -}
|
||||||
commitThread :: NamedThread
|
commitThread :: NamedThread
|
||||||
|
@ -45,34 +46,81 @@ commitThread = namedThread "Committer" $ do
|
||||||
delayadd <- liftAnnex $
|
delayadd <- liftAnnex $
|
||||||
maybe delayaddDefault (return . Just . Seconds)
|
maybe delayaddDefault (return . Just . Seconds)
|
||||||
=<< annexDelayAdd <$> Annex.getGitConfig
|
=<< annexDelayAdd <$> Annex.getGitConfig
|
||||||
runEvery (Seconds 1) <~> do
|
waitChangeTime $ \(changes, time) -> do
|
||||||
-- We already waited one second as a simple rate limiter.
|
readychanges <- handleAdds delayadd changes
|
||||||
-- Next, wait until at least one change is available for
|
if shouldCommit time readychanges
|
||||||
-- processing.
|
|
||||||
changes <- getChanges
|
|
||||||
-- Now see if now's a good time to commit.
|
|
||||||
time <- liftIO getCurrentTime
|
|
||||||
if shouldCommit time changes
|
|
||||||
then do
|
then do
|
||||||
readychanges <- handleAdds delayadd changes
|
debug
|
||||||
if shouldCommit time readychanges
|
[ "committing"
|
||||||
then do
|
, show (length readychanges)
|
||||||
debug
|
, "changes"
|
||||||
[ "committing"
|
]
|
||||||
, show (length readychanges)
|
void $ alertWhile commitAlert $
|
||||||
, "changes"
|
liftAnnex commitStaged
|
||||||
]
|
recordCommit
|
||||||
void $ alertWhile commitAlert $
|
mapM_ checkChangeContent readychanges
|
||||||
liftAnnex commitStaged
|
else refill readychanges
|
||||||
recordCommit
|
|
||||||
mapM_ checkChangeContent readychanges
|
refill :: [Change] -> Assistant ()
|
||||||
else refill readychanges
|
refill [] = noop
|
||||||
else refill changes
|
refill cs = do
|
||||||
|
debug ["delaying commit of", show (length cs), "changes"]
|
||||||
|
refillChanges cs
|
||||||
|
|
||||||
|
{- Wait for one or more changes to arrive to be committed. -}
|
||||||
|
waitChangeTime :: (([Change], UTCTime) -> Assistant ()) -> Assistant ()
|
||||||
|
waitChangeTime a = runEvery (Seconds 1) <~> do
|
||||||
|
-- We already waited one second as a simple rate limiter.
|
||||||
|
-- Next, wait until at least one change is available for
|
||||||
|
-- processing.
|
||||||
|
changes <- getChanges
|
||||||
|
-- See if now's a good time to commit.
|
||||||
|
now <- liftIO getCurrentTime
|
||||||
|
case (shouldCommit now changes, lonelychange changes) of
|
||||||
|
(True, False) -> a (changes, now)
|
||||||
|
(True, True) -> do
|
||||||
|
-- Wait for other, related changes to arrive.
|
||||||
|
liftIO $ humanImperceptibleDelay
|
||||||
|
-- Don't block, but are there any?
|
||||||
|
morechanges <- getAnyChanges
|
||||||
|
let allchanges = changes++morechanges
|
||||||
|
a (allchanges, now)
|
||||||
|
_ -> refill changes
|
||||||
where
|
where
|
||||||
refill [] = noop
|
{- Did we perhaps only get one of the AddChange and RmChange pair
|
||||||
refill cs = do
|
- that make up a rename? -}
|
||||||
debug ["delaying commit of", show (length cs), "changes"]
|
lonelychange [(PendingAddChange _ _)] = True
|
||||||
refillChanges cs
|
lonelychange [(Change { changeInfo = i })] | i == RmChange = True
|
||||||
|
lonelychange _ = False
|
||||||
|
|
||||||
|
{- An amount of time that is hopefully imperceptably short for humans,
|
||||||
|
- while long enough for a computer to get some work done.
|
||||||
|
- Note that 0.001 is a little too short for rename change batching to
|
||||||
|
- work. -}
|
||||||
|
humanImperceptibleInterval :: NominalDiffTime
|
||||||
|
humanImperceptibleInterval = 0.01
|
||||||
|
|
||||||
|
humanImperceptibleDelay :: IO ()
|
||||||
|
humanImperceptibleDelay = threadDelay $
|
||||||
|
truncate $ humanImperceptibleInterval * fromIntegral oneSecond
|
||||||
|
|
||||||
|
{- Decide if now is a good time to make a commit.
|
||||||
|
- Note that the list of changes has an undefined order.
|
||||||
|
-
|
||||||
|
- Current strategy: If there have been 10 changes within the past second,
|
||||||
|
- a batch activity is taking place, so wait for later.
|
||||||
|
-}
|
||||||
|
shouldCommit :: UTCTime -> [Change] -> Bool
|
||||||
|
shouldCommit now changes
|
||||||
|
| len == 0 = False
|
||||||
|
| len > 10000 = True -- avoid bloating queue too much
|
||||||
|
| length recentchanges < 10 = True
|
||||||
|
| otherwise = False -- batch activity
|
||||||
|
where
|
||||||
|
len = length changes
|
||||||
|
thissecond c = timeDelta c <= 1
|
||||||
|
recentchanges = filter thissecond changes
|
||||||
|
timeDelta c = now `diffUTCTime` changeTime c
|
||||||
|
|
||||||
commitStaged :: Annex Bool
|
commitStaged :: Annex Bool
|
||||||
commitStaged = do
|
commitStaged = do
|
||||||
|
@ -105,22 +153,6 @@ commitStaged = do
|
||||||
| otherwise = Param "--allow-empty-message"
|
| otherwise = Param "--allow-empty-message"
|
||||||
: Param "-m" : Param "" : ps
|
: Param "-m" : Param "" : ps
|
||||||
|
|
||||||
{- Decide if now is a good time to make a commit.
|
|
||||||
- Note that the list of changes has an undefined order.
|
|
||||||
-
|
|
||||||
- Current strategy: If there have been 10 changes within the past second,
|
|
||||||
- a batch activity is taking place, so wait for later.
|
|
||||||
-}
|
|
||||||
shouldCommit :: UTCTime -> [Change] -> Bool
|
|
||||||
shouldCommit now changes
|
|
||||||
| len == 0 = False
|
|
||||||
| len > 10000 = True -- avoid bloating queue too much
|
|
||||||
| length (filter thisSecond changes) < 10 = True
|
|
||||||
| otherwise = False -- batch activity
|
|
||||||
where
|
|
||||||
len = length changes
|
|
||||||
thisSecond c = now `diffUTCTime` changeTime c <= 1
|
|
||||||
|
|
||||||
{- OSX needs a short delay after a file is added before locking it down,
|
{- OSX needs a short delay after a file is added before locking it down,
|
||||||
- when using a non-direct mode repository, as pasting a file seems to
|
- when using a non-direct mode repository, as pasting a file seems to
|
||||||
- try to set file permissions or otherwise access the file after closing
|
- try to set file permissions or otherwise access the file after closing
|
||||||
|
|
|
@ -12,6 +12,7 @@ import Types.Key
|
||||||
import Utility.TSet
|
import Utility.TSet
|
||||||
|
|
||||||
import Data.Time.Clock
|
import Data.Time.Clock
|
||||||
|
import Control.Concurrent.STM
|
||||||
|
|
||||||
data ChangeInfo = AddChange Key | LinkChange (Maybe Key) | RmChange | RmDirChange
|
data ChangeInfo = AddChange Key | LinkChange (Maybe Key) | RmChange | RmDirChange
|
||||||
deriving (Show, Eq)
|
deriving (Show, Eq)
|
||||||
|
@ -40,7 +41,7 @@ data Change
|
||||||
deriving (Show)
|
deriving (Show)
|
||||||
|
|
||||||
newChangeChan :: IO ChangeChan
|
newChangeChan :: IO ChangeChan
|
||||||
newChangeChan = newTSet
|
newChangeChan = atomically newTSet
|
||||||
|
|
||||||
isPendingAddChange :: Change -> Bool
|
isPendingAddChange :: Change -> Bool
|
||||||
isPendingAddChange (PendingAddChange {}) = True
|
isPendingAddChange (PendingAddChange {}) = True
|
||||||
|
|
|
@ -9,9 +9,11 @@ module Assistant.Types.Commits where
|
||||||
|
|
||||||
import Utility.TSet
|
import Utility.TSet
|
||||||
|
|
||||||
|
import Control.Concurrent.STM
|
||||||
|
|
||||||
type CommitChan = TSet Commit
|
type CommitChan = TSet Commit
|
||||||
|
|
||||||
data Commit = Commit
|
data Commit = Commit
|
||||||
|
|
||||||
newCommitChan :: IO CommitChan
|
newCommitChan :: IO CommitChan
|
||||||
newCommitChan = newTSet
|
newCommitChan = atomically newTSet
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
{- Transactional sets
|
{- Transactional sets
|
||||||
-
|
-
|
||||||
- Copyright 2012 Joey Hess <joey@kitenet.net>
|
- Copyright 2012, 2013 Joey Hess <joey@kitenet.net>
|
||||||
-}
|
-}
|
||||||
|
|
||||||
module Utility.TSet where
|
module Utility.TSet where
|
||||||
|
@ -11,18 +11,20 @@ import Control.Concurrent.STM
|
||||||
|
|
||||||
type TSet = TChan
|
type TSet = TChan
|
||||||
|
|
||||||
runTSet :: STM a -> IO a
|
newTSet :: STM (TSet a)
|
||||||
runTSet = atomically
|
newTSet = newTChan
|
||||||
|
|
||||||
newTSet :: IO (TSet a)
|
|
||||||
newTSet = atomically newTChan
|
|
||||||
|
|
||||||
{- Gets the contents of the TSet. Blocks until at least one item is
|
{- Gets the contents of the TSet. Blocks until at least one item is
|
||||||
- present. -}
|
- present. -}
|
||||||
getTSet :: TSet a -> IO [a]
|
getTSet :: TSet a -> STM [a]
|
||||||
getTSet tset = runTSet $ do
|
getTSet tset = do
|
||||||
c <- readTChan tset
|
c <- readTChan tset
|
||||||
go [c]
|
l <- readTSet tset
|
||||||
|
return $ c:l
|
||||||
|
|
||||||
|
{- Gets anything currently in the TSet, without blocking. -}
|
||||||
|
readTSet :: TSet a -> STM [a]
|
||||||
|
readTSet tset = go []
|
||||||
where
|
where
|
||||||
go l = do
|
go l = do
|
||||||
v <- tryReadTChan tset
|
v <- tryReadTChan tset
|
||||||
|
@ -31,9 +33,9 @@ getTSet tset = runTSet $ do
|
||||||
Just c -> go (c:l)
|
Just c -> go (c:l)
|
||||||
|
|
||||||
{- Puts items into a TSet. -}
|
{- Puts items into a TSet. -}
|
||||||
putTSet :: TSet a -> [a] -> IO ()
|
putTSet :: TSet a -> [a] -> STM ()
|
||||||
putTSet tset vs = runTSet $ mapM_ (writeTChan tset) vs
|
putTSet tset vs = mapM_ (writeTChan tset) vs
|
||||||
|
|
||||||
{- Put a single item into a TSet. -}
|
{- Put a single item into a TSet. -}
|
||||||
putTSet1 :: TSet a -> a -> IO ()
|
putTSet1 :: TSet a -> a -> STM ()
|
||||||
putTSet1 tset v = void $ runTSet $ writeTChan tset v
|
putTSet1 tset v = void $ writeTChan tset v
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
{- thread scheduling
|
{- thread scheduling
|
||||||
-
|
-
|
||||||
- Copyright 2012 Joey Hess <joey@kitenet.net>
|
- Copyright 2012, 2013 Joey Hess <joey@kitenet.net>
|
||||||
- Copyright 2011 Bas van Dijk & Roel van Dijk
|
- Copyright 2011 Bas van Dijk & Roel van Dijk
|
||||||
-
|
-
|
||||||
- Licensed under the GNU GPL version 3 or higher.
|
- Licensed under the GNU GPL version 3 or higher.
|
||||||
|
@ -14,6 +14,7 @@ import Common
|
||||||
|
|
||||||
import Control.Concurrent
|
import Control.Concurrent
|
||||||
import System.Posix.Signals
|
import System.Posix.Signals
|
||||||
|
import Data.Time.Clock
|
||||||
#ifndef __ANDROID__
|
#ifndef __ANDROID__
|
||||||
import System.Posix.Terminal
|
import System.Posix.Terminal
|
||||||
#endif
|
#endif
|
||||||
|
@ -21,6 +22,8 @@ import System.Posix.Terminal
|
||||||
newtype Seconds = Seconds { fromSeconds :: Int }
|
newtype Seconds = Seconds { fromSeconds :: Int }
|
||||||
deriving (Eq, Ord, Show)
|
deriving (Eq, Ord, Show)
|
||||||
|
|
||||||
|
type Microseconds = Integer
|
||||||
|
|
||||||
{- Runs an action repeatedly forever, sleeping at least the specified number
|
{- Runs an action repeatedly forever, sleeping at least the specified number
|
||||||
- of seconds in between. -}
|
- of seconds in between. -}
|
||||||
runEvery :: Seconds -> IO a -> IO a
|
runEvery :: Seconds -> IO a -> IO a
|
||||||
|
@ -30,8 +33,6 @@ runEvery n a = forever $ do
|
||||||
|
|
||||||
threadDelaySeconds :: Seconds -> IO ()
|
threadDelaySeconds :: Seconds -> IO ()
|
||||||
threadDelaySeconds (Seconds n) = unboundDelay (fromIntegral n * oneSecond)
|
threadDelaySeconds (Seconds n) = unboundDelay (fromIntegral n * oneSecond)
|
||||||
where
|
|
||||||
oneSecond = 1000000 -- microseconds
|
|
||||||
|
|
||||||
{- Like threadDelay, but not bounded by an Int.
|
{- Like threadDelay, but not bounded by an Int.
|
||||||
-
|
-
|
||||||
|
@ -42,7 +43,7 @@ threadDelaySeconds (Seconds n) = unboundDelay (fromIntegral n * oneSecond)
|
||||||
- Taken from the unbounded-delay package to avoid a dependency for 4 lines
|
- Taken from the unbounded-delay package to avoid a dependency for 4 lines
|
||||||
- of code.
|
- of code.
|
||||||
-}
|
-}
|
||||||
unboundDelay :: Integer -> IO ()
|
unboundDelay :: Microseconds -> IO ()
|
||||||
unboundDelay time = do
|
unboundDelay time = do
|
||||||
let maxWait = min time $ toInteger (maxBound :: Int)
|
let maxWait = min time $ toInteger (maxBound :: Int)
|
||||||
threadDelay $ fromInteger maxWait
|
threadDelay $ fromInteger maxWait
|
||||||
|
@ -61,3 +62,6 @@ waitForTermination = do
|
||||||
where
|
where
|
||||||
check sig lock = void $
|
check sig lock = void $
|
||||||
installHandler sig (CatchOnce $ putMVar lock ()) Nothing
|
installHandler sig (CatchOnce $ putMVar lock ()) Nothing
|
||||||
|
|
||||||
|
oneSecond :: Microseconds
|
||||||
|
oneSecond = 1000000
|
||||||
|
|
1
debian/changelog
vendored
1
debian/changelog
vendored
|
@ -42,6 +42,7 @@ git-annex (4.20130228) UNRELEASED; urgency=low
|
||||||
(See https://github.com/yesodweb/wai/issues/146)
|
(See https://github.com/yesodweb/wai/issues/146)
|
||||||
* bugfix: drop --from an unavailable remote no longer updates the location
|
* bugfix: drop --from an unavailable remote no longer updates the location
|
||||||
log, incorrectly, to say the remote does not have the key.
|
log, incorrectly, to say the remote does not have the key.
|
||||||
|
* assistant: Generate better commits for renames.
|
||||||
|
|
||||||
-- Joey Hess <joeyh@debian.org> Wed, 27 Feb 2013 23:20:40 -0400
|
-- Joey Hess <joeyh@debian.org> Wed, 27 Feb 2013 23:20:40 -0400
|
||||||
|
|
||||||
|
|
|
@ -13,3 +13,17 @@ Fixed some problems around USB drives. One was a real jaw-dropping
|
||||||
bug: "git annex drop --from usbdrive" when the drive was not
|
bug: "git annex drop --from usbdrive" when the drive was not
|
||||||
connected still updated the location log to indicate it did not have
|
connected still updated the location log to indicate it did not have
|
||||||
the file anymore! (Thank goodness for fsck..)
|
the file anymore! (Thank goodness for fsck..)
|
||||||
|
|
||||||
|
I've noticed that moving around files in direct mode repos is inneficient,
|
||||||
|
because the assistant re-checksums the "new" file. One way to avoid
|
||||||
|
that would be to have a lookup table from (inode, size, mtime) to
|
||||||
|
key, but I don't have one, and would like to avoid adding one.
|
||||||
|
|
||||||
|
Instead, I have a cunning plan to deal with this heuristically. If the
|
||||||
|
assistant can notice a file was removed and another file added at the same
|
||||||
|
time, it can compare the (inode, size, mtime) to see if it's a rename, and
|
||||||
|
avoid the checksum overhead.
|
||||||
|
|
||||||
|
The first step to getting there was to make the assistant better at
|
||||||
|
batching together delete+add events into a single rename commit. I'm happy
|
||||||
|
to say I've accomplished that, with no perceptable delay to commits.
|
||||||
|
|
Loading…
Add table
Reference in a new issue