convert incremental fsck to using sqlite database
Did not keep backwards compat for sticky bit records. An incremental fsck that is already in progress will start over on upgrade to this version. This is not yet ready for merging. The autobuilders need to have sqlite installed. Also, interrupting a fsck --incremental does not commit the database. So, resuming with fsck --more restarts from beginning. Memory: Constant during a fsck of tens of thousands of files. (But, it does seem to buffer whole transation in memory, so may really scale with number of files.) CPU: ?
This commit is contained in:
parent
bb242bdd82
commit
91e9146d1b
9 changed files with 185 additions and 177 deletions
|
@ -1,6 +1,6 @@
|
|||
{- git-annex command
|
||||
-
|
||||
- Copyright 2010-2013 Joey Hess <id@joeyh.name>
|
||||
- Copyright 2010-2015 Joey Hess <id@joeyh.name>
|
||||
-
|
||||
- Licensed under the GNU GPL version 3 or higher.
|
||||
-}
|
||||
|
@ -26,13 +26,13 @@ import Logs.Trust
|
|||
import Config.NumCopies
|
||||
import Annex.UUID
|
||||
import Utility.DataUnits
|
||||
import Utility.FileMode
|
||||
import Config
|
||||
import Types.Key
|
||||
import Types.CleanupActions
|
||||
import Utility.HumanTime
|
||||
import Git.FilePath
|
||||
import Utility.PID
|
||||
import qualified Database.Fsck as FsckDb
|
||||
|
||||
import Data.Time.Clock.POSIX
|
||||
import Data.Time
|
||||
|
@ -72,6 +72,7 @@ seek ps = do
|
|||
(\k -> startKey i k =<< getNumCopies)
|
||||
(withFilesInGit $ whenAnnexed $ start from i)
|
||||
ps
|
||||
withFsckDb i (liftIO . FsckDb.closeDb)
|
||||
|
||||
getIncremental :: Annex Incremental
|
||||
getIncremental = do
|
||||
|
@ -82,15 +83,17 @@ getIncremental = do
|
|||
case (i, starti, morei) of
|
||||
(False, False, False) -> return NonIncremental
|
||||
(False, True, False) -> startIncremental
|
||||
(False ,False, True) -> ContIncremental <$> getStartTime
|
||||
(False ,False, True) -> contIncremental
|
||||
(True, False, False) ->
|
||||
maybe startIncremental (return . ContIncremental . Just)
|
||||
maybe startIncremental (const contIncremental)
|
||||
=<< getStartTime
|
||||
_ -> error "Specify only one of --incremental, --more, or --incremental-schedule"
|
||||
where
|
||||
startIncremental = do
|
||||
recordStartTime
|
||||
return StartIncremental
|
||||
FsckDb.newPass
|
||||
StartIncremental <$> FsckDb.openDb
|
||||
contIncremental = ContIncremental <$> FsckDb.openDb
|
||||
|
||||
checkschedule Nothing = error "bad --incremental-schedule value"
|
||||
checkschedule (Just delta) = do
|
||||
|
@ -415,8 +418,7 @@ badContentRemote remote key = do
|
|||
return $ (if ok then "dropped from " else "failed to drop from ")
|
||||
++ Remote.name remote
|
||||
|
||||
data Incremental = StartIncremental | ContIncremental (Maybe EpochTime) | NonIncremental
|
||||
deriving (Eq, Show)
|
||||
data Incremental = StartIncremental FsckDb.DbHandle | ContIncremental FsckDb.DbHandle | NonIncremental
|
||||
|
||||
runFsck :: Incremental -> FilePath -> Key -> Annex Bool -> CommandStart
|
||||
runFsck inc file key a = ifM (needFsck inc key)
|
||||
|
@ -425,48 +427,23 @@ runFsck inc file key a = ifM (needFsck inc key)
|
|||
next $ do
|
||||
ok <- a
|
||||
when ok $
|
||||
recordFsckTime key
|
||||
recordFsckTime inc key
|
||||
next $ return ok
|
||||
, stop
|
||||
)
|
||||
|
||||
{- Check if a key needs to be fscked, with support for incremental fscks. -}
|
||||
needFsck :: Incremental -> Key -> Annex Bool
|
||||
needFsck (ContIncremental Nothing) _ = return True
|
||||
needFsck (ContIncremental starttime) key = do
|
||||
fscktime <- getFsckTime key
|
||||
return $ fscktime < starttime
|
||||
needFsck (ContIncremental h) key = not <$> FsckDb.inDb h key
|
||||
needFsck _ _ = return True
|
||||
|
||||
{- To record the time that a key was last fscked, without
|
||||
- modifying its mtime, we set the timestamp of its parent directory.
|
||||
- Each annexed file is the only thing in its directory, so this is fine.
|
||||
-
|
||||
- To record that the file was fscked, the directory's sticky bit is set.
|
||||
- (None of the normal unix behaviors of the sticky bit should matter, so
|
||||
- we can reuse this permission bit.)
|
||||
-
|
||||
- Note that this relies on the parent directory being deleted when a file
|
||||
- is dropped. That way, if it's later added back, the fsck record
|
||||
- won't still be present.
|
||||
-}
|
||||
recordFsckTime :: Key -> Annex ()
|
||||
recordFsckTime key = do
|
||||
parent <- parentDir <$> calcRepo (gitAnnexLocation key)
|
||||
liftIO $ void $ tryIO $ do
|
||||
touchFile parent
|
||||
#ifndef mingw32_HOST_OS
|
||||
setSticky parent
|
||||
#endif
|
||||
withFsckDb :: Incremental -> (FsckDb.DbHandle -> Annex ()) -> Annex ()
|
||||
withFsckDb (ContIncremental h) a = a h
|
||||
withFsckDb (StartIncremental h) a = a h
|
||||
withFsckDb NonIncremental _ = noop
|
||||
|
||||
getFsckTime :: Key -> Annex (Maybe EpochTime)
|
||||
getFsckTime key = do
|
||||
parent <- parentDir <$> calcRepo (gitAnnexLocation key)
|
||||
liftIO $ catchDefaultIO Nothing $ do
|
||||
s <- getFileStatus parent
|
||||
return $ if isSticky $ fileMode s
|
||||
then Just $ modificationTime s
|
||||
else Nothing
|
||||
recordFsckTime :: Incremental -> Key -> Annex ()
|
||||
recordFsckTime inc key = withFsckDb inc $ \h -> FsckDb.addDb h key
|
||||
|
||||
{- Records the start time of an incremental fsck.
|
||||
-
|
||||
|
|
71
Database/Fsck.hs
Normal file
71
Database/Fsck.hs
Normal file
|
@ -0,0 +1,71 @@
|
|||
{- Sqlite database used for incremental fsck.
|
||||
-
|
||||
- Copyright 2015 Joey Hess <id@joeyh.name>
|
||||
-:
|
||||
- Licensed under the GNU GPL version 3 or higher.
|
||||
-}
|
||||
|
||||
{-# LANGUAGE QuasiQuotes, TypeFamilies, TemplateHaskell #-}
|
||||
{-# LANGUAGE OverloadedStrings, GADTs, FlexibleContexts #-}
|
||||
|
||||
module Database.Fsck (
|
||||
newPass,
|
||||
openDb,
|
||||
H.closeDb,
|
||||
H.DbHandle,
|
||||
addDb,
|
||||
inDb,
|
||||
FsckedId,
|
||||
) where
|
||||
|
||||
import Database.Types
|
||||
import qualified Database.Handle as H
|
||||
import Locations
|
||||
import Utility.Directory
|
||||
import Annex
|
||||
import Types.Key
|
||||
import Annex.Perms
|
||||
|
||||
import Database.Persist.TH
|
||||
import Database.Esqueleto hiding (Key)
|
||||
import Control.Monad
|
||||
import Control.Monad.IfElse
|
||||
import Control.Monad.IO.Class (liftIO)
|
||||
import System.Directory
|
||||
|
||||
{- Each key stored in the database has already been fscked as part
|
||||
- of the latest incremental fsck pass. -}
|
||||
share [mkPersist sqlSettings, mkMigrate "migrateFsck"] [persistLowerCase|
|
||||
Fscked
|
||||
key SKey
|
||||
UniqueKey key
|
||||
deriving Show
|
||||
|]
|
||||
|
||||
{- The database is removed when starting a new incremental fsck pass. -}
|
||||
newPass :: Annex ()
|
||||
newPass = liftIO. nukeFile =<< fromRepo gitAnnexFsckDb
|
||||
|
||||
{- Opens the database, creating it atomically if it doesn't exist yet. -}
|
||||
openDb :: Annex H.DbHandle
|
||||
openDb = do
|
||||
db <- fromRepo gitAnnexFsckDb
|
||||
unlessM (liftIO $ doesFileExist db) $ do
|
||||
let newdb = db ++ ".new"
|
||||
h <- liftIO $ H.openDb newdb
|
||||
void $ liftIO $ H.runDb h $
|
||||
runMigrationSilent migrateFsck
|
||||
liftIO $ H.closeDb h
|
||||
setAnnexFilePerm newdb
|
||||
liftIO $ renameFile newdb db
|
||||
liftIO $ H.openDb db
|
||||
|
||||
addDb :: H.DbHandle -> Key -> Annex ()
|
||||
addDb h = void . liftIO . H.runDb h . insert . Fscked . toSKey
|
||||
|
||||
inDb :: H.DbHandle -> Key -> Annex Bool
|
||||
inDb h k = liftIO $ H.runDb h $ do
|
||||
r <- select $ from $ \r -> do
|
||||
where_ (r ^. FsckedKey ==. val (toSKey k))
|
||||
return (r ^. FsckedKey)
|
||||
return $ not $ null r
|
63
Database/Handle.hs
Normal file
63
Database/Handle.hs
Normal file
|
@ -0,0 +1,63 @@
|
|||
{- Persistent sqlite database handles.
|
||||
-
|
||||
- Copyright 2015 Joey Hess <id@joeyh.name>
|
||||
-
|
||||
- Licensed under the GNU GPL version 3 or higher.
|
||||
-}
|
||||
|
||||
module Database.Handle (
|
||||
DbHandle,
|
||||
openDb,
|
||||
closeDb,
|
||||
runDb,
|
||||
) where
|
||||
|
||||
import Utility.Exception
|
||||
|
||||
import Database.Persist.Sqlite (runSqlite)
|
||||
import Database.Esqueleto hiding (Key)
|
||||
import Control.Monad.IO.Class (liftIO)
|
||||
import Control.Concurrent
|
||||
import Control.Concurrent.Async
|
||||
import Control.Exception (throwIO)
|
||||
import qualified Data.Text as T
|
||||
|
||||
{- A DbHandle is a reference to a worker thread that communicates with
|
||||
- the database. It has a MVar which Jobs are submitted to. -}
|
||||
data DbHandle = DbHandle (Async ()) (MVar Job)
|
||||
|
||||
data Job = Job (SqlPersistM ()) | CloseJob
|
||||
|
||||
openDb :: FilePath -> IO DbHandle
|
||||
openDb db = do
|
||||
jobs <- newEmptyMVar
|
||||
worker <- async (workerThread db jobs)
|
||||
return $ DbHandle worker jobs
|
||||
|
||||
workerThread :: FilePath -> MVar Job -> IO ()
|
||||
workerThread db jobs = runSqlite (T.pack db) go
|
||||
where
|
||||
go = do
|
||||
job <- liftIO $ takeMVar jobs
|
||||
case job of
|
||||
Job a -> a >> go
|
||||
CloseJob -> return ()
|
||||
|
||||
closeDb :: DbHandle -> IO ()
|
||||
closeDb (DbHandle worker jobs) = do
|
||||
putMVar jobs CloseJob
|
||||
wait worker
|
||||
|
||||
{- Runs an action using the DbHandle.
|
||||
-
|
||||
- Note that the action is not run by the calling thread, but by a
|
||||
- worker thread. Exceptions are propigated to the calling thread.
|
||||
-
|
||||
- Note that only one action can be run at a time against a given DbHandle.
|
||||
- If called concurrently, this will block until it is able to run.
|
||||
-}
|
||||
runDb :: DbHandle -> SqlPersistM a -> IO a
|
||||
runDb (DbHandle _ jobs) a = do
|
||||
res <- newEmptyMVar
|
||||
putMVar jobs $ Job $ liftIO . putMVar res =<< tryNonAsync a
|
||||
either throwIO return =<< takeMVar res
|
27
Database/Types.hs
Normal file
27
Database/Types.hs
Normal file
|
@ -0,0 +1,27 @@
|
|||
{- types for SQL databases
|
||||
-
|
||||
- Copyright 2015 Joey Hess <id@joeyh.name>
|
||||
-
|
||||
- Licensed under the GNU GPL version 3 or higher.
|
||||
-}
|
||||
|
||||
{-# LANGUAGE TemplateHaskell #-}
|
||||
|
||||
module Database.Types where
|
||||
|
||||
import Database.Persist.TH
|
||||
import Data.Maybe
|
||||
|
||||
import Types.Key
|
||||
|
||||
-- A serialized Key
|
||||
newtype SKey = SKey String
|
||||
deriving (Show, Read)
|
||||
|
||||
toSKey :: Key -> SKey
|
||||
toSKey = SKey . key2file
|
||||
|
||||
fromSKey :: SKey -> Key
|
||||
fromSKey (SKey s) = fromMaybe (error $ "bad serialied key " ++ s) (file2key s)
|
||||
|
||||
derivePersistField "SKey"
|
20
Footype.hs
20
Footype.hs
|
@ -1,20 +0,0 @@
|
|||
{-# LANGUAGE QuasiQuotes, TypeFamilies, GeneralizedNewtypeDeriving, TemplateHaskell,
|
||||
OverloadedStrings, GADTs, FlexibleContexts #-}
|
||||
|
||||
module Footype where
|
||||
|
||||
import Database.Persist hiding (Key)
|
||||
import Database.Persist.TH
|
||||
import Database.Persist.Sqlite hiding (Key)
|
||||
import Control.Monad.IO.Class (liftIO)
|
||||
import Control.Monad
|
||||
import Data.Time.Clock
|
||||
|
||||
import Types.Key
|
||||
import Types.UUID
|
||||
import Types.MetaData
|
||||
|
||||
-- has to be in a separate file from foo.hs for silly reasons
|
||||
derivePersistField "Key"
|
||||
derivePersistField "UUID"
|
||||
derivePersistField "MetaField"
|
|
@ -57,6 +57,7 @@ module Locations (
|
|||
gitAnnexSshDir,
|
||||
gitAnnexRemotesDir,
|
||||
gitAnnexAssistantDefaultDir,
|
||||
gitAnnexFsckDb,
|
||||
isLinkToAnnex,
|
||||
HashLevels(..),
|
||||
hashDirMixed,
|
||||
|
@ -340,6 +341,10 @@ gitAnnexRemotesDir r = addTrailingPathSeparator $ gitAnnexDir r </> "remotes"
|
|||
gitAnnexAssistantDefaultDir :: FilePath
|
||||
gitAnnexAssistantDefaultDir = "annex"
|
||||
|
||||
{- Database used to record fsck info. -}
|
||||
gitAnnexFsckDb :: Git.Repo -> FilePath
|
||||
gitAnnexFsckDb r = gitAnnexDir r </> "fsck.db"
|
||||
|
||||
{- Checks a symlink target to see if it appears to point to annexed content.
|
||||
-
|
||||
- We only look at paths inside the .git directory, and not at the .git
|
||||
|
|
74
foo.hs
74
foo.hs
|
@ -1,74 +0,0 @@
|
|||
{-# LANGUAGE QuasiQuotes, TypeFamilies, GeneralizedNewtypeDeriving, TemplateHaskell,
|
||||
OverloadedStrings, GADTs, FlexibleContexts #-}
|
||||
import Database.Persist hiding (Key)
|
||||
import Database.Persist.TH
|
||||
import Database.Persist.Sqlite hiding (Key)
|
||||
import Control.Monad.IO.Class (liftIO)
|
||||
import Control.Monad
|
||||
import Data.Time.Clock
|
||||
import Data.Maybe
|
||||
|
||||
import Types.Key
|
||||
import Types.UUID
|
||||
import Types.MetaData
|
||||
import Footype
|
||||
|
||||
data RemoteFsckTime
|
||||
|
||||
share [mkPersist sqlSettings, mkSave "entityDefs", mkMigrate "migrateAll"] [persistLowerCase|
|
||||
CachedKey
|
||||
key Key
|
||||
KeyOutdex key
|
||||
deriving Show
|
||||
|
||||
AssociatedFiles
|
||||
keyId CachedKeyId Eq
|
||||
associatedFile FilePath
|
||||
KeyIdOutdex keyId associatedFile
|
||||
deriving Show
|
||||
|
||||
CachedMetaField
|
||||
field MetaField
|
||||
FieldOutdex field
|
||||
deriving Show
|
||||
|
||||
CachedMetaData
|
||||
keyId CachedKeyId Eq
|
||||
fieldId CachedMetaFieldId Eq
|
||||
metaValue String
|
||||
deriving Show
|
||||
|
||||
LastFscked
|
||||
keyId CachedKeyId Eq
|
||||
localFscked Int Maybe
|
||||
|]
|
||||
|
||||
main :: IO ()
|
||||
main = query
|
||||
|
||||
query :: IO ()
|
||||
query = runSqlite "foo.db" $ do
|
||||
forM_ [1..1000] $ \i -> do
|
||||
Just k <- getBy $ KeyOutdex (fromJust $ file2key $ "WORM--" ++ show i)
|
||||
selectList [AssociatedFilesKeyId ==. entityKey k] []
|
||||
|
||||
query2 :: IO ()
|
||||
query2 = runSqlite "foo.db" $ do
|
||||
forM_ [1..1] $ \i -> do
|
||||
Just f <- getBy $ FieldOutdex (fromJust $ toMetaField "tag")
|
||||
liftIO $ print f
|
||||
fs <- selectList [CachedMetaDataFieldId ==. entityKey f] []
|
||||
liftIO $ print $ length fs
|
||||
|
||||
populate :: IO ()
|
||||
populate = runSqlite "foo.db" $ do
|
||||
runMigration migrateAll
|
||||
t <- insert $ CachedMetaField (fromJust $ toMetaField "tag")
|
||||
f <- insert $ CachedMetaField (fromJust $ toMetaField "foo")
|
||||
forM_ [1..30000] $ \i -> do
|
||||
k <- insert $ CachedKey (fromJust $ file2key $ "WORM--" ++ show i)
|
||||
liftIO $ print k
|
||||
insert $ AssociatedFiles k (show i)
|
||||
insert $ AssociatedFiles k ("and" ++ show (i + 1))
|
||||
insert $ CachedMetaData k f (show i)
|
||||
insert $ CachedMetaData k t "bar"
|
42
fooes.hs
42
fooes.hs
|
@ -1,42 +0,0 @@
|
|||
{-# LANGUAGE QuasiQuotes, TypeFamilies, GeneralizedNewtypeDeriving, TemplateHaskell,
|
||||
OverloadedStrings, GADTs, FlexibleContexts #-}
|
||||
import Database.Persist.TH
|
||||
import Database.Persist.Sqlite (runSqlite)
|
||||
import Control.Monad.IO.Class (liftIO)
|
||||
import Control.Monad
|
||||
import Database.Esqueleto hiding (Key)
|
||||
|
||||
share [mkPersist sqlSettings, mkMigrate "migrateAll"] [persistLowerCase|
|
||||
CachedKey
|
||||
key String
|
||||
UniqueKey key
|
||||
deriving Show
|
||||
|
||||
AssociatedFiles
|
||||
key CachedKeyId Eq
|
||||
file FilePath
|
||||
UniqueKeyFile key file
|
||||
deriving Show
|
||||
|]
|
||||
|
||||
main :: IO ()
|
||||
main = runSqlite "foo.db" $ do
|
||||
runMigration migrateAll
|
||||
if True then populate else return ()
|
||||
query
|
||||
|
||||
populate = do
|
||||
forM_ [1..30000] $ \i -> do
|
||||
--delete $ from $ \f -> do
|
||||
-- where_ (f ^. CachedKeyKey ==. val (show i))
|
||||
k <- insert $ CachedKey (show i)
|
||||
liftIO $ print ("stored", k)
|
||||
insert $ AssociatedFiles k ("file" ++show (i+1))
|
||||
--insert $ AssociatedFiles k ("otherfile" ++show (i+2))
|
||||
|
||||
query = forM_ [1..1000] $ \i -> do
|
||||
r <- select $ from $ \(k, f) -> do
|
||||
where_ (k ^. CachedKeyKey ==. val (show i))
|
||||
where_ (f ^. AssociatedFilesKey ==. k ^. CachedKeyId)
|
||||
return (f ^. AssociatedFilesFile)
|
||||
liftIO $ print ("got", r)
|
|
@ -110,7 +110,8 @@ Executable git-annex
|
|||
IfElse, text, QuickCheck >= 2.1, bloomfilter, edit-distance,
|
||||
SafeSemaphore, uuid, random, dlist, unix-compat, async, stm (>= 2.3),
|
||||
data-default, case-insensitive, http-conduit, http-types,
|
||||
cryptohash (>= 0.10.0)
|
||||
cryptohash (>= 0.10.0),
|
||||
esqueleto, persistent-sqlite, persistent, persistent-template
|
||||
CC-Options: -Wall
|
||||
GHC-Options: -Wall
|
||||
Extensions: PackageImports
|
||||
|
|
Loading…
Reference in a new issue