add ExportTree table to export db

New table needed to look up what filenames are used in the currently
exported tree, for reasons explained in export.mdwn.

Also, added smart constructors for ExportLocation and ExportDirectory to
make sure they contain filepaths with the right direction slashes.

And some code refactoring.

This commit was sponsored by Francois Marier on Patreon.
This commit is contained in:
Joey Hess 2017-09-18 13:57:25 -04:00
parent 486902389d
commit b03d77c211
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
13 changed files with 247 additions and 93 deletions

35
Annex/Export.hs Normal file
View file

@ -0,0 +1,35 @@
{- git-annex exports
-
- Copyright 2017 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Annex.Export where
import Annex
import Annex.CatFile
import Types.Key
import qualified Git
-- An export includes both annexed files and files stored in git.
-- For the latter, a SHA1 key is synthesized.
data ExportKey = AnnexKey Key | GitKey Key
deriving (Show, Eq, Ord)
asKey :: ExportKey -> Key
asKey (AnnexKey k) = k
asKey (GitKey k) = k
exportKey :: Git.Sha -> Annex ExportKey
exportKey sha = mk <$> catKey sha
where
mk (Just k) = AnnexKey k
mk Nothing = GitKey $ Key
{ keyName = show sha
, keyVariety = SHA1Key (HasExt False)
, keySize = Nothing
, keyMtime = Nothing
, keyChunkSize = Nothing
, keyChunkNum = Nothing
}

View file

@ -21,6 +21,7 @@ import Git.Sha
import Types.Key
import Types.Remote
import Types.Export
import Annex.Export
import Annex.Content
import Annex.CatFile
import Annex.LockFile
@ -53,28 +54,6 @@ optParser _ = ExportOptions
( metavar paramTreeish
)
-- An export includes both annexed files and files stored in git.
-- For the latter, a SHA1 key is synthesized.
data ExportKey = AnnexKey Key | GitKey Key
deriving (Show, Eq, Ord)
asKey :: ExportKey -> Key
asKey (AnnexKey k) = k
asKey (GitKey k) = k
exportKey :: Git.Sha -> Annex ExportKey
exportKey sha = mk <$> catKey sha
where
mk (Just k) = AnnexKey k
mk Nothing = GitKey $ Key
{ keyName = show sha
, keyVariety = SHA1Key (HasExt False)
, keySize = Nothing
, keyMtime = Nothing
, keyChunkSize = Nothing
, keyChunkNum = Nothing
}
-- To handle renames which swap files, the exported file is first renamed
-- to a stable temporary name based on the key.
exportTempName :: ExportKey -> ExportLocation
@ -153,7 +132,8 @@ seek' o r = do
-- if this export is interrupted, there are no files left over
-- from a previous export, that are not part of this export.
c <- Annex.getState Annex.errcounter
when (c == 0) $
when (c == 0) $ do
liftIO $ recordDataSource db new
recordExport (uuid r) $ ExportChange
{ oldTreeish = map exportedTreeish old
, newTreeish = new
@ -184,24 +164,24 @@ mkDiffMap old new = do
where
combinedm (srca, dsta) (srcb, dstb) = (srca <|> srcb, dsta <|> dstb)
mkdm i = do
srcek <- getk (Git.DiffTree.srcsha i)
dstek <- getk (Git.DiffTree.dstsha i)
srcek <- getek (Git.DiffTree.srcsha i)
dstek <- getek (Git.DiffTree.dstsha i)
return $ catMaybes
[ (, (Just (Git.DiffTree.file i), Nothing)) <$> srcek
, (, (Nothing, Just (Git.DiffTree.file i))) <$> dstek
]
getk sha
getek sha
| sha == nullSha = return Nothing
| otherwise = Just <$> exportKey sha
startExport :: Remote -> ExportActions Annex -> ExportHandle -> Git.LsTree.TreeItem -> CommandStart
startExport r ea db ti = do
ek <- exportKey (Git.LsTree.sha ti)
stopUnless (liftIO $ notElem loc <$> getExportLocation db (asKey ek)) $ do
stopUnless (liftIO $ notElem loc <$> getExportedLocation db (asKey ek)) $ do
showStart "export" f
next $ performExport r ea db ek (Git.LsTree.sha ti) loc
where
loc = ExportLocation $ toInternalGitPath f
loc = mkExportLocation f
f = getTopFilePath $ Git.LsTree.file ti
performExport :: Remote -> ExportActions Annex -> ExportHandle -> ExportKey -> Sha -> ExportLocation -> CommandPerform
@ -231,7 +211,7 @@ performExport r ea db ek contentsha loc = do
cleanupExport :: Remote -> ExportHandle -> ExportKey -> ExportLocation -> CommandCleanup
cleanupExport r db ek loc = do
liftIO $ addExportLocation db (asKey ek) loc
liftIO $ addExportedLocation db (asKey ek) loc
logChange (asKey ek) (uuid r) InfoPresent
return True
@ -244,7 +224,7 @@ startUnexport r ea db f shas = do
showStart "unexport" f'
next $ performUnexport r ea db eks loc
where
loc = ExportLocation $ toInternalGitPath f'
loc = mkExportLocation f'
f' = getTopFilePath f
startUnexport' :: Remote -> ExportActions Annex -> ExportHandle -> TopFilePath -> ExportKey -> CommandStart
@ -252,7 +232,7 @@ startUnexport' r ea db f ek = do
showStart "unexport" f'
next $ performUnexport r ea db [ek] loc
where
loc = ExportLocation $ toInternalGitPath f'
loc = mkExportLocation f'
f' = getTopFilePath f
performUnexport :: Remote -> ExportActions Annex -> ExportHandle -> [ExportKey] -> ExportLocation -> CommandPerform
@ -266,11 +246,11 @@ cleanupUnexport :: Remote -> ExportActions Annex -> ExportHandle -> [ExportKey]
cleanupUnexport r ea db eks loc = do
liftIO $ do
forM_ eks $ \ek ->
removeExportLocation db (asKey ek) loc
removeExportedLocation db (asKey ek) loc
flushDbQueue db
remaininglocs <- liftIO $
concat <$> forM eks (\ek -> getExportLocation db (asKey ek))
concat <$> forM eks (\ek -> getExportedLocation db (asKey ek))
when (null remaininglocs) $
forM_ eks $ \ek ->
logChange (asKey ek) (uuid r) InfoMissing
@ -282,31 +262,31 @@ startRecoverIncomplete r ea db sha oldf
| sha == nullSha = stop
| otherwise = do
ek <- exportKey sha
let loc@(ExportLocation f) = exportTempName ek
showStart "unexport" f
liftIO $ removeExportLocation db (asKey ek) oldloc
let loc = exportTempName ek
showStart "unexport" (fromExportLocation f)
liftIO $ removeExportedLocation db (asKey ek) oldloc
next $ performUnexport r ea db [ek] loc
where
oldloc = ExportLocation $ toInternalGitPath oldf'
oldloc = mkExportLocation oldf'
oldf' = getTopFilePath oldf
startMoveToTempName :: Remote -> ExportActions Annex -> ExportHandle -> TopFilePath -> ExportKey -> CommandStart
startMoveToTempName r ea db f ek = do
let tmploc@(ExportLocation tmpf) = exportTempName ek
showStart "rename" (f' ++ " -> " ++ tmpf)
showStart "rename" (f' ++ " -> " ++ fromExportLocation tmploc)
next $ performRename r ea db ek loc tmploc
where
loc = ExportLocation $ toInternalGitPath f'
loc = mkExportLocation f'
f' = getTopFilePath f
tmploc = exportTempName ek
startMoveFromTempName :: Remote -> ExportActions Annex -> ExportHandle -> ExportKey -> TopFilePath -> CommandStart
startMoveFromTempName r ea db ek f = do
let tmploc@(ExportLocation tmpf) = exportTempName ek
stopUnless (liftIO $ elem tmploc <$> getExportLocation db (asKey ek)) $ do
showStart "rename" (tmpf ++ " -> " ++ f')
let tmploc = exportTempName ek
stopUnless (liftIO $ elem tmploc <$> getExportedLocation db (asKey ek)) $ do
showStart "rename" (exportLocation tmploc ++ " -> " ++ f')
next $ performRename r ea db ek tmploc loc
where
loc = ExportLocation $ toInternalGitPath f'
loc = mkExportLocation f'
f' = getTopFilePath f
performRename :: Remote -> ExportActions Annex -> ExportHandle -> ExportKey -> ExportLocation -> ExportLocation -> CommandPerform
@ -323,8 +303,8 @@ performRename r ea db ek src dest = do
cleanupRename :: ExportActions Annex -> ExportHandle -> ExportKey -> ExportLocation -> ExportLocation -> CommandCleanup
cleanupRename ea db ek src dest = do
liftIO $ do
removeExportLocation db (asKey ek) src
addExportLocation db (asKey ek) dest
removeExportedLocation db (asKey ek) src
addExportedLocation db (asKey ek) dest
flushDbQueue db
if exportDirectories src /= exportDirectories dest
then removeEmptyDirectories ea db src [asKey ek]

View file

@ -14,13 +14,19 @@ module Database.Export (
ExportHandle,
openDb,
closeDb,
addExportLocation,
removeExportLocation,
flushDbQueue,
getExportLocation,
recordDataSource,
getDataSource,
addExportedLocation,
removeExportedLocation,
getExportedLocation,
isExportDirectoryEmpty,
getExportTree,
updateExportTree,
ExportedId,
ExportTreeId,
ExportedDirectoryId,
DataSourceId,
) where
import Database.Types
@ -29,6 +35,11 @@ import Database.Init
import Annex.Locations
import Annex.Common hiding (delete)
import Types.Export
import Annex.Export
import Git.Types
import Git.Sha
import Git.FilePath
import qualified Git.DiffTree
import Database.Persist.TH
import Database.Esqueleto hiding (Key)
@ -36,14 +47,26 @@ import Database.Esqueleto hiding (Key)
newtype ExportHandle = ExportHandle H.DbQueue
share [mkPersist sqlSettings, mkMigrate "migrateExport"] [persistLowerCase|
-- Files that have been exported to the remote.
Exported
key IKey
file SFilePath
KeyFileIndex key file
ExportedIndex key file
-- The tree that has been exported to the remote.
-- Not all of these files are necessarily present on the remote yet.
ExportTree
key IKey
file SFilePath
ExportTreeIndex key file
-- Directories that exist on the remote, and the files that are in them.
ExportedDirectory
subdir SFilePath
file SFilePath
SubdirFileIndex subdir file
ExportedDirectoryIndex subdir file
-- Record of what tree the current database content comes from.
DataSource
tree SRef
UniqueTree tree
|]
{- Opens the database, creating it if it doesn't exist yet. -}
@ -68,48 +91,110 @@ queueDb (ExportHandle h) = H.queueDb h checkcommit
| sz > 1000 = return True
| otherwise = return False
addExportLocation :: ExportHandle -> Key -> ExportLocation -> IO ()
addExportLocation h k el@(ExportLocation f) = queueDb h $ do
flushDbQueue :: ExportHandle -> IO ()
flushDbQueue (ExportHandle h) = H.flushDbQueue h
recordDataSource :: ExportHandle -> Sha -> IO ()
recordDataSource h s = queueDb h $ do
delete $ from $ \r -> do
where_ (r ^. DataSourceTree ==. r ^. DataSourceTree)
void $ insertUnique $ DataSource (toSRef s)
getDataSource :: ExportHandle -> IO (Maybe Sha)
getDataSource (ExportHandle h) = H.queryDbQueue h $ do
l <- select $ from $ \r -> do
where_ (r ^. DataSourceTree ==. r ^. DataSourceTree)
return (r ^. DataSourceTree)
case l of
(s:[]) -> return (Just (fromSRef (unValue s)))
_ -> return Nothing
addExportedLocation :: ExportHandle -> Key -> ExportLocation -> IO ()
addExportedLocation h k el = queueDb h $ do
void $ insertUnique $ Exported ik ef
insertMany_ $ map
(\(ExportDirectory d) -> ExportedDirectory (toSFilePath d) ef)
(\ed -> ExportedDirectory (toSFilePath (fromExportDirectory ed)) ef)
(exportDirectories el)
where
ik = toIKey k
ef = toSFilePath f
ef = toSFilePath (fromExportLocation el)
removeExportLocation :: ExportHandle -> Key -> ExportLocation -> IO ()
removeExportLocation h k el@(ExportLocation f) = queueDb h $ do
removeExportedLocation :: ExportHandle -> Key -> ExportLocation -> IO ()
removeExportedLocation h k el = queueDb h $ do
delete $ from $ \r -> do
where_ (r ^. ExportedKey ==. val ik &&. r ^. ExportedFile ==. val ef)
let subdirs = map (\(ExportDirectory d) -> toSFilePath d)
let subdirs = map (toSFilePath . fromExportDirectory)
(exportDirectories el)
delete $ from $ \r -> do
where_ (r ^. ExportedDirectoryFile ==. val ef
&&. r ^. ExportedDirectorySubdir `in_` valList subdirs)
where
ik = toIKey k
ef = toSFilePath f
flushDbQueue :: ExportHandle -> IO ()
flushDbQueue (ExportHandle h) = H.flushDbQueue h
ef = toSFilePath (fromExportLocation el)
{- Note that this does not see recently queued changes. -}
getExportLocation :: ExportHandle -> Key -> IO [ExportLocation]
getExportLocation (ExportHandle h) k = H.queryDbQueue h $ do
getExportedLocation :: ExportHandle -> Key -> IO [ExportLocation]
getExportedLocation (ExportHandle h) k = H.queryDbQueue h $ do
l <- select $ from $ \r -> do
where_ (r ^. ExportedKey ==. val ik)
return (r ^. ExportedFile)
return $ map (ExportLocation . fromSFilePath . unValue) l
return $ map (mkExportLocation . fromSFilePath . unValue) l
where
ik = toIKey k
{- Note that this does not see recently queued changes. -}
isExportDirectoryEmpty :: ExportHandle -> ExportDirectory -> IO Bool
isExportDirectoryEmpty (ExportHandle h) (ExportDirectory d) = H.queryDbQueue h $ do
isExportDirectoryEmpty (ExportHandle h) d = H.queryDbQueue h $ do
l <- select $ from $ \r -> do
where_ (r ^. ExportedDirectorySubdir ==. val ed)
return (r ^. ExportedDirectoryFile)
return $ null l
where
ed = toSFilePath d
ed = toSFilePath $ fromExportDirectory d
{- Get locations in the export that might contain a key. -}
getExportTree :: ExportHandle -> Key -> IO [ExportLocation]
getExportTree (ExportHandle h) k = H.queryDbQueue h $ do
l <- select $ from $ \r -> do
where_ (r ^. ExportTreeKey ==. val ik)
return (r ^. ExportTreeFile)
return $ map (mkExportLocation . fromSFilePath . unValue) l
where
ik = toIKey k
addExportTree :: ExportHandle -> Key -> ExportLocation -> IO ()
addExportTree h k loc = queueDb h $
void $ insertUnique $ Exported ik ef
where
ik = toIKey k
ef = toSFilePath (fromExportLocation loc)
removeExportTree :: ExportHandle -> Key -> ExportLocation -> IO ()
removeExportTree h k loc = queueDb h $
delete $ from $ \r ->
where_ (r ^. ExportTreeKey ==. val ik &&. r ^. ExportTreeFile ==. val ef)
where
ik = toIKey k
ef = toSFilePath (fromExportLocation loc)
{- Diff from the old to the new tree and update the ExportTree table. -}
updateExportTree :: ExportHandle -> Sha -> Sha -> Annex ()
updateExportTree h old new = do
(diff, cleanup) <- inRepo $
Git.DiffTree.diffTreeRecursive old new
forM_ diff $ \i -> do
let loc = mkExportLocation $ getTopFilePath $
Git.DiffTree.file i
srcek <- getek (Git.DiffTree.srcsha i)
case srcek of
Nothing -> return ()
Just k -> liftIO $ removeExportTree h (asKey k) loc
dstek <- getek (Git.DiffTree.dstsha i)
case dstek of
Nothing -> return ()
Just k -> liftIO $ addExportTree h (asKey k) loc
void $ liftIO cleanup
where
getek sha
| sha == nullSha = return Nothing
| otherwise = Just <$> exportKey sha

View file

@ -1,6 +1,6 @@
{- types for SQL databases
-
- Copyright 2015-2016 Joey Hess <id@joeyh.name>
- Copyright 2015-2017 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
@ -16,6 +16,7 @@ import Data.Char
import Utility.PartialPrelude
import Key
import Utility.InodeCache
import Git.Types (Ref(..))
-- A serialized Key
newtype SKey = SKey String
@ -93,3 +94,21 @@ fromSFilePath (SFilePath s) = s
derivePersistField "SFilePath"
-- A serialized Ref
newtype SRef = SRef Ref
-- Note that Read instance does not work when used in any kind of complex
-- data structure.
instance Read SRef where
readsPrec _ s = [(SRef (Ref s), "")]
instance Show SRef where
show (SRef (Ref s)) = s
derivePersistField "SRef"
toSRef :: Ref -> SRef
toSRef = SRef
fromSRef :: SRef -> Ref
fromSRef (SRef r) = r

View file

@ -274,14 +274,14 @@ renameExportM d _k oldloc newloc = liftIO $ catchBoolIO $ do
dest = exportPath d newloc
exportPath :: FilePath -> ExportLocation -> FilePath
exportPath d (ExportLocation loc) = d </> loc
exportPath d loc = d </> fromExportLocation loc
{- Removes the ExportLocation directory and its parents, so long as
- they're empty, up to but not including the topdir. -}
removeExportLocation :: FilePath -> ExportLocation -> IO ()
removeExportLocation topdir (ExportLocation loc) = go (Just loc) (Right ())
removeExportLocation topdir loc = go (Just $ fromExportLocation loc) (Right ())
where
go _ (Left _e) = return ()
go Nothing _ = return ()
go (Just loc') _ = go (upFrom loc')
=<< tryIO (removeDirectory $ exportPath topdir (ExportLocation loc'))
=<< tryIO (removeDirectory $ exportPath topdir (mkExportLocation loc'))

View file

@ -358,9 +358,9 @@ instance Proto.Serializable URI where
deserialize = parseURI
instance Proto.Serializable ExportLocation where
serialize (ExportLocation loc) = loc
deserialize = Just . ExportLocation
serialize = fromExportLocation
deserialize = Just . mkExportLocation
instance Proto.Serializable ExportDirectory where
serialize (ExportDirectory loc) = loc
deserialize = Just . ExportDirectory
serialize = fromExportDirectory
deserialize = Just . mkExportDirectory

View file

@ -93,7 +93,7 @@ adjustExportable r = case M.lookup "exporttree" (config r) of
-- Storing a key on an export would need a way to
-- look up the file(s) that the currently exported
-- tree uses for a key; there's not currently an
-- inexpensive way to do that (getExportLocation
-- inexpensive way to do that (getExportedLocation
-- only finds files that have been stored on the
-- export already).
{ storeKey = \_ _ _ -> do
@ -105,7 +105,7 @@ adjustExportable r = case M.lookup "exporttree" (config r) of
, retrieveKeyFile = \k _af dest p -> unVerified $
if maybe False (isJust . verifyKeyContent) (maybeLookupBackendVariety (keyVariety k))
then do
locs <- liftIO $ getExportLocation db k
locs <- liftIO $ getExportedLocation db k
case locs of
[] -> do
warning "unknown export location"
@ -136,7 +136,7 @@ adjustExportable r = case M.lookup "exporttree" (config r) of
, checkPresent = \k -> do
ea <- exportActions r
anyM (checkPresentExport ea k)
=<< liftIO (getExportLocation db k)
=<< liftIO (getExportedLocation db k)
, mkUnavailable = return Nothing
, getInfo = do
is <- getInfo r
@ -155,10 +155,10 @@ removeEmptyDirectories ea db loc ks
ok <- allM (go removeexportdirectory)
(reverse (exportDirectories loc))
unless ok $ liftIO $ do
-- Add back to export database, so this is
-- tried again next time.
-- Add location back to export database,
-- so this is tried again next time.
forM_ ks $ \k ->
addExportLocation db k loc
addExportedLocation db k loc
flushDbQueue db
return ok
where

View file

@ -615,7 +615,7 @@ getBucketObject c = munge . key2file
_ -> getFilePrefix c ++ s
getBucketExportLocation :: RemoteConfig -> ExportLocation -> FilePath
getBucketExportLocation c (ExportLocation loc) = getFilePrefix c ++ loc
getBucketExportLocation c loc = getFilePrefix c ++ fromExportLocation loc
{- Internet Archive documentation limits filenames to a subset of ascii.
- While other characters seem to work now, this entity encodes everything

View file

@ -204,8 +204,8 @@ removeExportDav mh _k loc = runExport mh $ \_dav ->
removeHelper (exportLocation loc)
removeExportDirectoryDav :: Maybe DavHandle -> ExportDirectory -> Annex Bool
removeExportDirectoryDav mh (ExportDirectory dir) = runExport mh $ \_dav ->
safely (inLocation dir delContentM)
removeExportDirectoryDav mh dir = runExport mh $ \_dav ->
safely (inLocation (fromExportDirectory dir) delContentM)
>>= maybe (return False) (const $ return True)
renameExportDav :: Maybe DavHandle -> Key -> ExportLocation -> ExportLocation -> Annex Bool

View file

@ -47,7 +47,7 @@ keyLocation :: Key -> DavLocation
keyLocation k = keyDir k ++ keyFile k
exportLocation :: ExportLocation -> DavLocation
exportLocation (ExportLocation f) = f
exportLocation = fromExportLocation
{- Where we store temporary data for a key as it's being uploaded. -}
keyTmpLocation :: Key -> DavLocation

View file

@ -5,19 +5,41 @@
- Licensed under the GNU GPL version 3 or higher.
-}
module Types.Export where
module Types.Export (
ExportLocation,
mkExportLocation,
fromExportLocation,
ExportDirectory,
mkExportDirectory,
fromExportDirectory,
exportDirectories,
) where
import Git.FilePath
import qualified System.FilePath.Posix as Posix
-- A location on a remote that a key can be exported to.
-- The FilePath will be relative to the top of the export,
-- and may contain unix-style path separators.
-- and uses unix-style path separators.
newtype ExportLocation = ExportLocation FilePath
deriving (Show, Eq)
mkExportLocation :: FilePath -> ExportLocation
mkExportLocation = ExportLocation . toInternalGitPath
fromExportLocation :: ExportLocation -> FilePath
fromExportLocation (ExportLocation f) = f
newtype ExportDirectory = ExportDirectory FilePath
deriving (Show, Eq)
mkExportDirectory :: FilePath -> ExportDirectory
mkExportDirectory = ExportDirectory . toInternalGitPath
fromExportDirectory :: ExportDirectory -> FilePath
fromExportDirectory (ExportDirectory f) = f
-- | All subdirectories down to the ExportLocation, with the deepest ones
-- last. Does not include the top of the export.
exportDirectories :: ExportLocation -> [ExportDirectory]

View file

@ -26,18 +26,30 @@ Work is in progress. Todo list:
export database is not populated. So, seems that the export database needs
to get populated based on the export log in these cases.
This needs a (local) record of what tree the (local) export db
was last updated for, which is updated at the same time as the export log.
One way to record that would be as a git ref.
This needs the db to contain a record of the data source,
the tree that most recently populated it.
When the export log contains a different tree than the local
record, the export was updated in another repository, and so the
When the export log contains a different tree than the data source,
the export was updated in another repository, and so the
export db needs to be updated.
Updating the export db could diff the last exported treeish with the
Updating the export db could diff the data source with the
logged treeish. Add/delete exported files from the database to get
it to the same state as the remote database.
When an export is incomplete, the database is in some
state in between the data source tree and the incompletely
exported tree. Diffing won't resolve this.
When to record the data source? If it's done at the same time the export
is recorded (as no longer incomplete) in the export log, all the files
have not yet been uploaded to the export, and the the database is not
fully updated to match the data source.
Seems that we need a separate table, to be able to look up filenames
from the export tree by key. That table can be fully populated,
before the Exported table is.
* tracking exports
* Support configuring export in the assistant

View file

@ -506,6 +506,7 @@ Executable git-annex
Annex.Direct
Annex.Drop
Annex.Environment
Annex.Export
Annex.FileMatcher
Annex.Fixup
Annex.GitOverlay