From c6e693b25de49d4d3b2fedb49ffb42f04f5fd544 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 23 Dec 2020 13:58:01 -0400 Subject: [PATCH] remove ContentIndentifiersCidRemoteIndex uniqueness constraint For reasons explained in the bug report. Implemented using a persistent migration, which works fine. It may add a little startup overhead when a remote is enabled that uses this, but probably unnoticeable. On the next major version, it would be fine to delete this database, and regenerate it from the git-annex branch information. Then this change could be reverted. Did nothing about adding back the data that got dropped from the db due to the bug. Only the borg special remote was probably affected, and it's not been released yet. rm -rf .git/annex/cidsdb does work. --- Database/ContentIdentifier.hs | 13 ++++++++++--- ...on_unique_contentidentifier_which_gets_lost.mdwn | 7 ++++--- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/Database/ContentIdentifier.hs b/Database/ContentIdentifier.hs index da7f9545de..a65b4bc569 100644 --- a/Database/ContentIdentifier.hs +++ b/Database/ContentIdentifier.hs @@ -52,7 +52,9 @@ import qualified Utility.RawFilePath as R import Database.Persist.Sql hiding (Key) import Database.Persist.TH +import Database.Persist.Sqlite (runSqlite) import qualified System.FilePath.ByteString as P +import qualified Data.Text as T data ContentIdentifierHandle = ContentIdentifierHandle H.DbQueue @@ -62,7 +64,6 @@ ContentIdentifiers cid ContentIdentifier key Key ContentIndentifiersKeyRemoteCidIndex key remote cid - ContentIndentifiersCidRemoteIndex cid remote -- The last git-annex branch tree sha that was used to update -- ContentIdentifiers AnnexBranch @@ -79,9 +80,15 @@ openDb :: Annex ContentIdentifierHandle openDb = do dbdir <- fromRepo gitAnnexContentIdentifierDbDir let db = dbdir P.</>
"db" - unlessM (liftIO $ R.doesPathExist db) $ do - initDb db $ void $ + ifM (liftIO $ not <$> R.doesPathExist db) + ( initDb db $ void $ runMigrationSilent migrateContentIdentifier + -- Migrate from old version of database, which had + -- an incorrect uniqueness constraint on the + -- ContentIdentifiers table. + , liftIO $ runSqlite (T.pack (fromRawFilePath db)) $ void $ + runMigrationSilent migrateContentIdentifier + ) h <- liftIO $ H.openDbQueue H.SingleWriter db "content_identifiers" return $ ContentIdentifierHandle h diff --git a/doc/bugs/borg_uses_non_unique_contentidentifier_which_gets_lost.mdwn b/doc/bugs/borg_uses_non_unique_contentidentifier_which_gets_lost.mdwn index 1138c908a5..a5a4febacd 100644 --- a/doc/bugs/borg_uses_non_unique_contentidentifier_which_gets_lost.mdwn +++ b/doc/bugs/borg_uses_non_unique_contentidentifier_which_gets_lost.mdwn @@ -1,6 +1,6 @@ borg uses a non-unique ContentIdentifier ("") for everything. -I think this is why, it eventually gets lost from the sqlite database, -preventing retrieval of content from the remote. +I think this is why, it eventually gets lost from the sqlite database for +some keys, preventing retrieval of content from the remote. Repositories affected by this problem can be fixed up by just: `rm -rf .git/annex/cidsdb` @@ -24,4 +24,5 @@ And if a remote uses a hash for generating ContentIdentifiers, two different Key can have the same content in edge cases. So, need to upgrade the database, removing this constraint from it. ---[[Joey]] + +>> [[done]] --[[Joey]]