From 6babb2c73f009657697b0d9ebce15310f8b39292 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Tue, 9 Apr 2019 19:58:24 -0400 Subject: [PATCH] remove wrong uniqueness constraint from ContentIdentifier db Fix bug that caused importing from a special remote to repeatedly download unchanged files when multiple files in the remote have the same content. Unfortunately, there's really no good way to remove a uniqueness constraint from a sqlite database. The best that can be done is to make a new table and copy the data over. But that would require using persistent's migrations or raw sql, and I don't want to do either. Instead, a sledgehammer approach: Renamed .git/annex/cid to .git/annex/cids. When the new database doesn't exist, it will be populated from the git-annex branch. Nothing deletes the old database. Don't want to delete it out from under some long-running git-annex process that might be using it. It could eventually be deleted. But this is such a new feature, probably few repos have the database in any case. --- Annex/Locations.hs | 8 ++++++-- CHANGELOG | 3 +++ Database/ContentIdentifier.hs | 7 ++----- ...repeated_import_of_same_content_files_from_remote.mdwn | 2 ++ 4 files changed, 13 insertions(+), 7 deletions(-) diff --git a/Annex/Locations.hs b/Annex/Locations.hs index 07abad2b2b..8d212fe10a 100644 --- a/Annex/Locations.hs +++ b/Annex/Locations.hs @@ -355,9 +355,13 @@ gitAnnexExportLock u r = gitAnnexExportDbDir u r ++ ".lck" gitAnnexExportUpdateLock :: UUID -> Git.Repo -> FilePath gitAnnexExportUpdateLock u r = gitAnnexExportDbDir u r ++ ".upl" -{- Directory containing database used to record remote content ids. -} +{- Directory containing database used to record remote content ids. + - + - (This used to be "cid", but a problem with the database caused it to + - need to be rebuilt with a new name.) 
+ -} gitAnnexContentIdentifierDbDir :: Git.Repo -> FilePath -gitAnnexContentIdentifierDbDir r = gitAnnexDir r "cid" +gitAnnexContentIdentifierDbDir r = gitAnnexDir r "cids" {- Lock file for writing to the content id database. -} gitAnnexContentIdentifierLock :: Git.Repo -> FilePath diff --git a/CHANGELOG b/CHANGELOG index 1770449afe..97ff444619 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -4,6 +4,9 @@ git-annex (7.20190323) UNRELEASED; urgency=medium to allow git-annex import of files from an Android device. This can be combined with exporttree=yes and git-annex export used to send changes back to the Android device. + * Fix bug that caused importing from a special remote to repeatedly + download unchanged files when multiple files in the remote have the same + content. -- Joey Hess Tue, 09 Apr 2019 14:07:53 -0400 diff --git a/Database/ContentIdentifier.hs b/Database/ContentIdentifier.hs index 75e5c545db..4180c58541 100644 --- a/Database/ContentIdentifier.hs +++ b/Database/ContentIdentifier.hs @@ -6,7 +6,7 @@ -} {-# LANGUAGE QuasiQuotes, TypeFamilies, TemplateHaskell #-} -{-# LANGUAGE OverloadedStrings, GADTs, FlexibleContexts #-} +{-# LANGUAGE OverloadedStrings, GADTs, FlexibleContexts, EmptyDataDecls #-} {-# LANGUAGE MultiParamTypeClasses, GeneralizedNewtypeDeriving #-} {-# LANGUAGE RankNTypes #-} @@ -51,9 +51,6 @@ ContentIdentifiers remote UUID cid ContentIdentifier key IKey - ContentIdentifiersIndexRemoteKey remote key - ContentIdentifiersIndexRemoteCID remote cid - UniqueRemoteCidKey remote cid key -- The last git-annex branch tree sha that was used to update -- ContentIdentifiers AnnexBranch @@ -93,7 +90,7 @@ flushDbQueue (ContentIdentifierHandle h) = H.flushDbQueue h -- Be sure to also update the git-annex branch when using this. 
recordContentIdentifier :: ContentIdentifierHandle -> UUID -> ContentIdentifier -> Key -> IO () recordContentIdentifier h u cid k = queueDb h $ do - void $ insertUnique $ ContentIdentifiers u cid (toIKey k) + void $ insert_ $ ContentIdentifiers u cid (toIKey k) getContentIdentifiers :: ContentIdentifierHandle -> UUID -> Key -> IO [ContentIdentifier] getContentIdentifiers (ContentIdentifierHandle h) u k = H.queryDbQueue h $ do diff --git a/doc/bugs/repeated_import_of_same_content_files_from_remote.mdwn b/doc/bugs/repeated_import_of_same_content_files_from_remote.mdwn index 4fcf10f4a8..083898f4c5 100644 --- a/doc/bugs/repeated_import_of_same_content_files_from_remote.mdwn +++ b/doc/bugs/repeated_import_of_same_content_files_from_remote.mdwn @@ -7,3 +7,5 @@ unncessarly importing in that case. --[[Joey]] Seems that the ContentIdentifier database can actually only store one cid for a given key at a time, not multiples needed by this. This needs a change to the db schema to fix, unfortunately. + +> [[done]] --[[Joey]]