Merge attachments and update notes (#2336)
We follow a different merge procedure for each attachment type:
- For PDF attachments, compare by MD5. If no match, get the top 50 words in the attachment's text and hash those, then check again for a match. Update references to item keys in notes and annotations.
- For web (snapshot / link) attachments, compare by title and URL. Prefer a title + URL match but accept a title-only match.
- For other attachment types, keep all attachments from all items being merged.

Also:
- Move most merge tests from Duplicates to Items#merge(). It just doesn't make sense to worry about the UI in these.
This commit is contained in:
parent
8e8b03e5ff
commit
ef82becf00
13 changed files with 753 additions and 95 deletions
|
@ -2916,7 +2916,7 @@ Zotero.Item.prototype.deleteAttachmentFile = Zotero.Promise.coroutine(function*
|
|||
* Return a file:/// URL path to files and snapshots
|
||||
*/
|
||||
Zotero.Item.prototype.getLocalFileURL = function() {
|
||||
if (!this.isAttachment) {
|
||||
if (!this.isAttachment()) {
|
||||
throw ("getLocalFileURL() can only be called on attachment items");
|
||||
}
|
||||
|
||||
|
|
|
@ -925,19 +925,20 @@ Zotero.Items = function() {
|
|||
*
|
||||
* @param {Zotero.Item} fromItem
|
||||
* @param {Zotero.Item} toItem
|
||||
* @param {Boolean} includeTrashed
|
||||
* @return {Promise}
|
||||
*/
|
||||
this.moveChildItems = async function (fromItem, toItem) {
|
||||
this.moveChildItems = async function (fromItem, toItem, includeTrashed = false) {
|
||||
//Zotero.DB.requireTransaction();
|
||||
|
||||
// Annotations on files
|
||||
if (fromItem.isFileAttachment()) {
|
||||
let fn = async function () {
|
||||
let annotations = fromItem.getAnnotations();
|
||||
let annotations = fromItem.getAnnotations(includeTrashed);
|
||||
for (let annotation of annotations) {
|
||||
annotation.parentItemID = toItem.id;
|
||||
await annotation.save();
|
||||
};
|
||||
}
|
||||
};
|
||||
|
||||
if (!Zotero.DB.inTransaction) {
|
||||
|
@ -955,68 +956,42 @@ Zotero.Items = function() {
|
|||
|
||||
this.merge = function (item, otherItems) {
|
||||
Zotero.debug("Merging items");
|
||||
|
||||
|
||||
return Zotero.DB.executeTransaction(function* () {
|
||||
var otherItemIDs = [];
|
||||
var itemURI = Zotero.URI.getItemURI(item);
|
||||
|
||||
var replPred = Zotero.Relations.replacedItemPredicate;
|
||||
var toSave = {};
|
||||
toSave[item.id] = item;
|
||||
|
||||
var earliestDateAdded = item.dateAdded;
|
||||
|
||||
let remapAttachmentKeys = yield this._mergePDFAttachments(item, otherItems);
|
||||
yield this._mergeWebAttachments(item, otherItems);
|
||||
yield this._mergeOtherAttachments(item, otherItems);
|
||||
|
||||
for (let otherItem of otherItems) {
|
||||
if (otherItem.libraryID !== item.libraryID) {
|
||||
throw new Error('Items being merged must be in the same library');
|
||||
}
|
||||
|
||||
// Use the earliest date added of all the items
|
||||
if (otherItem.dateAdded < earliestDateAdded) {
|
||||
earliestDateAdded = otherItem.dateAdded;
|
||||
}
|
||||
|
||||
let otherItemURI = Zotero.URI.getItemURI(otherItem);
|
||||
|
||||
// Move child items to master
|
||||
var ids = otherItem.getAttachments(true).concat(otherItem.getNotes(true));
|
||||
for (let id of ids) {
|
||||
var attachment = yield this.getAsync(id);
|
||||
|
||||
// TODO: Skip identical children?
|
||||
|
||||
attachment.parentID = item.id;
|
||||
yield attachment.save();
|
||||
// Move notes to master
|
||||
var noteIDs = otherItem.getNotes(true);
|
||||
for (let id of noteIDs) {
|
||||
var note = yield this.getAsync(id);
|
||||
note.parentItemID = item.id;
|
||||
Zotero.Notes.replaceItemKey(note, otherItem.key, item.key);
|
||||
Zotero.Notes.replaceAllItemKeys(note, remapAttachmentKeys);
|
||||
toSave[note.id] = note;
|
||||
}
|
||||
|
||||
// Add relations to master
|
||||
let oldRelations = otherItem.getRelations();
|
||||
for (let pred in oldRelations) {
|
||||
oldRelations[pred].forEach(obj => item.addRelation(pred, obj));
|
||||
}
|
||||
// Move relations to master
|
||||
yield this._moveRelations(otherItem, item);
|
||||
|
||||
// Remove merge-tracking relations from other item, so that there aren't two
|
||||
// subjects for a given deleted object
|
||||
let replItems = otherItem.getRelationsByPredicate(replPred);
|
||||
for (let replItem of replItems) {
|
||||
otherItem.removeRelation(replPred, replItem);
|
||||
}
|
||||
|
||||
// Update relations on items in the library that point to the other item
|
||||
// to point to the master instead
|
||||
let rels = yield Zotero.Relations.getByObject('item', otherItemURI);
|
||||
for (let rel of rels) {
|
||||
// Skip merge-tracking relations, which are dealt with above
|
||||
if (rel.predicate == replPred) continue;
|
||||
// Skip items in other libraries. They might not be editable, and even
|
||||
// if they are, merging items in one library shouldn't affect another library,
|
||||
// so those will follow the merge-tracking relations and can optimize their
|
||||
// path if they're resaved.
|
||||
if (rel.subject.libraryID != item.libraryID) continue;
|
||||
rel.subject.removeRelation(rel.predicate, otherItemURI);
|
||||
rel.subject.addRelation(rel.predicate, itemURI);
|
||||
if (!toSave[rel.subject.id]) {
|
||||
toSave[rel.subject.id] = rel.subject;
|
||||
}
|
||||
}
|
||||
|
||||
// All other operations are additive only and do not affect the,
|
||||
// All other operations are additive only and do not affect the
|
||||
// old item, which will be put in the trash
|
||||
|
||||
// Add collections to master
|
||||
|
@ -1042,12 +1017,9 @@ Zotero.Items = function() {
|
|||
}
|
||||
}
|
||||
|
||||
// Add relation to track merge
|
||||
item.addRelation(replPred, otherItemURI);
|
||||
|
||||
// Trash other item
|
||||
otherItem.deleted = true;
|
||||
yield otherItem.save();
|
||||
toSave[otherItem.id] = otherItem;
|
||||
}
|
||||
|
||||
item.setField('dateAdded', earliestDateAdded);
|
||||
|
@ -1060,7 +1032,312 @@ Zotero.Items = function() {
|
|||
Zotero.Notifier.trigger('removeDuplicatesMaster', 'item', item.id);
|
||||
}.bind(this));
|
||||
};
|
||||
|
||||
|
||||
|
||||
/**
 * Merge the PDF attachments of otherItems into the PDF attachments of item.
 *
 * Each PDF attachment of each other item is compared against the master
 * item's file attachments, first by file hash and then, if that fails, by a
 * hash of the most common words in its text. A matching attachment has its
 * child items, embedded note and relations moved to the master attachment and
 * is then trashed. An attachment without an unmerged match is moved to the
 * master item as-is.
 * Requires a transaction.
 *
 * @param {Zotero.Item} item - Master item
 * @param {Zotero.Item[]} otherItems - Items being merged into the master
 * @return {Promise<Map<String, String>>} Map from the key of each trashed
 *     attachment to the key of the master attachment it was merged into
 */
this._mergePDFAttachments = async function (item, otherItems) {
	Zotero.DB.requireTransaction();

	let remapAttachmentKeys = new Map();
	let masterAttachmentHashes = await this._hashItem(item, 'bytes');
	let hashesIncludeText = false;

	for (let otherItem of otherItems) {
		// A master attachment may absorb at most one attachment per other item
		let mergedMasterAttachments = new Set();

		for (let otherAttachment of await this.getAsync(otherItem.getAttachments(true))) {
			if (!otherAttachment.isPDFAttachment()) {
				continue;
			}

			// First check if master has an attachment with identical MD5 hash
			let matchingHash = await otherAttachment.attachmentHash;
			let masterAttachmentID = masterAttachmentHashes.get(matchingHash);

			if (!masterAttachmentID && item.numAttachments(true)) {
				// If that didn't work, hash master attachments by the
				// most common words in their text and check again.
				// The text hashes are computed lazily and only once.
				if (!hashesIncludeText) {
					masterAttachmentHashes = new Map([
						...masterAttachmentHashes,
						...await this._hashItem(item, 'text')
					]);
					hashesIncludeText = true;
				}

				matchingHash = await this._hashAttachmentText(otherAttachment);
				masterAttachmentID = masterAttachmentHashes.get(matchingHash);
			}

			if (!masterAttachmentID || mergedMasterAttachments.has(masterAttachmentID)) {
				Zotero.debug(`No unmerged match for attachment ${otherAttachment.id} in master item - moving`);
				otherAttachment.parentItemID = item.id;
				await otherAttachment.save();
				continue;
			}
			mergedMasterAttachments.add(masterAttachmentID);

			let masterAttachment = await this.getAsync(masterAttachmentID);

			if (masterAttachment.attachmentContentType !== otherAttachment.attachmentContentType) {
				Zotero.debug(`Master attachment ${masterAttachmentID} matches ${otherAttachment.id}, `
					+ 'but content types differ - moving');
				otherAttachment.parentItemID = item.id;
				await otherAttachment.save();
				continue;
			}

			Zotero.debug(`Master attachment ${masterAttachmentID} matches ${otherAttachment.id} - merging`);
			await this.moveChildItems(otherAttachment, masterAttachment, true);
			await this._moveEmbeddedNote(otherAttachment, masterAttachment);
			await this._moveRelations(otherAttachment, masterAttachment);

			otherAttachment.deleted = true;
			await otherAttachment.save();

			// Later on, when processing notes, we'll use this to remap
			// URLs pointing to the old attachment.
			remapAttachmentKeys.set(otherAttachment.key, masterAttachment.key);

			// Items can only have one replaced item predicate.
			// getRelationsByPredicate() returns an array, which is always
			// truthy, so emptiness has to be checked via its length.
			let replacedItemPredicate = Zotero.Relations.replacedItemPredicate;
			if (!masterAttachment.getRelationsByPredicate(replacedItemPredicate).length) {
				masterAttachment.addRelation(replacedItemPredicate,
					Zotero.URI.getItemURI(otherAttachment));
			}

			await masterAttachment.save();
		}
	}

	return remapAttachmentKeys;
};
|
||||
|
||||
|
||||
/**
 * Merge the web attachments (as determined by isWebAttachment()) of
 * otherItems into those of item.
 *
 * A master attachment with the same title, URL and link mode is preferred;
 * failing that, one with the same title and link mode is accepted. A matched
 * attachment is trashed after its relations are moved to the master
 * attachment; an unmatched one is moved to the master item.
 * Requires a transaction.
 *
 * @param {Zotero.Item} item - Master item
 * @param {Zotero.Item[]} otherItems - Items being merged into the master
 * @return {Promise}
 */
this._mergeWebAttachments = async function (item, otherItems) {
	Zotero.DB.requireTransaction();

	let masterChildren = await this.getAsync(item.getAttachments(true));
	// Master web attachments that have not absorbed anything yet
	let candidates = masterChildren.filter(child => child.isWebAttachment());

	for (let duplicateItem of otherItems) {
		let duplicateChildren = await this.getAsync(duplicateItem.getAttachments(true));

		for (let webAttachment of duplicateChildren) {
			if (!webAttachment.isWebAttachment()) {
				continue;
			}

			let sameTitleURLAndMode = candidate => (
				candidate.getField('title') == webAttachment.getField('title')
				&& candidate.getField('url') == webAttachment.getField('url')
				&& candidate.attachmentLinkMode === webAttachment.attachmentLinkMode
			);
			let sameTitleAndMode = candidate => (
				candidate.getField('title') == webAttachment.getField('title')
				&& candidate.attachmentLinkMode === webAttachment.attachmentLinkMode
			);

			// Try the strict match first, then fall back to title only
			let target = candidates.find(sameTitleURLAndMode);
			if (!target) {
				target = candidates.find(sameTitleAndMode);
			}

			if (target) {
				webAttachment.deleted = true;
				await this._moveRelations(webAttachment, target);
				await webAttachment.save();

				target.addRelation(Zotero.Relations.replacedItemPredicate,
					Zotero.URI.getItemURI(webAttachment));
				await target.save();

				// Each master attachment can only be matched once
				candidates = candidates.filter(candidate => candidate !== target);
			}
			else {
				Zotero.debug(`No match for web attachment ${webAttachment.id} in master item - moving`);
				webAttachment.parentItemID = item.id;
				await webAttachment.save();
			}
		}
	}
};
|
||||
|
||||
|
||||
/**
 * Move every attachment of otherItems that is neither a PDF attachment nor
 * a web attachment to item, without attempting to detect duplicates.
 * Requires a transaction.
 *
 * @param {Zotero.Item} item - Master item
 * @param {Zotero.Item[]} otherItems - Items being merged into the master
 * @return {Promise}
 */
this._mergeOtherAttachments = async function (item, otherItems) {
	Zotero.DB.requireTransaction();

	for (let duplicateItem of otherItems) {
		let children = await this.getAsync(duplicateItem.getAttachments(true));

		for (let child of children) {
			// PDF and web attachments are left to their own merge procedures
			let handledElsewhere = child.isPDFAttachment() || child.isWebAttachment();
			if (!handledElsewhere) {
				child.parentItemID = item.id;
				await child.save();
			}
		}
	}
};
|
||||
|
||||
|
||||
/**
 * Compute a hash for every file attachment of the given item and index the
 * attachments by it. Attachments for which no hash could be produced are
 * left out of the result.
 *
 * @param {Zotero.Item} item
 * @param {String} hashType 'bytes' to use the attachment's file hash,
 *     'text' to hash the most common words of its text
 * @return {Promise<Map<String, String>>} Map from hash to attachment ID
 */
this._hashItem = async function (item, hashType) {
	let useFileHash = hashType === 'bytes';
	if (!useFileHash && hashType !== 'text') {
		throw new Error('Invalid hash type');
	}

	let children = await this.getAsync(item.getAttachments(true));
	let fileAttachments = children.filter(child => child.isFileAttachment());

	let idsByHash = new Map();
	let pending = fileAttachments.map(async (fileAttachment) => {
		let hash;
		if (useFileHash) {
			hash = await fileAttachment.attachmentHash;
		}
		else {
			hash = await this._hashAttachmentText(fileAttachment);
		}
		if (hash) {
			idsByHash.set(hash, fileAttachment.id);
		}
	});
	await Promise.all(pending);

	return idsByHash;
};
|
||||
|
||||
|
||||
/**
 * Hash an attachment by the most common words in its text.
 *
 * No hash is produced (null is returned) when the attachment's file cannot
 * be located, when the file is larger than 5e8 bytes, when the attachment
 * has no text, or when the text yields fewer than 10 distinct words.
 *
 * @param {Zotero.Item} attachment
 * @return {Promise<String|null>} MD5 of the sorted, space-joined words, or null
 */
this._hashAttachmentText = async function (attachment) {
	// NOTE(review): getFilePathAsync() is expected to resolve to a falsy
	// value when the file is missing; bail out instead of letting
	// OS.File.stat() throw and abort the whole merge.
	let path = await attachment.getFilePathAsync();
	if (!path) {
		Zotero.debug('_hashAttachmentText: Attachment file not found');
		return null;
	}

	if ((await OS.File.stat(path)).size > 5e8) {
		Zotero.debug('_hashAttachmentText: Attachment too large');
		return null;
	}

	let text = await attachment.attachmentText;
	if (!text) {
		Zotero.debug('_hashAttachmentText: Attachment has no text');
		return null;
	}

	let mostCommonWords = this._getMostCommonWords(text, 50);
	if (mostCommonWords.length < 10) {
		Zotero.debug('_hashAttachmentText: Not enough unique words');
		return null;
	}
	// Sort so that the hash depends only on which words are most common,
	// not on their relative frequencies
	return Zotero.Utilities.Internal.md5(mostCommonWords.sort().join(' '));
};
|
||||
|
||||
|
||||
/**
 * Get the n most common words in s in descending order of frequency.
 * If s contains fewer than n unique words, the size of the returned array
 * will be less than n.
 *
 * Words are delimited by whitespace, lowercased, and reduced to their letter
 * characters; only words longer than three characters are counted.
 *
 * @param {String} s
 * @param {Number} n
 * @return {String[]}
 */
this._getMostCommonWords = function (s, n) {
	// Use an iterative approach for better performance.

	const whitespaceRe = /\s/;
	const wordCharRe = /\p{Letter}/u; // [a-z] only matches Latin

	let freqs = new Map();
	let currentWord = '';

	// Record the word accumulated so far (if long enough) and start a new one
	let finishWord = () => {
		if (currentWord.length > 3) {
			freqs.set(currentWord, (freqs.get(currentWord) || 0) + 1);
		}
		currentWord = '';
	};

	for (let codePoint of s) {
		if (whitespaceRe.test(codePoint)) {
			finishWord();
			continue;
		}

		if (wordCharRe.test(codePoint)) {
			currentWord += codePoint.toLowerCase();
		}
	}

	// The last word has no trailing whitespace to terminate it when the
	// text doesn't end in whitespace, so count it explicitly.
	finishWord();

	// Break ties in locale order.
	return [...freqs.keys()]
		.sort((a, b) => (freqs.get(b) - freqs.get(a)) || Zotero.localeCompare(a, b))
		.slice(0, n);
};
|
||||
|
||||
/**
 * Move fromItem's embedded note, if it has one, to toItem.
 * If toItem already has an embedded note, the note will be added as a new
 * child note item on toItem's parent.
 * Requires a transaction.
 *
 * fromItem's note is cleared but fromItem itself is not saved here.
 *
 * @param {Zotero.Item} fromItem
 * @param {Zotero.Item} toItem
 * @return {Promise}
 */
this._moveEmbeddedNote = async function (fromItem, toItem) {
	Zotero.DB.requireTransaction();

	if (fromItem.getNote()) {
		let noteItem = toItem;
		if (toItem.getNote()) {
			noteItem = new Zotero.Item('note');
			// NOTE(review): a newly created item presumably doesn't default
			// to toItem's library; set it explicitly so that the note can be
			// parented under an item in that library (e.g. a group library).
			noteItem.libraryID = toItem.libraryID;
			noteItem.parentItemID = toItem.parentItemID;
		}
		noteItem.setNote(fromItem.getNote());
		fromItem.setNote('');
		// Links in the moved note should point to the surviving attachment
		Zotero.Notes.replaceItemKey(noteItem, fromItem.key, toItem.key);
		await noteItem.save();
	}
};
|
||||
|
||||
|
||||
/**
 * Transfer the relations of fromItem to toItem as part of a merge, repoint
 * relations of same-library items that reference fromItem, and record on
 * toItem that it replaces fromItem. Both items are saved.
 * Requires a transaction.
 *
 * @param {Zotero.Item} fromItem
 * @param {Zotero.Item} toItem
 * @return {Promise}
 */
this._moveRelations = async function (fromItem, toItem) {
	Zotero.DB.requireTransaction();

	const mergeTrackingPredicate = Zotero.Relations.replacedItemPredicate;
	const sourceURI = Zotero.URI.getItemURI(fromItem);
	const targetURI = Zotero.URI.getItemURI(toItem);

	// Copy every relation of the source item onto the target item
	let sourceRelations = fromItem.getRelations();
	for (let predicate in sourceRelations) {
		for (let object of sourceRelations[predicate]) {
			toItem.addRelation(predicate, object);
		}
	}

	// Drop the source item's own merge-tracking relations, so that a deleted
	// object never ends up with two subjects
	let trackedURIs = fromItem.getRelationsByPredicate(mergeTrackingPredicate);
	for (let trackedURI of trackedURIs) {
		fromItem.removeRelation(mergeTrackingPredicate, trackedURI);
	}

	// Repoint relations on other items that reference the source item
	let incomingRelations = await Zotero.Relations.getByObject('item', sourceURI);
	for (let relation of incomingRelations) {
		// Merge-tracking relations were dealt with above. Items in other
		// libraries are left alone: they might not be editable, and merging
		// in one library shouldn't affect another, so those follow the
		// merge-tracking relations and can optimize their path if resaved.
		if (relation.predicate == mergeTrackingPredicate
				|| relation.subject.libraryID != toItem.libraryID) {
			continue;
		}
		let subject = relation.subject;
		subject.removeRelation(relation.predicate, sourceURI);
		subject.addRelation(relation.predicate, targetURI);
		await subject.save();
	}

	// Record the merge on the target item
	toItem.addRelation(mergeTrackingPredicate, sourceURI);

	await fromItem.save();
	await toItem.save();
};
|
||||
|
||||
|
||||
this.trash = Zotero.Promise.coroutine(function* (ids) {
|
||||
Zotero.DB.requireTransaction();
|
||||
|
|
|
@ -96,7 +96,36 @@ Zotero.Notes = new function() {
|
|||
await Zotero.Notifier.trigger('refresh', 'item', idsToRefresh);
|
||||
});
|
||||
};
|
||||
|
||||
|
||||
/**
 * Update item key URLs in the item's note, replacing all instances of each
 * key in itemKeyMap with the associated value.
 * Passed item should have an embedded note or be a note item.
 *
 * Both URL-encoded item paths (`%2Fitems%2F<key>`) and
 * `data-attachment-key="<key>"` attributes are rewritten. If itemKeyMap is
 * empty, the note is left untouched.
 *
 * @param {Zotero.Item} item
 * @param {Map<String, String>} itemKeyMap
 */
this.replaceAllItemKeys = function (item, itemKeyMap) {
	// An empty alternation would match right after every "%2Fitems%2F" and
	// replace it with "%2Fitems%2Fundefined", corrupting the note.
	if (!itemKeyMap.size) {
		return;
	}

	let note = item.getNote();
	// Escape regex metacharacters so keys are always matched literally
	let keys = [...itemKeyMap.keys()]
		.map(key => key.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'))
		.join('|');
	let re = new RegExp(`%2Fitems%2F(${keys})`, 'g');
	note = note.replace(re, (str, key) => `%2Fitems%2F${itemKeyMap.get(key)}`);
	// Global flag is required here too: a note can reference several attachments
	re = new RegExp(`data-attachment-key="(${keys})"`, 'g');
	note = note.replace(re, (str, key) => `data-attachment-key="${itemKeyMap.get(key)}"`);
	item.setNote(note);
};
|
||||
|
||||
/**
 * Shorthand for replaceAllItemKeys() when only one key has to be replaced.
 *
 * @param {Zotero.Item} item
 * @param {String} fromItemKey - Key to look for in the item's note
 * @param {String} toItemKey - Key to put in its place
 */
this.replaceItemKey = function (item, fromItemKey, toItemKey) {
	let singleKeyMap = new Map();
	singleKeyMap.set(fromItemKey, toItemKey);
	this.replaceAllItemKeys(item, singleKeyMap);
};
|
||||
|
||||
this.getExportableNote = async function(item) {
|
||||
if (!item.isNote()) {
|
||||
throw new Error('Item is not a note');
|
||||
|
|
BIN
test/tests/data/duplicatesMerge_JSTOR_1.pdf
Normal file
BIN
test/tests/data/duplicatesMerge_JSTOR_1.pdf
Normal file
Binary file not shown.
BIN
test/tests/data/duplicatesMerge_JSTOR_2.pdf
Normal file
BIN
test/tests/data/duplicatesMerge_JSTOR_2.pdf
Normal file
Binary file not shown.
BIN
test/tests/data/duplicatesMerge_empty_new_md5.pdf
Normal file
BIN
test/tests/data/duplicatesMerge_empty_new_md5.pdf
Normal file
Binary file not shown.
BIN
test/tests/data/duplicatesMerge_test_new_md5.pdf
Normal file
BIN
test/tests/data/duplicatesMerge_test_new_md5.pdf
Normal file
Binary file not shown.
BIN
test/tests/data/wonderland_long.pdf
Normal file
BIN
test/tests/data/wonderland_long.pdf
Normal file
Binary file not shown.
BIN
test/tests/data/wonderland_short.pdf
Normal file
BIN
test/tests/data/wonderland_short.pdf
Normal file
Binary file not shown.
BIN
test/tests/data/wonderland_short_watermarked_1.pdf
Normal file
BIN
test/tests/data/wonderland_short_watermarked_1.pdf
Normal file
Binary file not shown.
BIN
test/tests/data/wonderland_short_watermarked_2.pdf
Normal file
BIN
test/tests/data/wonderland_short_watermarked_2.pdf
Normal file
Binary file not shown.
|
@ -20,6 +20,27 @@ describe("Duplicate Items", function () {
|
|||
after(function () {
|
||||
win.close();
|
||||
});
|
||||
|
||||
/**
 * Merge a set of duplicates through the UI: open the duplicates view of the
 * user library, click the row of the given item, press the merge button and
 * wait for the trash to be refreshed.
 *
 * @param {Number} itemID - ID of one of the duplicate items
 * @return {Promise}
 */
async function merge(itemID) {
	// Switch to the duplicates view of the user library
	const duplicatesRowID = 'D' + Zotero.Libraries.userLibraryID;
	const wasSelected = await cv.selectByID(duplicatesRowID);
	assert.ok(wasSelected);
	await waitForItemsLoad(win);

	// Clicking one item of a duplicate set should select the whole set
	const itemsView = zp.itemsView;
	const rowIndex = itemsView.getRowIndexByID(itemID);
	const selectionDone = itemsView.waitForSelect();
	clickOnItemsRow(win, itemsView, rowIndex);
	await selectionDone;

	// Trigger the merge
	const mergeButton = win.document.getElementById('zotero-duplicates-merge-button');
	mergeButton.click();

	await waitForNotifierEvent('refresh', 'trash');
}
|
||||
|
||||
describe("Merging", function () {
|
||||
it("should merge two items in duplicates view", function* () {
|
||||
|
@ -28,28 +49,10 @@ describe("Duplicate Items", function () {
|
|||
yield item2.saveTx();
|
||||
var uri2 = Zotero.URI.getItemURI(item2);
|
||||
|
||||
var userLibraryID = Zotero.Libraries.userLibraryID;
|
||||
|
||||
var selected = yield cv.selectByID('D' + userLibraryID);
|
||||
assert.ok(selected);
|
||||
yield waitForItemsLoad(win);
|
||||
|
||||
// Select the first item, which should select both
|
||||
var iv = zp.itemsView;
|
||||
var row = iv.getRowIndexByID(item1.id);
|
||||
assert.isNumber(row);
|
||||
var promise = iv.waitForSelect();
|
||||
clickOnItemsRow(win, iv, row);
|
||||
assert.equal(iv.selection.count, 2);
|
||||
yield promise;
|
||||
|
||||
// Click merge button
|
||||
var button = win.document.getElementById('zotero-duplicates-merge-button');
|
||||
button.click();
|
||||
|
||||
yield waitForNotifierEvent('refresh', 'trash');
|
||||
yield merge(item1.id);
|
||||
|
||||
// Items should be gone
|
||||
var iv = zp.itemsView;
|
||||
assert.isFalse(iv.getRowIndexByID(item1.id));
|
||||
assert.isFalse(iv.getRowIndexByID(item2.id));
|
||||
assert.isTrue(item2.deleted);
|
||||
|
@ -67,27 +70,11 @@ describe("Duplicate Items", function () {
|
|||
var item2 = item1.clone();
|
||||
item2.setCollections([collection2.id]);
|
||||
yield item2.saveTx();
|
||||
|
||||
var userLibraryID = Zotero.Libraries.userLibraryID;
|
||||
|
||||
var selected = yield cv.selectByID('D' + userLibraryID);
|
||||
assert.ok(selected);
|
||||
yield waitForItemsLoad(win);
|
||||
|
||||
// Select the first item, which should select both
|
||||
var iv = zp.itemsView;
|
||||
var row = iv.getRowIndexByID(item1.id);
|
||||
var promise = iv.waitForSelect();
|
||||
clickOnItemsRow(win, iv, row);
|
||||
yield promise;
|
||||
|
||||
// Click merge button
|
||||
var button = win.document.getElementById('zotero-duplicates-merge-button');
|
||||
button.click();
|
||||
|
||||
yield waitForNotifierEvent('refresh', 'trash');
|
||||
|
||||
yield merge(item1.id);
|
||||
|
||||
// Items should be gone
|
||||
var iv = zp.itemsView;
|
||||
assert.isFalse(iv.getRowIndexByID(item1.id));
|
||||
assert.isFalse(iv.getRowIndexByID(item2.id));
|
||||
assert.isTrue(item2.deleted);
|
||||
|
|
|
@ -413,6 +413,371 @@ describe("Zotero.Items", function () {
|
|||
var rels = item3.getRelationsByPredicate(predicate);
|
||||
assert.deepEqual(rels, [item2URI]);
|
||||
})
|
||||
|
||||
it("should merge identical attachments based on file hash", async function () {
	// Master item with a PDF
	let masterItem = await createDataObject('item', { setTitle: true });
	let masterPDF = await importPDFAttachment(masterItem);

	// Duplicate item with the very same PDF
	let duplicateItem = masterItem.clone();
	await duplicateItem.saveTx();
	let duplicatePDF = await importPDFAttachment(duplicateItem);

	await Zotero.Items.merge(masterItem, [duplicateItem]);

	// The master keeps its single attachment...
	assert.isFalse(masterItem.deleted);
	assert.isFalse(masterPDF.deleted);
	assert.equal(masterItem.numAttachments(true), 1);
	// ...and the duplicate is trashed along with its copy of the file
	assert.isTrue(duplicateItem.deleted);
	assert.isTrue(duplicatePDF.deleted);
});
|
||||
|
||||
it("should merge one attachment per item into the master attachment", async function () {
	let masterItem = await createDataObject('item', { setTitle: true });
	let masterPDF = await importPDFAttachment(masterItem);

	// Two duplicates, each holding the same PDF as the master
	let duplicates = [];
	for (let i = 0; i < 2; i++) {
		let duplicateItem = masterItem.clone();
		await duplicateItem.saveTx();
		let duplicatePDF = await importPDFAttachment(duplicateItem);
		duplicates.push({ item: duplicateItem, pdf: duplicatePDF });
	}

	await Zotero.Items.merge(masterItem, duplicates.map(duplicate => duplicate.item));

	assert.isFalse(masterItem.deleted);
	assert.isFalse(masterPDF.deleted);
	assert.equal(masterItem.numAttachments(true), 1);
	// Both duplicates' PDFs should have been folded into the master's
	for (let duplicate of duplicates) {
		assert.isTrue(duplicate.item.deleted);
		assert.isTrue(duplicate.pdf.deleted);
	}
});
|
||||
|
||||
it("should merge identical attachments based on content hash", async function () {
	let masterItem = await createDataObject('item', { setTitle: true });
	let masterPDF = await importPDFAttachment(masterItem);

	let duplicateItem = masterItem.clone();
	await duplicateItem.saveTx();
	let duplicatePDF = await importFileAttachment(
		'duplicatesMerge_test_new_md5.pdf',
		{ parentItemID: duplicateItem.id }
	);

	// Precondition: same text, different bytes
	let masterText = await masterPDF.attachmentText;
	let duplicateText = await duplicatePDF.attachmentText;
	assert.equal(masterText, duplicateText);
	let masterHash = await masterPDF.attachmentHash;
	let duplicateHash = await duplicatePDF.attachmentHash;
	assert.notEqual(masterHash, duplicateHash);

	await Zotero.Items.merge(masterItem, [duplicateItem]);

	assert.isFalse(masterItem.deleted);
	assert.isFalse(masterPDF.deleted);
	assert.equal(masterItem.numAttachments(true), 1);
	assert.isTrue(duplicateItem.deleted);
	assert.isTrue(duplicatePDF.deleted);
});
|
||||
|
||||
it("shouldn't merge based on content hash when files are empty", async function () {
	let masterItem = await createDataObject('item', { setTitle: true });
	let masterPDF = await importFileAttachment('empty.pdf', { parentItemID: masterItem.id });

	let duplicateItem = masterItem.clone();
	await duplicateItem.saveTx();
	let duplicatePDF = await importFileAttachment(
		'duplicatesMerge_empty_new_md5.pdf',
		{ parentItemID: duplicateItem.id }
	);

	// Precondition: both files have the same (empty) text but different bytes
	let masterText = await masterPDF.attachmentText;
	let duplicateText = await duplicatePDF.attachmentText;
	assert.equal(masterText, duplicateText);
	let masterHash = await masterPDF.attachmentHash;
	let duplicateHash = await duplicatePDF.attachmentHash;
	assert.notEqual(masterHash, duplicateHash);
	assert.isEmpty(await masterPDF.attachmentText);

	await Zotero.Items.merge(masterItem, [duplicateItem]);

	// Without text to compare, the duplicate's file is moved rather than merged
	assert.isFalse(masterItem.deleted);
	assert.isFalse(masterPDF.deleted);
	assert.equal(masterItem.numAttachments(true), 2);
	assert.isTrue(duplicateItem.deleted);
	assert.isFalse(duplicatePDF.deleted);
});
|
||||
|
||||
it("should allow small differences when hashing content", async function () {
	let item1 = await createDataObject('item', { setTitle: true });
	let attachment1 = await importFileAttachment('duplicatesMerge_JSTOR_1.pdf', { parentItemID: item1.id });

	let item2 = item1.clone();
	await item2.saveTx();
	let attachment2 = await importFileAttachment('duplicatesMerge_JSTOR_2.pdf', { parentItemID: item2.id });

	assert.notEqual(await attachment1.attachmentText, await attachment2.attachmentText);
	assert.notEqual(await attachment1.attachmentHash, await attachment2.attachmentHash);

	// _hashAttachmentText() resolves to the hash itself (or null), so the
	// hashes have to be compared directly. Comparing a property of them
	// would compare undefined with undefined and always pass.
	let textHash1 = await Zotero.Items._hashAttachmentText(attachment1);
	let textHash2 = await Zotero.Items._hashAttachmentText(attachment2);
	assert.ok(textHash1);
	assert.equal(textHash1, textHash2);

	await Zotero.Items.merge(item1, [item2]);

	assert.isFalse(item1.deleted);
	assert.isFalse(attachment1.deleted);
	assert.equal(item1.numAttachments(true), 1);
	assert.isTrue(item2.deleted);
	assert.isTrue(attachment2.deleted);
});
|
||||
|
||||
it("should keep similar but not identical attachments separate", async function () {
	let masterItem = await createDataObject('item', { setTitle: true });
	let shortPDF = await importFileAttachment('wonderland_short.pdf', { parentItemID: masterItem.id });

	let duplicateItem = masterItem.clone();
	await duplicateItem.saveTx();
	let longPDF = await importFileAttachment('wonderland_long.pdf', { parentItemID: duplicateItem.id });

	// Precondition: the files differ in both text and bytes
	let shortText = await shortPDF.attachmentText;
	let longText = await longPDF.attachmentText;
	assert.notEqual(shortText, longText);
	let shortHash = await shortPDF.attachmentHash;
	let longHash = await longPDF.attachmentHash;
	assert.notEqual(shortHash, longHash);

	await Zotero.Items.merge(masterItem, [duplicateItem]);

	// Both files should survive on the master item
	assert.isFalse(masterItem.deleted);
	assert.isFalse(shortPDF.deleted);
	assert.equal(masterItem.numAttachments(true), 2);
	assert.isTrue(duplicateItem.deleted);
	assert.isFalse(longPDF.deleted);
});
|
||||
|
||||
it("should only match attachments one-to-one", async function () {
	let masterItem = await createDataObject('item', { setTitle: true });
	let masterPDF = await importFileAttachment(
		'wonderland_short_watermarked_1.pdf',
		{ parentItemID: masterItem.id }
	);

	// The duplicate holds two copies of a file that matches the master's
	let duplicateItem = masterItem.clone();
	await duplicateItem.saveTx();
	let firstCopy = await importFileAttachment(
		'wonderland_short_watermarked_2.pdf',
		{ parentItemID: duplicateItem.id }
	);
	let secondCopy = await importFileAttachment(
		'wonderland_short_watermarked_2.pdf',
		{ parentItemID: duplicateItem.id }
	);

	await Zotero.Items.merge(masterItem, [duplicateItem]);

	assert.isFalse(masterItem.deleted);
	assert.isFalse(masterPDF.deleted);
	assert.equal(masterItem.numAttachments(true), 2);
	assert.isTrue(duplicateItem.deleted);
	// Exactly one of the copies should have been merged; it doesn't matter which
	assert.isTrue(Boolean(firstCopy.deleted) !== Boolean(secondCopy.deleted));
});
|
||||
|
||||
it("should copy annotations when merging", async function () {
	// Master item: PDF with a note annotation
	let masterItem = await createDataObject('item', { setTitle: true });
	let masterPDF = await importPDFAttachment(masterItem);
	let masterAnnotation = await createAnnotation('note', masterPDF);

	// Duplicate item: same PDF with a highlight and a note created from it
	let duplicateItem = masterItem.clone();
	await duplicateItem.saveTx();
	let duplicatePDF = await importPDFAttachment(duplicateItem);
	let duplicateAnnotation = await createAnnotation('highlight', duplicatePDF);
	let annotationNote = await Zotero.EditorInstance.createNoteFromAnnotations(
		[duplicateAnnotation],
		duplicateItem.id
	);

	assert.include(annotationNote.getNote(), duplicatePDF.key);

	await Zotero.Items.merge(masterItem, [duplicateItem]);

	assert.isFalse(masterItem.deleted);
	assert.isFalse(masterPDF.deleted);
	assert.isFalse(masterAnnotation.deleted);
	assert.equal(masterItem.numAttachments(true), 1);
	assert.isTrue(duplicateItem.deleted);
	assert.isTrue(duplicatePDF.deleted);
	assert.isFalse(duplicateAnnotation.deleted);
	// Both annotations now live on the master's PDF
	assert.equal(masterAnnotation.parentItemID, masterPDF.id);
	assert.equal(duplicateAnnotation.parentItemID, masterPDF.id);
	// The note's links were rewritten to the master item and attachment
	assert.notInclude(annotationNote.getNote(), duplicateItem.key);
	assert.include(annotationNote.getNote(), masterItem.key);
	assert.notInclude(annotationNote.getNote(), duplicatePDF.key);
	assert.include(annotationNote.getNote(), masterPDF.key);
});
|
||||
|
||||
it("should update all item keys when moving notes", async function () {
	let attachmentFilenames = [
		'recognizePDF_test_arXiv.pdf',
		'recognizePDF_test_DOI.pdf',
		'recognizePDF_test_title.pdf'
	];

	// Master item with one attachment per file
	let masterItem = await createDataObject('item', { setTitle: true });
	let masterAttachments = [];
	for (let filename of attachmentFilenames) {
		masterAttachments.push(await importFileAttachment(filename, { parentID: masterItem.id }));
	}

	// Duplicate item with the same files, each with a highlight and a note
	// created from that highlight
	let duplicateItem = masterItem.clone();
	await duplicateItem.saveTx();
	let duplicateAttachments = [];
	let duplicateAnnotations = [];
	let duplicateNotes = [];
	for (let filename of attachmentFilenames) {
		let attachment = await importFileAttachment(filename, { parentID: duplicateItem.id });
		let annotation = await createAnnotation('highlight', attachment);
		let note = await Zotero.EditorInstance.createNoteFromAnnotations([annotation], duplicateItem.id);
		duplicateAttachments.push(attachment);
		duplicateAnnotations.push(annotation);
		duplicateNotes.push(note);

		assert.include(note.getNote(), duplicateItem.key);
		assert.include(note.getNote(), attachment.key);
	}

	await Zotero.Items.merge(masterItem, [duplicateItem]);

	assert.isFalse(masterItem.deleted);
	assert.equal(masterItem.numAttachments(true), 3);
	assert.isTrue(duplicateItem.deleted);

	// Every note should have moved to the master and reference only the
	// master item and the corresponding master attachment
	for (let [index, note] of duplicateNotes.entries()) {
		let masterAttachment = masterAttachments[index];
		let duplicateAttachment = duplicateAttachments[index];

		assert.equal(note.parentItemID, masterItem.id);
		assert.include(note.getNote(), masterItem.key);
		assert.notInclude(note.getNote(), duplicateItem.key);
		assert.include(note.getNote(), masterAttachment.key);
		assert.notInclude(note.getNote(), duplicateAttachment.key);
	}
});
|
||||
|
||||
it("should merge snapshots with the same title, even if URL differs", async function () {
	// Read the fixture snapshot's HTML; both attachments are created from it
	const snapshotFile = getTestDataDirectory();
	snapshotFile.append('snapshot');
	snapshotFile.append('index.html');
	const snapshotContent = await Zotero.File.getContentsAsync(snapshotFile);

	// Creates a snapshot attachment titled 'Snapshot' under the given parent
	const addSnapshot = (parentItem, url) => Zotero.Attachments.importFromSnapshotContent({
		parentItemID: parentItem.id,
		url,
		title: 'Snapshot',
		snapshotContent
	});

	const masterItem = await createDataObject('item', { setTitle: true });
	const masterSnapshot = await addSnapshot(masterItem, 'https://example.com/test.html');

	// Same title, different host in the URL
	const duplicateItem = masterItem.clone();
	await duplicateItem.saveTx();
	const duplicateSnapshot = await addSnapshot(duplicateItem, 'https://otherdomain.example.com/test.html');

	await Zotero.Items.merge(masterItem, [duplicateItem]);

	// The master keeps its single snapshot; the duplicate and its snapshot are deleted
	assert.isFalse(masterItem.deleted);
	assert.isFalse(masterSnapshot.deleted);
	assert.equal(masterItem.numAttachments(true), 1);
	assert.isTrue(duplicateItem.deleted);
	assert.isTrue(duplicateSnapshot.deleted);
});
|
||||
|
||||
it("should merge linked URLs", async function () {
	const linkURL = 'https://example.com/';

	// Creates a linked-URL attachment with identical URL and title under the given parent
	const addLink = parentItem => Zotero.Attachments.linkFromURL({
		url: linkURL,
		title: 'Catalog Entry',
		parentItemID: parentItem.id
	});

	const masterItem = await createDataObject('item', { setTitle: true });
	const masterLink = await addLink(masterItem);

	// The duplicate item carries two links identical to the master's
	const duplicateItem = masterItem.clone();
	await duplicateItem.saveTx();
	const firstDuplicateLink = await addLink(duplicateItem);
	const secondDuplicateLink = await addLink(duplicateItem);

	await Zotero.Items.merge(masterItem, [duplicateItem]);

	assert.isFalse(masterItem.deleted);
	assert.isFalse(masterLink.deleted);
	assert.equal(masterLink.getField('url'), linkURL);
	assert.equal(masterItem.numAttachments(true), 2);
	assert.isTrue(duplicateItem.deleted);
	// Only the first duplicate link is deleted; the second is moved to the
	// master item and kept
	assert.isTrue(firstDuplicateLink.deleted);
	assert.equal(secondDuplicateLink.parentItemID, masterItem.id);
	assert.isFalse(secondDuplicateLink.deleted);
});
|
||||
|
||||
it("should keep web attachment with same URL but different title", async function () {
	const linkURL = 'https://example.com/';

	// Creates a linked-URL attachment with the shared URL and the given title
	const addLink = (parentItem, title) => Zotero.Attachments.linkFromURL({
		url: linkURL,
		title,
		parentItemID: parentItem.id
	});

	const masterItem = await createDataObject('item', { setTitle: true });
	const masterLink = await addLink(masterItem, 'Catalog Entry');

	// The duplicate has one link with a different title and one whose title
	// matches the master's link
	const duplicateItem = masterItem.clone();
	await duplicateItem.saveTx();
	const differentlyTitledLink = await addLink(duplicateItem, 'Official Website');
	const sameTitledLink = await addLink(duplicateItem, 'Catalog Entry');

	await Zotero.Items.merge(masterItem, [duplicateItem]);

	assert.isFalse(masterItem.deleted);
	assert.isFalse(masterLink.deleted);
	assert.equal(masterLink.getField('url'), linkURL);
	assert.equal(masterItem.numAttachments(true), 2);
	assert.isTrue(duplicateItem.deleted);
	// The differently titled link is moved to the master item and kept ...
	assert.equal(differentlyTitledLink.parentItemID, masterItem.id);
	assert.isFalse(differentlyTitledLink.deleted);
	// ... while the link with the matching title is deleted
	assert.isTrue(sameTitledLink.deleted);
});
|
||||
|
||||
it("should move related items of merged attachments", async function () {
	const relatedItem = await createDataObject('item');

	const masterItem = await createDataObject('item', { setTitle: true });
	const masterPDF = await importPDFAttachment(masterItem);

	// Only the duplicate's attachment is related to relatedItem
	const duplicateItem = masterItem.clone();
	await duplicateItem.saveTx();
	const duplicatePDF = await importPDFAttachment(duplicateItem);
	duplicatePDF.addRelatedItem(relatedItem);

	await Zotero.Items.merge(masterItem, [duplicateItem]);

	assert.isFalse(masterItem.deleted);
	assert.isFalse(masterPDF.deleted);
	assert.equal(masterItem.numAttachments(true), 1);
	assert.isTrue(duplicateItem.deleted);
	assert.isTrue(duplicatePDF.deleted);

	// The master's attachment must now list relatedItem as its only related item
	const relatedKeys = masterPDF.relatedItems;
	assert.lengthOf(relatedKeys, 1);
	assert.equal(relatedKeys[0], relatedItem.key);
});
|
||||
|
||||
it("should move merge-tracking relation from replaced attachment to master attachment", async function () {
	const masterItem = await createDataObject('item');
	const masterPDF = await importPDFAttachment(masterItem);

	const middleItem = await createDataObject('item');
	const middlePDF = await importPDFAttachment(middleItem);
	const middlePDFURI = Zotero.URI.getItemURI(middlePDF);

	const lastItem = await createDataObject('item');
	const lastPDF = await importPDFAttachment(lastItem);
	const lastPDFURI = Zotero.URI.getItemURI(lastPDF);

	// Chain two merges: last -> middle, then middle -> master
	await Zotero.Items.merge(middleItem, [lastItem]);
	await Zotero.Items.merge(masterItem, [middleItem]);

	// The master attachment must carry a replaced-item relation for both
	// attachments merged along the chain
	const replacedURIs = masterPDF.getRelationsByPredicate(Zotero.Relations.replacedItemPredicate);
	assert.lengthOf(replacedURIs, 2);
	assert.sameMembers(replacedURIs, [middlePDFURI, lastPDFURI]);
});
|
||||
})
|
||||
|
||||
|
||||
|
|
Loading…
Reference in a new issue