Merge attachments and update notes (#2336)

We follow a different merge procedure for each attachment type:

- For PDF attachments, compare by MD5. If no match, get the top 50 words
  in the attachment's text and hash those, then check again for a match.
  Update references to item keys in notes and annotations.
- For web (snapshot / link) attachments, compare by title and URL.
  Prefer a title + URL match but accept a title-only match.
- For other attachment types, keep all attachments from all items being
  merged.

Also:

- Move most merge tests from Duplicates to Items#merge(). It just doesn't
  make sense to worry about the UI in these.
This commit is contained in:
Abe Jellinek 2022-03-09 22:26:26 +00:00 committed by GitHub
parent 8e8b03e5ff
commit ef82becf00
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
13 changed files with 753 additions and 95 deletions

View file

@ -2916,7 +2916,7 @@ Zotero.Item.prototype.deleteAttachmentFile = Zotero.Promise.coroutine(function*
* Return a file:/// URL path to files and snapshots
*/
Zotero.Item.prototype.getLocalFileURL = function() {
if (!this.isAttachment) {
if (!this.isAttachment()) {
throw ("getLocalFileURL() can only be called on attachment items");
}

View file

@ -925,19 +925,20 @@ Zotero.Items = function() {
*
* @param {Zotero.Item} fromItem
* @param {Zotero.Item} toItem
* @param {Boolean} includeTrashed
* @return {Promise}
*/
this.moveChildItems = async function (fromItem, toItem) {
this.moveChildItems = async function (fromItem, toItem, includeTrashed = false) {
//Zotero.DB.requireTransaction();
// Annotations on files
if (fromItem.isFileAttachment()) {
let fn = async function () {
let annotations = fromItem.getAnnotations();
let annotations = fromItem.getAnnotations(includeTrashed);
for (let annotation of annotations) {
annotation.parentItemID = toItem.id;
await annotation.save();
};
}
};
if (!Zotero.DB.inTransaction) {
@ -957,66 +958,40 @@ Zotero.Items = function() {
Zotero.debug("Merging items");
return Zotero.DB.executeTransaction(function* () {
var otherItemIDs = [];
var itemURI = Zotero.URI.getItemURI(item);
var replPred = Zotero.Relations.replacedItemPredicate;
var toSave = {};
toSave[item.id] = item;
var earliestDateAdded = item.dateAdded;
let remapAttachmentKeys = yield this._mergePDFAttachments(item, otherItems);
yield this._mergeWebAttachments(item, otherItems);
yield this._mergeOtherAttachments(item, otherItems);
for (let otherItem of otherItems) {
if (otherItem.libraryID !== item.libraryID) {
throw new Error('Items being merged must be in the same library');
}
// Use the earliest date added of all the items
if (otherItem.dateAdded < earliestDateAdded) {
earliestDateAdded = otherItem.dateAdded;
}
let otherItemURI = Zotero.URI.getItemURI(otherItem);
// Move child items to master
var ids = otherItem.getAttachments(true).concat(otherItem.getNotes(true));
for (let id of ids) {
var attachment = yield this.getAsync(id);
// TODO: Skip identical children?
attachment.parentID = item.id;
yield attachment.save();
// Move notes to master
var noteIDs = otherItem.getNotes(true);
for (let id of noteIDs) {
var note = yield this.getAsync(id);
note.parentItemID = item.id;
Zotero.Notes.replaceItemKey(note, otherItem.key, item.key);
Zotero.Notes.replaceAllItemKeys(note, remapAttachmentKeys);
toSave[note.id] = note;
}
// Add relations to master
let oldRelations = otherItem.getRelations();
for (let pred in oldRelations) {
oldRelations[pred].forEach(obj => item.addRelation(pred, obj));
}
// Move relations to master
yield this._moveRelations(otherItem, item);
// Remove merge-tracking relations from other item, so that there aren't two
// subjects for a given deleted object
let replItems = otherItem.getRelationsByPredicate(replPred);
for (let replItem of replItems) {
otherItem.removeRelation(replPred, replItem);
}
// Update relations on items in the library that point to the other item
// to point to the master instead
let rels = yield Zotero.Relations.getByObject('item', otherItemURI);
for (let rel of rels) {
// Skip merge-tracking relations, which are dealt with above
if (rel.predicate == replPred) continue;
// Skip items in other libraries. They might not be editable, and even
// if they are, merging items in one library shouldn't affect another library,
// so those will follow the merge-tracking relations and can optimize their
// path if they're resaved.
if (rel.subject.libraryID != item.libraryID) continue;
rel.subject.removeRelation(rel.predicate, otherItemURI);
rel.subject.addRelation(rel.predicate, itemURI);
if (!toSave[rel.subject.id]) {
toSave[rel.subject.id] = rel.subject;
}
}
// All other operations are additive only and do not affect the,
// All other operations are additive only and do not affect the
// old item, which will be put in the trash
// Add collections to master
@ -1042,12 +1017,9 @@ Zotero.Items = function() {
}
}
// Add relation to track merge
item.addRelation(replPred, otherItemURI);
// Trash other item
otherItem.deleted = true;
yield otherItem.save();
toSave[otherItem.id] = otherItem;
}
item.setField('dateAdded', earliestDateAdded);
@ -1062,6 +1034,311 @@ Zotero.Items = function() {
};
/**
 * Merge the PDF attachments of otherItems into item.
 *
 * Each PDF attachment of each other item is compared against the master
 * item's file attachments, first by file hash and then, if that fails, by a
 * hash of the most common words in its text. A matched attachment has its
 * child items, embedded note and relations moved to the master attachment
 * and is then trashed; an unmatched attachment is reparented to item.
 * Requires a transaction.
 *
 * @param {Zotero.Item} item - Master item
 * @param {Zotero.Item[]} otherItems - Items being merged into the master
 * @return {Promise<Map<String, String>>} Map from the key of each trashed
 *     attachment to the key of the master attachment that replaced it
 */
this._mergePDFAttachments = async function (item, otherItems) {
	Zotero.DB.requireTransaction();

	let remapAttachmentKeys = new Map();
	let masterAttachmentHashes = await this._hashItem(item, 'bytes');
	// Text hashes are expensive, so they're only computed on the first miss
	let hashesIncludeText = false;

	for (let otherItem of otherItems) {
		// Tracked per other item: a master attachment absorbs at most one
		// attachment from each item being merged
		let mergedMasterAttachments = new Set();

		for (let otherAttachment of await this.getAsync(otherItem.getAttachments(true))) {
			if (!otherAttachment.isPDFAttachment()) {
				continue;
			}

			// First check if master has an attachment with identical MD5 hash
			let matchingHash = await otherAttachment.attachmentHash;
			let masterAttachmentID = masterAttachmentHashes.get(matchingHash);

			if (!masterAttachmentID && item.numAttachments(true)) {
				// If that didn't work, hash master attachments by the
				// most common words in their text and check again.
				if (!hashesIncludeText) {
					masterAttachmentHashes = new Map([
						...masterAttachmentHashes,
						...await this._hashItem(item, 'text')
					]);
					hashesIncludeText = true;
				}

				matchingHash = await this._hashAttachmentText(otherAttachment);
				masterAttachmentID = masterAttachmentHashes.get(matchingHash);
			}

			if (!masterAttachmentID || mergedMasterAttachments.has(masterAttachmentID)) {
				Zotero.debug(`No unmerged match for attachment ${otherAttachment.id} in master item - moving`);
				otherAttachment.parentItemID = item.id;
				await otherAttachment.save();
				continue;
			}
			mergedMasterAttachments.add(masterAttachmentID);

			let masterAttachment = await this.getAsync(masterAttachmentID);

			if (masterAttachment.attachmentContentType !== otherAttachment.attachmentContentType) {
				Zotero.debug(`Master attachment ${masterAttachmentID} matches ${otherAttachment.id}, `
					+ 'but content types differ - moving');
				otherAttachment.parentItemID = item.id;
				await otherAttachment.save();
				continue;
			}

			Zotero.debug(`Master attachment ${masterAttachmentID} matches ${otherAttachment.id} - merging`);
			await this.moveChildItems(otherAttachment, masterAttachment, true);
			await this._moveEmbeddedNote(otherAttachment, masterAttachment);
			// Also records the merge-tracking relation on masterAttachment.
			// The `!getRelationsByPredicate()` check that used to follow
			// negated an array and so could never add anything.
			await this._moveRelations(otherAttachment, masterAttachment);

			otherAttachment.deleted = true;
			await otherAttachment.save();

			// Later on, when processing notes, we'll use this to remap
			// URLs pointing to the old attachment.
			remapAttachmentKeys.set(otherAttachment.key, masterAttachment.key);

			await masterAttachment.save();
		}
	}

	return remapAttachmentKeys;
};
/**
 * Merge the web attachments of otherItems into item.
 *
 * A web attachment of an other item is matched against the master item's
 * web attachments with the same link mode, preferring one with the same
 * title and URL and falling back to one with the same title only. A matched
 * attachment is trashed after its relations are moved to the master
 * attachment; an unmatched one is reparented to item. Each master attachment
 * is matched at most once across all other items.
 * Requires a transaction.
 *
 * @param {Zotero.Item} item - Master item
 * @param {Zotero.Item[]} otherItems - Items being merged into the master
 * @return {Promise}
 */
this._mergeWebAttachments = async function (item, otherItems) {
	Zotero.DB.requireTransaction();

	let itemAttachments = await this.getAsync(item.getAttachments(true));
	// Master web attachments that haven't absorbed a duplicate yet
	let unmatched = itemAttachments.filter(candidate => candidate.isWebAttachment());

	for (let otherItem of otherItems) {
		let otherAttachments = await this.getAsync(otherItem.getAttachments(true));
		for (let otherAttachment of otherAttachments) {
			if (!otherAttachment.isWebAttachment()) {
				continue;
			}

			let sameTitleAndMode = candidate => (
				candidate.getField('title') == otherAttachment.getField('title')
				&& candidate.attachmentLinkMode === otherAttachment.attachmentLinkMode
			);
			let sameURL = candidate => (
				candidate.getField('url') == otherAttachment.getField('url')
			);

			// Best case: title and URL both match
			let masterAttachment = unmatched.find(
				candidate => sameTitleAndMode(candidate) && sameURL(candidate)
			);
			// Otherwise settle for a title-only match
			if (!masterAttachment) {
				masterAttachment = unmatched.find(candidate => sameTitleAndMode(candidate));
			}

			if (!masterAttachment) {
				Zotero.debug(`No match for web attachment ${otherAttachment.id} in master item - moving`);
				otherAttachment.parentItemID = item.id;
				await otherAttachment.save();
				continue;
			}

			otherAttachment.deleted = true;
			await this._moveRelations(otherAttachment, masterAttachment);
			await otherAttachment.save();

			let otherAttachmentURI = Zotero.URI.getItemURI(otherAttachment);
			masterAttachment.addRelation(Zotero.Relations.replacedItemPredicate, otherAttachmentURI);
			await masterAttachment.save();

			// Take the matched attachment out of the candidate pool
			unmatched = unmatched.filter(candidate => candidate !== masterAttachment);
		}
	}
};
/**
 * Reparent to item every attachment of otherItems that is neither a PDF
 * attachment nor a web attachment. No matching is attempted.
 * Requires a transaction.
 *
 * @param {Zotero.Item} item - Master item
 * @param {Zotero.Item[]} otherItems - Items being merged into the master
 * @return {Promise}
 */
this._mergeOtherAttachments = async function (item, otherItems) {
	Zotero.DB.requireTransaction();

	for (let otherItem of otherItems) {
		let children = await this.getAsync(otherItem.getAttachments(true));
		let toMove = children.filter(
			child => !child.isPDFAttachment() && !child.isWebAttachment()
		);
		for (let child of toMove) {
			child.parentItemID = item.id;
			await child.save();
		}
	}
};
/**
 * Compute a hash for every file attachment of the given item and index the
 * attachments by that hash. Attachments whose hash comes back empty are
 * left out of the result.
 *
 * @param {Zotero.Item} item
 * @param {String} hashType - 'bytes' to use the attachment's file hash,
 *     'text' to hash the most common words in its text
 * @return {Promise<Map>} Map from hash string to attachment ID
 * @throws {Error} If hashType is neither 'bytes' nor 'text'
 */
this._hashItem = async function (item, hashType) {
	if (hashType !== 'bytes' && hashType !== 'text') {
		throw new Error('Invalid hash type');
	}

	let fileAttachments = [];
	for (let child of await this.getAsync(item.getAttachments(true))) {
		if (child.isFileAttachment()) {
			fileAttachments.push(child);
		}
	}

	let hashes = new Map();
	let recordHash = async (attachment) => {
		let hash;
		if (hashType === 'bytes') {
			hash = await attachment.attachmentHash;
		}
		else {
			hash = await this._hashAttachmentText(attachment);
		}
		if (hash) {
			hashes.set(hash, attachment.id);
		}
	};
	// Hash all attachments concurrently
	await Promise.all(fileAttachments.map(attachment => recordHash(attachment)));
	return hashes;
};
/**
 * Hash an attachment by the most common words in its text.
 *
 * Resolves to null, meaning "no usable text hash", when the attachment's
 * file cannot be located, the file is larger than 5e8 bytes, the attachment
 * has no text, or the text contains fewer than 10 distinct counted words.
 *
 * @param {Zotero.Item} attachment
 * @return {Promise<String|null>} MD5 of the (up to) 50 most common words,
 *     sorted and space-joined, or null
 */
this._hashAttachmentText = async function (attachment) {
	// A missing file must not abort the whole merge; treat it like any
	// other attachment we can't hash.
	// NOTE(review): relies on getFilePathAsync() resolving to a falsy value
	// when the file doesn't exist - confirm against Zotero.Item.
	let path = await attachment.getFilePathAsync();
	if (!path) {
		Zotero.debug('_hashAttachmentText: Attachment file not found');
		return null;
	}

	if ((await OS.File.stat(path)).size > 5e8) {
		Zotero.debug('_hashAttachmentText: Attachment too large');
		return null;
	}

	let text = await attachment.attachmentText;
	if (!text) {
		Zotero.debug('_hashAttachmentText: Attachment has no text');
		return null;
	}

	let mostCommonWords = this._getMostCommonWords(text, 50);
	if (mostCommonWords.length < 10) {
		Zotero.debug('_hashAttachmentText: Not enough unique words');
		return null;
	}
	// Sort so the hash depends on which words are most common, not on
	// their relative frequency order
	return Zotero.Utilities.Internal.md5(mostCommonWords.sort().join(' '));
};
/**
 * Get the n most common words in s in descending order of frequency.
 * If s contains fewer than n unique words, the size of the returned array
 * will be less than n.
 *
 * Words are delimited by whitespace only. Within a word, characters that
 * are not Unicode letters are dropped and the rest are lowercased. Words
 * whose resulting length is 3 or less are not counted.
 *
 * @param {String} s
 * @param {Number} n
 * @return {String[]}
 */
this._getMostCommonWords = function (s, n) {
	// Use an iterative approach for better performance.

	const whitespaceRe = /\s/;
	const wordCharRe = /\p{Letter}/u; // [a-z] only matches Latin

	let freqs = new Map();
	let currentWord = '';

	// Count the word accumulated so far (if long enough) and start a new one
	let commitWord = () => {
		if (currentWord.length > 3) {
			freqs.set(currentWord, (freqs.get(currentWord) || 0) + 1);
		}
		currentWord = '';
	};

	for (let codePoint of s) {
		if (whitespaceRe.test(codePoint)) {
			commitWord();
			continue;
		}

		if (wordCharRe.test(codePoint)) {
			currentWord += codePoint.toLowerCase();
		}
	}
	// The text may not end in whitespace; without this the final word
	// would never be counted.
	commitWord();

	// Break ties in locale order.
	return [...freqs.keys()]
		.sort((a, b) => (freqs.get(b) - freqs.get(a)) || Zotero.localeCompare(a, b))
		.slice(0, n);
};
/**
 * Move fromItem's embedded note, if it has one, to toItem.
 * If toItem already has an embedded note, the note will be added as a new
 * child note item on toItem's parent.
 * Item key URLs in the moved note that point to fromItem are rewritten to
 * point to toItem. fromItem's note is cleared but fromItem is not saved
 * here; the caller is expected to save it.
 * Requires a transaction.
 *
 * @param {Zotero.Item} fromItem
 * @param {Zotero.Item} toItem
 * @return {Promise}
 */
this._moveEmbeddedNote = async function (fromItem, toItem) {
	Zotero.DB.requireTransaction();

	let noteText = fromItem.getNote();
	if (!noteText) {
		return;
	}

	let noteItem = toItem;
	if (toItem.getNote()) {
		noteItem = new Zotero.Item('note');
		// A new item isn't tied to toItem's library unless we say so;
		// without this the note can't be a sibling of toItem outside the
		// default library.
		noteItem.libraryID = toItem.libraryID;
		noteItem.parentItemID = toItem.parentItemID;
	}
	noteItem.setNote(noteText);
	fromItem.setNote('');
	Zotero.Notes.replaceItemKey(noteItem, fromItem.key, toItem.key);
	await noteItem.save();
};
/**
 * Transfer fromItem's relations to toItem as part of a merge, repoint
 * same-library relations that referenced fromItem, and record on toItem
 * that it replaces fromItem. Saves every touched subject, fromItem and
 * toItem.
 * Requires a transaction.
 *
 * @param {Zotero.Item} fromItem
 * @param {Zotero.Item} toItem
 * @return {Promise}
 */
this._moveRelations = async function (fromItem, toItem) {
	Zotero.DB.requireTransaction();

	const trackingPredicate = Zotero.Relations.replacedItemPredicate;
	const sourceURI = Zotero.URI.getItemURI(fromItem);
	const targetURI = Zotero.URI.getItemURI(toItem);

	// Copy every relation of fromItem onto toItem
	let relationsByPredicate = fromItem.getRelations();
	for (let predicate in relationsByPredicate) {
		for (let object of relationsByPredicate[predicate]) {
			toItem.addRelation(predicate, object);
		}
	}

	// Strip merge-tracking relations from fromItem so that a deleted object
	// never ends up with two subjects
	for (let replacedURI of fromItem.getRelationsByPredicate(trackingPredicate)) {
		fromItem.removeRelation(trackingPredicate, replacedURI);
	}

	// Repoint relations elsewhere in the library from fromItem to toItem
	let incoming = await Zotero.Relations.getByObject('item', sourceURI);
	for (let { subject, predicate } of incoming) {
		// Merge-tracking relations were handled above
		if (predicate == trackingPredicate) {
			continue;
		}
		// Leave other libraries alone. They might not be editable, and a
		// merge in one library shouldn't modify another; items there can
		// follow the merge-tracking relations and fix up their path when
		// they're next saved.
		if (subject.libraryID != toItem.libraryID) {
			continue;
		}
		subject.removeRelation(predicate, sourceURI);
		subject.addRelation(predicate, targetURI);
		await subject.save();
	}

	// Record the merge itself
	toItem.addRelation(trackingPredicate, sourceURI);

	await fromItem.save();
	await toItem.save();
};
this.trash = Zotero.Promise.coroutine(function* (ids) {
Zotero.DB.requireTransaction();

View file

@ -97,6 +97,35 @@ Zotero.Notes = new function() {
});
};
/**
 * Update item key URLs in the item's note, replacing all instances of each
 * key in itemKeyMap with the associated value.
 * Both URL-encoded item paths (`%2Fitems%2F<key>`) and
 * `data-attachment-key="<key>"` attributes are rewritten.
 * Passed item should have an embedded note or be a note item.
 * Does nothing if itemKeyMap is empty.
 *
 * @param {Zotero.Item} item
 * @param {Map<String, String>} itemKeyMap
 */
this.replaceAllItemKeys = function (item, itemKeyMap) {
	// With no keys the alternation below would be empty, match every item
	// URL in the note and splice "undefined" into it.
	if (!itemKeyMap.size) {
		return;
	}

	let note = item.getNote();
	// Keys are inserted into a RegExp, so neutralize any metacharacters
	let escapeForRegExp = key => key.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
	let keys = [...itemKeyMap.keys()].map(escapeForRegExp).join('|');

	let re = new RegExp(`%2Fitems%2F(${keys})`, 'g');
	note = note.replace(re, (str, key) => `%2Fitems%2F${itemKeyMap.get(key)}`);

	// Global flag is required here too: a note can reference the same
	// attachment, or several remapped attachments, more than once.
	re = new RegExp(`data-attachment-key="(${keys})"`, 'g');
	note = note.replace(re, (str, key) => `data-attachment-key="${itemKeyMap.get(key)}"`);

	item.setNote(note);
};
/**
 * Single-key shorthand for replaceAllItemKeys(): rewrites references to
 * fromItemKey in the item's note so that they point to toItemKey.
 *
 * @param {Zotero.Item} item
 * @param {String} fromItemKey
 * @param {String} toItemKey
 */
this.replaceItemKey = function (item, fromItemKey, toItemKey) {
	let singleKeyMap = new Map();
	singleKeyMap.set(fromItemKey, toItemKey);
	this.replaceAllItemKeys(item, singleKeyMap);
};
this.getExportableNote = async function(item) {
if (!item.isNote()) {
throw new Error('Item is not a note');

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View file

@ -21,6 +21,27 @@ describe("Duplicate Items", function () {
win.close();
});
/**
 * Drive a merge through the UI: open the user library's duplicates view,
 * click the row of the given item, press the merge button and wait for the
 * trash refresh notification.
 *
 * @param {Number} itemID - ID of an item shown in the duplicates view
 * @return {Promise}
 */
async function merge(itemID) {
	const duplicatesRowID = 'D' + Zotero.Libraries.userLibraryID;
	assert.ok(await cv.selectByID(duplicatesRowID));
	await waitForItemsLoad(win);

	// Clicking one item of a duplicate set should select the whole set
	const itemsView = zp.itemsView;
	const rowIndex = itemsView.getRowIndexByID(itemID);
	const selected = itemsView.waitForSelect();
	clickOnItemsRow(win, itemsView, rowIndex);
	await selected;

	// Trigger the merge and wait until the trash has been refreshed
	win.document.getElementById('zotero-duplicates-merge-button').click();
	await waitForNotifierEvent('refresh', 'trash');
}
describe("Merging", function () {
it("should merge two items in duplicates view", function* () {
var item1 = yield createDataObject('item', { setTitle: true });
@ -28,28 +49,10 @@ describe("Duplicate Items", function () {
yield item2.saveTx();
var uri2 = Zotero.URI.getItemURI(item2);
var userLibraryID = Zotero.Libraries.userLibraryID;
var selected = yield cv.selectByID('D' + userLibraryID);
assert.ok(selected);
yield waitForItemsLoad(win);
// Select the first item, which should select both
var iv = zp.itemsView;
var row = iv.getRowIndexByID(item1.id);
assert.isNumber(row);
var promise = iv.waitForSelect();
clickOnItemsRow(win, iv, row);
assert.equal(iv.selection.count, 2);
yield promise;
// Click merge button
var button = win.document.getElementById('zotero-duplicates-merge-button');
button.click();
yield waitForNotifierEvent('refresh', 'trash');
yield merge(item1.id);
// Items should be gone
var iv = zp.itemsView;
assert.isFalse(iv.getRowIndexByID(item1.id));
assert.isFalse(iv.getRowIndexByID(item2.id));
assert.isTrue(item2.deleted);
@ -68,26 +71,10 @@ describe("Duplicate Items", function () {
item2.setCollections([collection2.id]);
yield item2.saveTx();
var userLibraryID = Zotero.Libraries.userLibraryID;
var selected = yield cv.selectByID('D' + userLibraryID);
assert.ok(selected);
yield waitForItemsLoad(win);
// Select the first item, which should select both
var iv = zp.itemsView;
var row = iv.getRowIndexByID(item1.id);
var promise = iv.waitForSelect();
clickOnItemsRow(win, iv, row);
yield promise;
// Click merge button
var button = win.document.getElementById('zotero-duplicates-merge-button');
button.click();
yield waitForNotifierEvent('refresh', 'trash');
yield merge(item1.id);
// Items should be gone
var iv = zp.itemsView;
assert.isFalse(iv.getRowIndexByID(item1.id));
assert.isFalse(iv.getRowIndexByID(item2.id));
assert.isTrue(item2.deleted);

View file

@ -413,6 +413,371 @@ describe("Zotero.Items", function () {
var rels = item3.getRelationsByPredicate(predicate);
assert.deepEqual(rels, [item2URI]);
})
it("should merge identical attachments based on file hash", async function () {
	// Two items, each holding its own copy of the same test PDF
	let master = await createDataObject('item', { setTitle: true });
	let masterPDF = await importPDFAttachment(master);

	let duplicate = master.clone();
	await duplicate.saveTx();
	let duplicatePDF = await importPDFAttachment(duplicate);

	await Zotero.Items.merge(master, [duplicate]);

	// Master keeps its single attachment
	assert.isFalse(master.deleted);
	assert.isFalse(masterPDF.deleted);
	assert.equal(master.numAttachments(true), 1);
	// Duplicate and its attachment are trashed
	assert.isTrue(duplicate.deleted);
	assert.isTrue(duplicatePDF.deleted);
});
it("should merge one attachment per item into the master attachment", async function () {
	let master = await createDataObject('item', { setTitle: true });
	let masterPDF = await importPDFAttachment(master);

	// Two further copies of the item, each with the same PDF
	let secondItem = master.clone();
	await secondItem.saveTx();
	let secondPDF = await importPDFAttachment(secondItem);

	let thirdItem = master.clone();
	await thirdItem.saveTx();
	let thirdPDF = await importPDFAttachment(thirdItem);

	await Zotero.Items.merge(master, [secondItem, thirdItem]);

	assert.isFalse(master.deleted);
	assert.isFalse(masterPDF.deleted);
	assert.equal(master.numAttachments(true), 1);
	// Both duplicates collapse into the single master attachment
	assert.isTrue(secondItem.deleted);
	assert.isTrue(secondPDF.deleted);
	assert.isTrue(thirdItem.deleted);
	assert.isTrue(thirdPDF.deleted);
});
it("should merge identical attachments based on content hash", async function () {
	let master = await createDataObject('item', { setTitle: true });
	let masterPDF = await importPDFAttachment(master);

	let duplicate = master.clone();
	await duplicate.saveTx();
	let duplicatePDF = await importFileAttachment(
		'duplicatesMerge_test_new_md5.pdf',
		{ parentItemID: duplicate.id }
	);

	// Precondition: same text, different bytes
	assert.equal(await masterPDF.attachmentText, await duplicatePDF.attachmentText);
	assert.notEqual(await masterPDF.attachmentHash, await duplicatePDF.attachmentHash);

	await Zotero.Items.merge(master, [duplicate]);

	assert.isFalse(master.deleted);
	assert.isFalse(masterPDF.deleted);
	assert.equal(master.numAttachments(true), 1);
	assert.isTrue(duplicate.deleted);
	assert.isTrue(duplicatePDF.deleted);
});
it("shouldn't merge based on content hash when files are empty", async function () {
	let master = await createDataObject('item', { setTitle: true });
	let masterPDF = await importFileAttachment('empty.pdf', { parentItemID: master.id });

	let duplicate = master.clone();
	await duplicate.saveTx();
	let duplicatePDF = await importFileAttachment(
		'duplicatesMerge_empty_new_md5.pdf',
		{ parentItemID: duplicate.id }
	);

	// Precondition: both files have the same (empty) text but different bytes
	assert.equal(await masterPDF.attachmentText, await duplicatePDF.attachmentText);
	assert.notEqual(await masterPDF.attachmentHash, await duplicatePDF.attachmentHash);
	assert.isEmpty(await masterPDF.attachmentText);

	await Zotero.Items.merge(master, [duplicate]);

	assert.isFalse(master.deleted);
	assert.isFalse(masterPDF.deleted);
	// The empty attachment is moved over rather than merged
	assert.equal(master.numAttachments(true), 2);
	assert.isTrue(duplicate.deleted);
	assert.isFalse(duplicatePDF.deleted);
});
it("should allow small differences when hashing content", async function () {
	let item1 = await createDataObject('item', { setTitle: true });
	let attachment1 = await importFileAttachment('duplicatesMerge_JSTOR_1.pdf', { parentItemID: item1.id });

	let item2 = item1.clone();
	await item2.saveTx();
	let attachment2 = await importFileAttachment('duplicatesMerge_JSTOR_2.pdf', { parentItemID: item2.id });

	// Precondition: neither the text nor the bytes are identical
	assert.notEqual(await attachment1.attachmentText, await attachment2.attachmentText);
	assert.notEqual(await attachment1.attachmentHash, await attachment2.attachmentHash);

	// _hashAttachmentText() resolves to the hash string itself (or null),
	// so compare the values directly. Reading a property off them would
	// compare undefined with undefined and always pass.
	let textHash1 = await Zotero.Items._hashAttachmentText(attachment1);
	let textHash2 = await Zotero.Items._hashAttachmentText(attachment2);
	assert.ok(textHash1);
	assert.equal(textHash1, textHash2);

	await Zotero.Items.merge(item1, [item2]);

	assert.isFalse(item1.deleted);
	assert.isFalse(attachment1.deleted);
	assert.equal(item1.numAttachments(true), 1);
	assert.isTrue(item2.deleted);
	assert.isTrue(attachment2.deleted);
});
it("should keep similar but not identical attachments separate", async function () {
	let master = await createDataObject('item', { setTitle: true });
	let shortPDF = await importFileAttachment('wonderland_short.pdf', { parentItemID: master.id });

	let duplicate = master.clone();
	await duplicate.saveTx();
	let longPDF = await importFileAttachment('wonderland_long.pdf', { parentItemID: duplicate.id });

	// Precondition: the files differ in both text and bytes
	assert.notEqual(await shortPDF.attachmentText, await longPDF.attachmentText);
	assert.notEqual(await shortPDF.attachmentHash, await longPDF.attachmentHash);

	await Zotero.Items.merge(master, [duplicate]);

	assert.isFalse(master.deleted);
	assert.isFalse(shortPDF.deleted);
	// The longer file is moved to the master, not merged into the short one
	assert.equal(master.numAttachments(true), 2);
	assert.isTrue(duplicate.deleted);
	assert.isFalse(longPDF.deleted);
});
it("should only match attachments one-to-one", async function () {
	let master = await createDataObject('item', { setTitle: true });
	let masterPDF = await importFileAttachment(
		'wonderland_short_watermarked_1.pdf',
		{ parentItemID: master.id }
	);

	// The duplicate carries two copies of a file that matches the master's
	let duplicate = master.clone();
	await duplicate.saveTx();
	let firstCopy = await importFileAttachment(
		'wonderland_short_watermarked_2.pdf',
		{ parentItemID: duplicate.id }
	);
	let secondCopy = await importFileAttachment(
		'wonderland_short_watermarked_2.pdf',
		{ parentItemID: duplicate.id }
	);

	await Zotero.Items.merge(master, [duplicate]);

	assert.isFalse(master.deleted);
	assert.isFalse(masterPDF.deleted);
	assert.equal(master.numAttachments(true), 2);
	assert.isTrue(duplicate.deleted);
	// Exactly one of the copies was merged; it doesn't matter which
	let trashedCopies = [firstCopy, secondCopy].filter(copy => copy.deleted);
	assert.equal(trashedCopies.length, 1);
});
it("should copy annotations when merging", async function () {
	let master = await createDataObject('item', { setTitle: true });
	let masterPDF = await importPDFAttachment(master);
	let masterAnnotation = await createAnnotation('note', masterPDF);

	let duplicate = master.clone();
	await duplicate.saveTx();
	let duplicatePDF = await importPDFAttachment(duplicate);
	let duplicateAnnotation = await createAnnotation('highlight', duplicatePDF);
	let annotationNote = await Zotero.EditorInstance.createNoteFromAnnotations(
		[duplicateAnnotation],
		duplicate.id
	);

	// Precondition: the note links to the duplicate's attachment
	assert.include(annotationNote.getNote(), duplicatePDF.key);

	await Zotero.Items.merge(master, [duplicate]);

	assert.isFalse(master.deleted);
	assert.isFalse(masterPDF.deleted);
	assert.isFalse(masterAnnotation.deleted);
	assert.equal(master.numAttachments(true), 1);
	assert.isTrue(duplicate.deleted);
	assert.isTrue(duplicatePDF.deleted);
	assert.isFalse(duplicateAnnotation.deleted);

	// Both annotations now live on the master attachment
	assert.equal(masterAnnotation.parentItemID, masterPDF.id);
	assert.equal(duplicateAnnotation.parentItemID, masterPDF.id);

	// Links in the note were rewritten to the master item and attachment
	let noteHTML = () => annotationNote.getNote();
	assert.notInclude(noteHTML(), duplicate.key);
	assert.include(noteHTML(), master.key);
	assert.notInclude(noteHTML(), duplicatePDF.key);
	assert.include(noteHTML(), masterPDF.key);
});
it("should update all item keys when moving notes", async function () {
	const filenames = [
		'recognizePDF_test_arXiv.pdf',
		'recognizePDF_test_DOI.pdf',
		'recognizePDF_test_title.pdf'
	];

	// Master item with one attachment per file
	let master = await createDataObject('item', { setTitle: true });
	let masterAttachments = [];
	for (let filename of filenames) {
		masterAttachments.push(await importFileAttachment(filename, { parentID: master.id }));
	}

	// Duplicate with the same files, each annotated and with a note
	// generated from that annotation
	let duplicate = master.clone();
	await duplicate.saveTx();
	let duplicateAttachments = [];
	let duplicateNotes = [];
	for (let filename of filenames) {
		let attachment = await importFileAttachment(filename, { parentID: duplicate.id });
		let annotation = await createAnnotation('highlight', attachment);
		let note = await Zotero.EditorInstance.createNoteFromAnnotations([annotation], duplicate.id);
		duplicateAttachments.push(attachment);
		duplicateNotes.push(note);

		assert.include(note.getNote(), duplicate.key);
		assert.include(note.getNote(), attachment.key);
	}

	await Zotero.Items.merge(master, [duplicate]);

	assert.isFalse(master.deleted);
	assert.equal(master.numAttachments(true), 3);
	assert.isTrue(duplicate.deleted);

	// Every note moved to the master and now links only to master keys
	duplicateNotes.forEach((note, index) => {
		assert.equal(note.parentItemID, master.id);
		assert.include(note.getNote(), master.key);
		assert.notInclude(note.getNote(), duplicate.key);
		assert.include(note.getNote(), masterAttachments[index].key);
		assert.notInclude(note.getNote(), duplicateAttachments[index].key);
	});
});
it("should merge snapshots with the same title, even if URL differs", async function () {
	let snapshotFile = getTestDataDirectory();
	snapshotFile.append('snapshot');
	snapshotFile.append('index.html');
	let snapshotContent = await Zotero.File.getContentsAsync(snapshotFile);

	// Import the same snapshot under the given parent with the given URL
	let importSnapshot = (parentItemID, url) => Zotero.Attachments.importFromSnapshotContent({
		parentItemID,
		url,
		title: 'Snapshot',
		snapshotContent
	});

	let master = await createDataObject('item', { setTitle: true });
	let masterSnapshot = await importSnapshot(master.id, 'https://example.com/test.html');

	let duplicate = master.clone();
	await duplicate.saveTx();
	let duplicateSnapshot = await importSnapshot(
		duplicate.id,
		'https://otherdomain.example.com/test.html'
	);

	await Zotero.Items.merge(master, [duplicate]);

	assert.isFalse(master.deleted);
	assert.isFalse(masterSnapshot.deleted);
	assert.equal(master.numAttachments(true), 1);
	assert.isTrue(duplicate.deleted);
	assert.isTrue(duplicateSnapshot.deleted);
});
it("should merge linked URLs", async function () {
	// All three links are identical apart from their parent
	let linkTo = parentItemID => Zotero.Attachments.linkFromURL({
		url: 'https://example.com/',
		title: 'Catalog Entry',
		parentItemID
	});

	let master = await createDataObject('item', { setTitle: true });
	let masterLink = await linkTo(master.id);

	let duplicate = master.clone();
	await duplicate.saveTx();
	let firstDuplicateLink = await linkTo(duplicate.id);
	let secondDuplicateLink = await linkTo(duplicate.id);

	await Zotero.Items.merge(master, [duplicate]);

	assert.isFalse(master.deleted);
	assert.isFalse(masterLink.deleted);
	assert.equal(masterLink.getField('url'), 'https://example.com/');
	assert.equal(master.numAttachments(true), 2);
	assert.isTrue(duplicate.deleted);
	// The first link merges into the master's; the second has nothing left
	// to match and is moved over
	assert.isTrue(firstDuplicateLink.deleted);
	assert.equal(secondDuplicateLink.parentItemID, master.id);
	assert.isFalse(secondDuplicateLink.deleted);
});
it("should keep web attachment with same URL but different title", async function () {
	// Same URL throughout; only title and parent vary
	let linkTo = (parentItemID, title) => Zotero.Attachments.linkFromURL({
		url: 'https://example.com/',
		title,
		parentItemID
	});

	let master = await createDataObject('item', { setTitle: true });
	let masterLink = await linkTo(master.id, 'Catalog Entry');

	let duplicate = master.clone();
	await duplicate.saveTx();
	let differentTitleLink = await linkTo(duplicate.id, 'Official Website');
	let sameTitleLink = await linkTo(duplicate.id, 'Catalog Entry');

	await Zotero.Items.merge(master, [duplicate]);

	assert.isFalse(master.deleted);
	assert.isFalse(masterLink.deleted);
	assert.equal(masterLink.getField('url'), 'https://example.com/');
	assert.equal(master.numAttachments(true), 2);
	assert.isTrue(duplicate.deleted);
	// A different title means no match, so the link is moved over intact
	assert.equal(differentTitleLink.parentItemID, master.id);
	assert.isFalse(differentTitleLink.deleted);
	// The link with the matching title is merged
	assert.isTrue(sameTitleLink.deleted);
});
it("should move related items of merged attachments", async function () {
	let relatedItem = await createDataObject('item');

	let master = await createDataObject('item', { setTitle: true });
	let masterPDF = await importPDFAttachment(master);

	let duplicate = master.clone();
	await duplicate.saveTx();
	let duplicatePDF = await importPDFAttachment(duplicate);
	duplicatePDF.addRelatedItem(relatedItem);

	await Zotero.Items.merge(master, [duplicate]);

	assert.isFalse(master.deleted);
	assert.isFalse(masterPDF.deleted);
	assert.equal(master.numAttachments(true), 1);
	assert.isTrue(duplicate.deleted);
	assert.isTrue(duplicatePDF.deleted);

	// The related item now hangs off the surviving attachment
	let related = masterPDF.relatedItems;
	assert.lengthOf(related, 1);
	assert.equal(related[0], relatedItem.key);
});
it("should move merge-tracking relation from replaced attachment to master attachment", async function () {
	let firstItem = await createDataObject('item');
	let firstPDF = await importPDFAttachment(firstItem);

	let secondItem = await createDataObject('item');
	let secondPDF = await importPDFAttachment(secondItem);
	let secondPDFURI = Zotero.URI.getItemURI(secondPDF);

	let thirdItem = await createDataObject('item');
	let thirdPDF = await importPDFAttachment(thirdItem);
	let thirdPDFURI = Zotero.URI.getItemURI(thirdPDF);

	// Chain two merges: third into second, then second into first
	await Zotero.Items.merge(secondItem, [thirdItem]);
	await Zotero.Items.merge(firstItem, [secondItem]);

	// The surviving attachment tracks both attachments it replaced
	let replaced = firstPDF.getRelationsByPredicate(Zotero.Relations.replacedItemPredicate);
	assert.lengthOf(replaced, 2);
	assert.sameMembers(replaced, [secondPDFURI, thirdPDFURI]);
});
})