Duplicates Merge: Preserve embedded annotations (#2728)

This commit is contained in:
Abe Jellinek 2022-07-29 05:06:44 -04:00 committed by Dan Stillman
parent 63148dff3b
commit 1f9e518581
7 changed files with 139 additions and 20 deletions

View file

@ -3951,6 +3951,29 @@ Zotero.Item.prototype.getAnnotations = function (includeTrashed) {
}; };
/**
* Determine if the item is a PDF attachment that exists on disk and contains
* embedded markup annotations.
*
* @return {Promise<Boolean>}
*/
Zotero.Item.prototype.hasEmbeddedAnnotations = async function () {
if (!this.isPDFAttachment()) {
return false;
}
let path = await this.getFilePathAsync();
if (!path) {
return false;
}
let contents = await Zotero.File.getContentsAsync(path);
// Check for "markup" annotations per the PDF spec
// https://opensource.adobe.com/dc-acrobat-sdk-docs/pdfstandards/PDF32000_2008.pdf, p. 390
let re = /\s\/Subtype\s+\/(Text|FreeText|Line|Square|Circle|Polygon|PolyLine|Highlight|Underline|Squiggly|StrikeOut|Stamp|Caret|Ink|FileAttachment|Sound|Redact)\s/;
return re.test(contents);
};
// //
// Methods dealing with item tags // Methods dealing with item tags

View file

@ -1034,6 +1034,29 @@ Zotero.Items = function() {
for (let otherItem of otherItems) { for (let otherItem of otherItems) {
let mergedMasterAttachments = new Set(); let mergedMasterAttachments = new Set();
let doMerge = async (fromAttachment, toAttachment) => {
mergedMasterAttachments.add(toAttachment.id);
await this.moveChildItems(fromAttachment, toAttachment, true);
await this._moveEmbeddedNote(fromAttachment, toAttachment);
await this._moveRelations(fromAttachment, toAttachment);
fromAttachment.deleted = true;
await fromAttachment.save();
// Later on, when processing notes, we'll use this to remap
// URLs pointing to the old attachment.
remapAttachmentKeys.set(fromAttachment.key, toAttachment.key);
// Items can only have one replaced item predicate
if (!toAttachment.getRelationsByPredicate(Zotero.Relations.replacedItemPredicate)) {
toAttachment.addRelation(Zotero.Relations.replacedItemPredicate,
Zotero.URI.getItemURI(fromAttachment));
}
await toAttachment.save();
};
for (let otherAttachment of await this.getAsync(otherItem.getAttachments(true))) { for (let otherAttachment of await this.getAsync(otherItem.getAttachments(true))) {
if (!otherAttachment.isPDFAttachment()) { if (!otherAttachment.isPDFAttachment()) {
continue; continue;
@ -1064,13 +1087,12 @@ Zotero.Items = function() {
await otherAttachment.save(); await otherAttachment.save();
continue; continue;
} }
mergedMasterAttachments.add(masterAttachmentID);
let masterAttachment = await this.getAsync(masterAttachmentID); let masterAttachment = await this.getAsync(masterAttachmentID);
if (masterAttachment.attachmentContentType !== otherAttachment.attachmentContentType) { if (masterAttachment.attachmentContentType !== otherAttachment.attachmentContentType) {
Zotero.debug(`Master attachment ${masterAttachment.key} matches ${otherAttachment.key}, ` Zotero.debug(`Master attachment ${masterAttachment.key} matches ${otherAttachment.key}, `
+ 'but content types differ - moving'); + 'but content types differ - keeping both');
otherAttachment.parentItemID = item.id; otherAttachment.parentItemID = item.id;
await otherAttachment.save(); await otherAttachment.save();
continue; continue;
@ -1079,31 +1101,35 @@ Zotero.Items = function() {
if (!((masterAttachment.isImportedAttachment() && otherAttachment.isImportedAttachment()) if (!((masterAttachment.isImportedAttachment() && otherAttachment.isImportedAttachment())
|| (masterAttachment.isLinkedFileAttachment() && otherAttachment.isLinkedFileAttachment()))) { || (masterAttachment.isLinkedFileAttachment() && otherAttachment.isLinkedFileAttachment()))) {
Zotero.debug(`Master attachment ${masterAttachment.key} matches ${otherAttachment.key}, ` Zotero.debug(`Master attachment ${masterAttachment.key} matches ${otherAttachment.key}, `
+ 'but link modes differ - moving'); + 'but link modes differ - keeping both');
otherAttachment.parentItemID = item.id; otherAttachment.parentItemID = item.id;
await otherAttachment.save(); await otherAttachment.save();
continue; continue;
} }
Zotero.debug(`Master attachment ${masterAttachment.key} matches ${otherAttachment.key} - merging`); // Check whether master and other have embedded annotations
await this.moveChildItems(otherAttachment, masterAttachment, true); // Master yes, other yes -> keep both
await this._moveEmbeddedNote(otherAttachment, masterAttachment); // Master yes, other no -> keep master
await this._moveRelations(otherAttachment, masterAttachment); // Master no, other yes -> keep other
if (await otherAttachment.hasEmbeddedAnnotations()) {
otherAttachment.deleted = true; if (await masterAttachment.hasEmbeddedAnnotations()) {
Zotero.debug(`Master attachment ${masterAttachment.key} matches ${otherAttachment.key}, `
+ 'but both have embedded annotations - keeping both');
otherAttachment.parentItemID = item.id;
await otherAttachment.save(); await otherAttachment.save();
}
// Later on, when processing notes, we'll use this to remap else {
// URLs pointing to the old attachment. Zotero.debug(`Master attachment ${masterAttachment.key} matches ${otherAttachment.key}, `
remapAttachmentKeys.set(otherAttachment.key, masterAttachment.key); + 'but other has embedded annotations - merging into other');
await doMerge(masterAttachment, otherAttachment);
// Items can only have one replaced item predicate otherAttachment.parentItemID = item.id;
if (!masterAttachment.getRelationsByPredicate(Zotero.Relations.replacedItemPredicate)) { await otherAttachment.save();
masterAttachment.addRelation(Zotero.Relations.replacedItemPredicate, }
Zotero.URI.getItemURI(otherAttachment)); continue;
} }
await masterAttachment.save(); Zotero.debug(`Master attachment ${masterAttachment.key} matches ${otherAttachment.key} - merging into master`);
await doMerge(otherAttachment, masterAttachment);
} }
} }

Binary file not shown.

Binary file not shown.

Binary file not shown.

View file

@ -1476,6 +1476,23 @@ describe("Zotero.Item", function () {
}); });
}); });
describe("#hasEmbeddedAnnotations()", function () {
it("should recognize a highlight annotation", async function () {
let attachment = await importFileAttachment('duplicatesMerge_annotated_1.pdf');
assert.isTrue(await attachment.hasEmbeddedAnnotations());
});
it("should recognize a strikeout annotation", async function () {
let attachment = await importFileAttachment('duplicatesMerge_annotated_2.pdf');
assert.isTrue(await attachment.hasEmbeddedAnnotations());
});
it("should not recognize a link annotation", async function () {
let attachment = await importFileAttachment('duplicatesMerge_notAnnotated.pdf');
assert.isFalse(await attachment.hasEmbeddedAnnotations());
});
});
describe("#isEditable()", function () { describe("#isEditable()", function () {
var group; var group;
var groupAttachment; var groupAttachment;

View file

@ -906,6 +906,59 @@ describe("Zotero.Items", function () {
assert.isFalse(attachment2.deleted); assert.isFalse(attachment2.deleted);
assert.equal(attachment2.parentItemID, item1.id); assert.equal(attachment2.parentItemID, item1.id);
}); });
it("should not merge two matching PDF attachments with embedded annotations", async function () {
let item1 = await createDataObject('item', { setTitle: true });
let attachment1 = await importFileAttachment('duplicatesMerge_annotated_1.pdf', { parentID: item1.id });
let item2 = item1.clone();
await item2.saveTx();
let attachment2 = await importFileAttachment('duplicatesMerge_annotated_2.pdf', { parentID: item2.id });
await Zotero.Items.merge(item1, [item2]);
assert.isFalse(item1.deleted);
assert.isFalse(attachment1.deleted);
assert.equal(item1.numAttachments(true), 2);
assert.isTrue(item2.deleted);
assert.isFalse(attachment2.deleted);
assert.equal(attachment2.parentItemID, item1.id);
});
it("should merge a non-master PDF without embedded annotations into a master PDF with embedded annotations", async function () {
let item1 = await createDataObject('item', { setTitle: true });
let attachment1 = await importFileAttachment('duplicatesMerge_annotated_1.pdf', { parentID: item1.id });
let item2 = item1.clone();
await item2.saveTx();
let attachment2 = await importFileAttachment('duplicatesMerge_notAnnotated.pdf', { parentID: item2.id });
await Zotero.Items.merge(item1, [item2]);
assert.isFalse(item1.deleted);
assert.isFalse(attachment1.deleted);
assert.equal(item1.numAttachments(true), 1);
assert.isTrue(item2.deleted);
assert.isTrue(attachment2.deleted);
});
it("should merge a master PDF without embedded annotations into a non-master PDF with embedded annotations", async function () {
let item1 = await createDataObject('item', { setTitle: true });
let attachment1 = await importFileAttachment('duplicatesMerge_notAnnotated.pdf', { parentID: item1.id });
let item2 = item1.clone();
await item2.saveTx();
let attachment2 = await importFileAttachment('duplicatesMerge_annotated_1.pdf', { parentID: item2.id });
await Zotero.Items.merge(item1, [item2]);
assert.isFalse(item1.deleted);
assert.isTrue(attachment1.deleted);
assert.equal(item1.numAttachments(false), 1); // Don't count the deleted attachment
assert.isTrue(item2.deleted);
assert.isFalse(attachment2.deleted);
assert.equal(attachment2.parentItemID, item1.id);
});
}) })