Duplicates Merge: Preserve embedded annotations (#2728)

This commit is contained in:
Abe Jellinek 2022-07-29 05:06:44 -04:00 committed by Dan Stillman
parent 63148dff3b
commit 1f9e518581
7 changed files with 139 additions and 20 deletions

View file

@ -3951,6 +3951,29 @@ Zotero.Item.prototype.getAnnotations = function (includeTrashed) {
};
/**
* Determine if the item is a PDF attachment that exists on disk and contains
* embedded markup annotations.
*
* @return {Promise<Boolean>}
*/
Zotero.Item.prototype.hasEmbeddedAnnotations = async function () {
if (!this.isPDFAttachment()) {
return false;
}
let path = await this.getFilePathAsync();
if (!path) {
return false;
}
let contents = await Zotero.File.getContentsAsync(path);
// Check for "markup" annotations per the PDF spec
// https://opensource.adobe.com/dc-acrobat-sdk-docs/pdfstandards/PDF32000_2008.pdf, p. 390
let re = /\s\/Subtype\s+\/(Text|FreeText|Line|Square|Circle|Polygon|PolyLine|Highlight|Underline|Squiggly|StrikeOut|Stamp|Caret|Ink|FileAttachment|Sound|Redact)\s/;
return re.test(contents);
};
//
// Methods dealing with item tags

View file

@ -1034,6 +1034,29 @@ Zotero.Items = function() {
for (let otherItem of otherItems) {
let mergedMasterAttachments = new Set();
let doMerge = async (fromAttachment, toAttachment) => {
mergedMasterAttachments.add(toAttachment.id);
await this.moveChildItems(fromAttachment, toAttachment, true);
await this._moveEmbeddedNote(fromAttachment, toAttachment);
await this._moveRelations(fromAttachment, toAttachment);
fromAttachment.deleted = true;
await fromAttachment.save();
// Later on, when processing notes, we'll use this to remap
// URLs pointing to the old attachment.
remapAttachmentKeys.set(fromAttachment.key, toAttachment.key);
// Items can only have one replaced item predicate
if (!toAttachment.getRelationsByPredicate(Zotero.Relations.replacedItemPredicate)) {
toAttachment.addRelation(Zotero.Relations.replacedItemPredicate,
Zotero.URI.getItemURI(fromAttachment));
}
await toAttachment.save();
};
for (let otherAttachment of await this.getAsync(otherItem.getAttachments(true))) {
if (!otherAttachment.isPDFAttachment()) {
continue;
@ -1064,13 +1087,12 @@ Zotero.Items = function() {
await otherAttachment.save();
continue;
}
mergedMasterAttachments.add(masterAttachmentID);
let masterAttachment = await this.getAsync(masterAttachmentID);
if (masterAttachment.attachmentContentType !== otherAttachment.attachmentContentType) {
Zotero.debug(`Master attachment ${masterAttachment.key} matches ${otherAttachment.key}, `
+ 'but content types differ - moving');
+ 'but content types differ - keeping both');
otherAttachment.parentItemID = item.id;
await otherAttachment.save();
continue;
@ -1079,31 +1101,35 @@ Zotero.Items = function() {
if (!((masterAttachment.isImportedAttachment() && otherAttachment.isImportedAttachment())
|| (masterAttachment.isLinkedFileAttachment() && otherAttachment.isLinkedFileAttachment()))) {
Zotero.debug(`Master attachment ${masterAttachment.key} matches ${otherAttachment.key}, `
+ 'but link modes differ - moving');
+ 'but link modes differ - keeping both');
otherAttachment.parentItemID = item.id;
await otherAttachment.save();
continue;
}
Zotero.debug(`Master attachment ${masterAttachment.key} matches ${otherAttachment.key} - merging`);
await this.moveChildItems(otherAttachment, masterAttachment, true);
await this._moveEmbeddedNote(otherAttachment, masterAttachment);
await this._moveRelations(otherAttachment, masterAttachment);
otherAttachment.deleted = true;
await otherAttachment.save();
// Later on, when processing notes, we'll use this to remap
// URLs pointing to the old attachment.
remapAttachmentKeys.set(otherAttachment.key, masterAttachment.key);
// Items can only have one replaced item predicate
if (!masterAttachment.getRelationsByPredicate(Zotero.Relations.replacedItemPredicate)) {
masterAttachment.addRelation(Zotero.Relations.replacedItemPredicate,
Zotero.URI.getItemURI(otherAttachment));
// Check whether master and other have embedded annotations
// Master yes, other yes -> keep both
// Master yes, other no -> keep master
// Master no, other yes -> keep other
if (await otherAttachment.hasEmbeddedAnnotations()) {
if (await masterAttachment.hasEmbeddedAnnotations()) {
Zotero.debug(`Master attachment ${masterAttachment.key} matches ${otherAttachment.key}, `
+ 'but both have embedded annotations - keeping both');
otherAttachment.parentItemID = item.id;
await otherAttachment.save();
}
else {
Zotero.debug(`Master attachment ${masterAttachment.key} matches ${otherAttachment.key}, `
+ 'but other has embedded annotations - merging into other');
await doMerge(masterAttachment, otherAttachment);
otherAttachment.parentItemID = item.id;
await otherAttachment.save();
}
continue;
}
await masterAttachment.save();
Zotero.debug(`Master attachment ${masterAttachment.key} matches ${otherAttachment.key} - merging into master`);
await doMerge(otherAttachment, masterAttachment);
}
}

Binary file not shown.

Binary file not shown.

Binary file not shown.

View file

@ -1475,6 +1475,23 @@ describe("Zotero.Item", function () {
assert.sameMembers(items, [annotation1, annotation2]);
});
});
describe("#hasEmbeddedAnnotations()", function () {
it("should recognize a highlight annotation", async function () {
let attachment = await importFileAttachment('duplicatesMerge_annotated_1.pdf');
assert.isTrue(await attachment.hasEmbeddedAnnotations());
});
it("should recognize a strikeout annotation", async function () {
let attachment = await importFileAttachment('duplicatesMerge_annotated_2.pdf');
assert.isTrue(await attachment.hasEmbeddedAnnotations());
});
it("should not recognize a link annotation", async function () {
let attachment = await importFileAttachment('duplicatesMerge_notAnnotated.pdf');
assert.isFalse(await attachment.hasEmbeddedAnnotations());
});
});
describe("#isEditable()", function () {
var group;

View file

@ -906,6 +906,59 @@ describe("Zotero.Items", function () {
assert.isFalse(attachment2.deleted);
assert.equal(attachment2.parentItemID, item1.id);
});
it("should not merge two matching PDF attachments with embedded annotations", async function () {
let item1 = await createDataObject('item', { setTitle: true });
let attachment1 = await importFileAttachment('duplicatesMerge_annotated_1.pdf', { parentID: item1.id });
let item2 = item1.clone();
await item2.saveTx();
let attachment2 = await importFileAttachment('duplicatesMerge_annotated_2.pdf', { parentID: item2.id });
await Zotero.Items.merge(item1, [item2]);
assert.isFalse(item1.deleted);
assert.isFalse(attachment1.deleted);
assert.equal(item1.numAttachments(true), 2);
assert.isTrue(item2.deleted);
assert.isFalse(attachment2.deleted);
assert.equal(attachment2.parentItemID, item1.id);
});
it("should merge a non-master PDF without embedded annotations into a master PDF with embedded annotations", async function () {
let item1 = await createDataObject('item', { setTitle: true });
let attachment1 = await importFileAttachment('duplicatesMerge_annotated_1.pdf', { parentID: item1.id });
let item2 = item1.clone();
await item2.saveTx();
let attachment2 = await importFileAttachment('duplicatesMerge_notAnnotated.pdf', { parentID: item2.id });
await Zotero.Items.merge(item1, [item2]);
assert.isFalse(item1.deleted);
assert.isFalse(attachment1.deleted);
assert.equal(item1.numAttachments(true), 1);
assert.isTrue(item2.deleted);
assert.isTrue(attachment2.deleted);
});
it("should merge a master PDF without embedded annotations into a non-master PDF with embedded annotations", async function () {
let item1 = await createDataObject('item', { setTitle: true });
let attachment1 = await importFileAttachment('duplicatesMerge_notAnnotated.pdf', { parentID: item1.id });
let item2 = item1.clone();
await item2.saveTx();
let attachment2 = await importFileAttachment('duplicatesMerge_annotated_1.pdf', { parentID: item2.id });
await Zotero.Items.merge(item1, [item2]);
assert.isFalse(item1.deleted);
assert.isTrue(attachment1.deleted);
assert.equal(item1.numAttachments(false), 1); // Don't count the deleted attachment
assert.isTrue(item2.deleted);
assert.isFalse(attachment2.deleted);
assert.equal(attachment2.parentItemID, item1.id);
});
})