Automatically retrieve metadata when saving PDFs

Applies to dragging to the collections pane or the items pane, adding
via the New Item menu, or saving via the connector server.

If the renaming pref is enabled, the PDF is renamed after recognition.

Can be disabled in the preferences.

Closes #917
This commit is contained in:
Dan Stillman 2018-02-28 21:43:18 -05:00
parent 081793f72f
commit 97661539dc
12 changed files with 192 additions and 32 deletions

View file

@ -37,6 +37,7 @@
<preference id="pref-reportTranslationFailure" name="extensions.zotero.reportTranslationFailure" type="bool"/>
<preference id="pref-automaticSnapshots" name="extensions.zotero.automaticSnapshots" type="bool"/>
<preference id="pref-downloadAssociatedFiles" name="extensions.zotero.downloadAssociatedFiles" type="bool"/>
<preference id="pref-autoRecognizeFiles" name="extensions.zotero.autoRecognizeFiles" type="bool"/>
<preference id="pref-renameAttachmentFiles" name="extensions.zotero.renameAttachmentFiles.automatic" type="bool"/>
<preference id="pref-automaticTags" name="extensions.zotero.automaticTags" type="bool"/>
<preference id="pref-trashAutoEmptyDays" name="extensions.zotero.trashAutoEmptyDays" type="int"/>
@ -123,6 +124,7 @@
label="&zotero.preferences.automaticSnapshots;"
preference="pref-automaticSnapshots"/>
<checkbox label="&zotero.preferences.downloadAssociatedFiles;" preference="pref-downloadAssociatedFiles"/>
<checkbox label="&zotero.preferences.autoRecognizeFiles;" preference="pref-autoRecognizeFiles"/>
<checkbox label="&zotero.preferences.renameAttachmentFiles;" preference="pref-renameAttachmentFiles"/>
<checkbox label="&zotero.preferences.automaticTags;" preference="pref-automaticTags"/>
<hbox align="center">

View file

@ -257,7 +257,7 @@ Zotero.Attachments = new function(){
* @param {String} [options.referrer]
* @param {CookieSandbox} [options.cookieSandbox]
* @param {Object} [options.saveOptions]
* @return {Promise<Zotero.Item>} - A promise for the created attachment item
* @return {Promise<Zotero.Item|false>} - A promise for the created attachment item
*/
this.importFromURL = Zotero.Promise.coroutine(function* (options) {
var libraryID = options.libraryID;
@ -298,7 +298,7 @@ Zotero.Attachments = new function(){
if (channel.responseStatus < 200 || channel.responseStatus >= 400) {
reject(new Error("Invalid response " + channel.responseStatus + " "
+ channel.responseStatusText + " for '" + url + "'"));
return;
return false;
}
}
try {

View file

@ -2242,19 +2242,20 @@ Zotero.CollectionTreeView.prototype.drop = Zotero.Promise.coroutine(function* (r
}
else if (dataType == 'text/x-moz-url' || dataType == 'application/x-moz-file') {
var targetLibraryID = targetTreeRow.ref.libraryID;
if (targetTreeRow.isCollection()) {
var parentCollectionID = targetTreeRow.ref.id;
}
else {
var parentCollectionID = false;
}
var addedItems = [];
for (var i=0; i<data.length; i++) {
var file = data[i];
if (dataType == 'text/x-moz-url') {
var url = data[i];
let item;
if (url.indexOf('file:///') == 0) {
let win = Services.wm.getMostRecentWindow("navigator:browser");
@ -2284,13 +2285,13 @@ Zotero.CollectionTreeView.prototype.drop = Zotero.Promise.coroutine(function* (r
}
if (dropEffect == 'link') {
yield Zotero.Attachments.linkFromFile({
item = yield Zotero.Attachments.linkFromFile({
file: file,
collections: parentCollectionID ? [parentCollectionID] : undefined
});
}
else {
yield Zotero.Attachments.importFromFile({
item = yield Zotero.Attachments.importFromFile({
file: file,
libraryID: targetLibraryID,
collections: parentCollectionID ? [parentCollectionID] : undefined
@ -2305,7 +2306,12 @@ Zotero.CollectionTreeView.prototype.drop = Zotero.Promise.coroutine(function* (r
}
}
}
addedItems.push(item);
}
// Automatically retrieve metadata for PDFs
Zotero.RecognizePDF.autoRecognizeItems(addedItems);
}
});

View file

@ -3236,6 +3236,7 @@ Zotero.ItemTreeView.prototype.drop = Zotero.Promise.coroutine(function* (row, or
var parentCollectionID = collectionTreeRow.ref.id;
}
let addedItems = [];
var notifierQueue = new Zotero.Notifier.Queue;
try {
// If there's a single file being added to a parent, automatic renaming is enabled,
@ -3275,13 +3276,14 @@ Zotero.ItemTreeView.prototype.drop = Zotero.Promise.coroutine(function* (row, or
// Still string, so remote URL
if (typeof file == 'string') {
let item;
if (parentItemID) {
if (!collectionTreeRow.filesEditable) {
let win = Services.wm.getMostRecentWindow("navigator:browser");
win.ZoteroPane.displayCannotEditLibraryFilesMessage();
return;
}
yield Zotero.Attachments.importFromURL({
item = yield Zotero.Attachments.importFromURL({
libraryID: targetLibraryID,
url,
renameIfAllowedType,
@ -3293,7 +3295,10 @@ Zotero.ItemTreeView.prototype.drop = Zotero.Promise.coroutine(function* (row, or
}
else {
let win = Services.wm.getMostRecentWindow("navigator:browser");
win.ZoteroPane.addItemFromURL(url, 'temporaryPDFHack'); // TODO: don't do this
item = yield win.ZoteroPane.addItemFromURL(url, 'temporaryPDFHack'); // TODO: don't do this
}
if (item) {
addedItems.push(item);
}
continue;
}
@ -3311,6 +3316,7 @@ Zotero.ItemTreeView.prototype.drop = Zotero.Promise.coroutine(function* (row, or
);
}
let item;
if (dropEffect == 'link') {
// Rename linked file, with unique suffix if necessary
try {
@ -3331,7 +3337,7 @@ Zotero.ItemTreeView.prototype.drop = Zotero.Promise.coroutine(function* (row, or
Zotero.logError(e);
}
yield Zotero.Attachments.linkFromFile({
item = yield Zotero.Attachments.linkFromFile({
file,
parentItemID,
collections: parentCollectionID ? [parentCollectionID] : undefined,
@ -3347,7 +3353,7 @@ Zotero.ItemTreeView.prototype.drop = Zotero.Promise.coroutine(function* (row, or
continue;
}
yield Zotero.Attachments.importFromFile({
item = yield Zotero.Attachments.importFromFile({
file,
fileBaseName,
libraryID: targetLibraryID,
@ -3367,11 +3373,20 @@ Zotero.ItemTreeView.prototype.drop = Zotero.Promise.coroutine(function* (row, or
}
}
}
if (item) {
addedItems.push(item);
}
}
}
finally {
yield Zotero.Notifier.commit(notifierQueue);
}
// Automatically retrieve metadata for PDFs
if (!parentItemID) {
Zotero.RecognizePDF.autoRecognizeItems(addedItems);
}
}
});

View file

@ -76,6 +76,25 @@ Zotero.RecognizePDF = new function () {
_processQueue();
};
/**
 * Start metadata retrieval for any PDF file attachments among the given items
 *
 * Does nothing if the 'autoRecognizeFiles' pref is off or if none of the
 * items is a file attachment with content type 'application/pdf'.
 *
 * @param {Zotero.Item[]} items - Items to check; falsy entries are skipped
 */
this.autoRecognizeItems = function (items) {
	if (!Zotero.Prefs.get('autoRecognizeFiles')) {
		return;
	}
	
	// Keep only PDF file attachments
	var pdfs = [];
	for (let item of items) {
		if (!item) continue;
		if (!item.isFileAttachment()) continue;
		if (item.attachmentContentType != 'application/pdf') continue;
		pdfs.push(item);
	}
	if (pdfs.length == 0) {
		return;
	}
	
	this.recognizeItems(pdfs);
	
	// Open the recognizer dialog only when there is an active Zotero pane
	if (Zotero.getActiveZoteroPane()) {
		Zotero_RecognizePDF_Dialog.open();
	}
};
/**
* Returns all rows
* @return {Array}

View file

@ -649,7 +649,13 @@ Zotero.Server.Connector.SaveSnapshot.prototype = {
contentType: "application/pdf",
cookieSandbox
});
yield session.addItem(item);
if (item) {
yield session.addItem(item);
// Automatically recognize PDF
Zotero.RecognizePDF.autoRecognizeItems([item]);
}
return 201;
}
catch (e) {

View file

@ -85,6 +85,7 @@ var ZoteroPane = new function()
// Set key down handler
document.getElementById('appcontent').addEventListener('keydown', ZoteroPane_Local.handleKeyDown, true);
// Hide or show the PDF recognizer button
Zotero.RecognizePDF.addListener('empty', function (row) {
document.getElementById('zotero-tb-recognize').hidden = true;
});
@ -3692,6 +3693,7 @@ var ZoteroPane = new function()
files.push(file.path);
}
var addedItems = [];
var collection;
var fileBaseName;
if (parentItemID) {
@ -3713,6 +3715,8 @@ var ZoteroPane = new function()
}
for (let file of files) {
let item;
if (link) {
// Rename linked file, with unique suffix if necessary
try {
@ -3733,7 +3737,7 @@ var ZoteroPane = new function()
Zotero.logError(e);
}
let item = yield Zotero.Attachments.linkFromFile({
item = yield Zotero.Attachments.linkFromFile({
file,
parentItemID,
collections: collection ? [collection] : undefined
@ -3746,7 +3750,7 @@ var ZoteroPane = new function()
continue;
}
yield Zotero.Attachments.importFromFile({
item = yield Zotero.Attachments.importFromFile({
file,
libraryID,
fileBaseName,
@ -3754,6 +3758,13 @@ var ZoteroPane = new function()
collections: collection ? [collection] : undefined
});
}
addedItems.push(item);
}
// Automatically retrieve metadata for top-level PDFs
if (!parentItemID) {
Zotero.RecognizePDF.autoRecognizeItems(addedItems);
}
});
@ -3917,6 +3928,9 @@ var ZoteroPane = new function()
});
/**
* @return {Zotero.Item|false} - The saved item, or false if item can't be saved
*/
this.addItemFromURL = Zotero.Promise.coroutine(function* (url, itemType, saveSnapshot, row) {
if (window.content && url == window.content.document.location.href) {
return this.addItemFromPage(itemType, saveSnapshot, row);
@ -3932,8 +3946,8 @@ var ZoteroPane = new function()
var processor = function (doc) {
return ZoteroPane_Local.addItemFromDocument(doc, itemType, saveSnapshot, row)
.then(function () {
deferred.resolve()
.then(function (item) {
deferred.resolve(item)
});
};
var done = function () {}
@ -3966,7 +3980,7 @@ var ZoteroPane = new function()
if (!ZoteroPane_Local.canEdit(row)) {
ZoteroPane_Local.displayCannotEditLibraryMessage();
return;
return false;
}
if (row !== undefined) {
@ -3983,7 +3997,7 @@ var ZoteroPane = new function()
if (!ZoteroPane_Local.canEditFiles(row)) {
ZoteroPane_Local.displayCannotEditLibraryFilesMessage();
return;
return false;
}
if (collectionTreeRow && collectionTreeRow.isCollection()) {
@ -4000,7 +4014,7 @@ var ZoteroPane = new function()
contentType: mimeType
});
this.selectItem(attachmentItem.id)
return;
return attachmentItem;
}
}
@ -4033,7 +4047,7 @@ var ZoteroPane = new function()
}
}
return item.id;
return item;
}
});
@ -4521,6 +4535,12 @@ var ZoteroPane = new function()
};
/**
 * Run metadata retrieval on the currently selected items and open the
 * recognizer dialog
 */
this.recognizeSelected = function () {
	var selectedItems = ZoteroPane.getSelectedItems();
	Zotero.RecognizePDF.recognizeItems(selectedItems);
	Zotero_RecognizePDF_Dialog.open();
};
this.createParentItemsFromSelected = Zotero.Promise.coroutine(function* () {
if (!this.canEdit()) {
this.displayCannotEditLibraryMessage();
@ -4954,11 +4974,6 @@ var ZoteroPane = new function()
if(_beforeReloadFunctions.indexOf(func) === -1) _beforeReloadFunctions.push(func);
}
/**
 * Run metadata retrieval on the currently selected items and open the
 * recognizer dialog
 */
this.recognizeSelected = function () {
	var selectedItems = ZoteroPane.getSelectedItems();
	Zotero.RecognizePDF.recognizeItems(selectedItems);
	Zotero_RecognizePDF_Dialog.open();
};
/**
* Implements nsIObserver for Zotero reload
*/

View file

@ -27,6 +27,7 @@
<!ENTITY zotero.preferences.parseRISRefer "Use Zotero for downloaded BibTeX/RIS/Refer files">
<!ENTITY zotero.preferences.automaticSnapshots "Automatically take snapshots when creating items from web pages">
<!ENTITY zotero.preferences.downloadAssociatedFiles "Automatically attach associated PDFs and other files when saving items">
<!ENTITY zotero.preferences.autoRecognizeFiles "Automatically retrieve metadata for PDFs">
<!ENTITY zotero.preferences.renameAttachmentFiles "Automatically rename attachment files using parent metadata">
<!ENTITY zotero.preferences.automaticTags "Automatically tag items with keywords and subject headings">
<!ENTITY zotero.preferences.trashAutoEmptyDaysPre "Automatically remove items in the trash deleted more than">

View file

@ -35,6 +35,7 @@ pref("extensions.zotero.automaticTags",true);
pref("extensions.zotero.fontSize", "1.0");
pref("extensions.zotero.layout", "standard");
pref("extensions.zotero.recursiveCollections", false);
pref("extensions.zotero.autoRecognizeFiles", true);
pref("extensions.zotero.renameAttachmentFiles.automatic", true);
pref("extensions.zotero.renameAttachmentFiles.automatic.fileTypes", "application/pdf");
pref("extensions.zotero.attachmentRenameFormatString", '{%c - }{%y - }{%t{50}}');

View file

@ -19,6 +19,16 @@ function waitForDOMEvent(target, event, capture) {
return deferred.promise;
}
/**
 * Wait for the PDF recognizer dialog to open and show its "complete" status
 *
 * Polls the element with id "label" in the dialog every 20 ms until its value
 * equals the "recognizePDF.complete.label" string.
 *
 * @return {Promise} - A promise for the value resolved by waitForWindow()
 *     (the recognizer dialog window)
 */
async function waitForRecognizer() {
	const dialog = await waitForWindow('chrome://zotero/content/recognizePDFDialog.xul');
	const doneText = Zotero.getString("recognizePDF.complete.label");
	// Look the label up again on every pass rather than caching the element
	const isComplete = () => dialog.document.getElementById("label").value == doneText;
	while (!isComplete()) {
		await Zotero.Promise.delay(20);
	}
	return dialog;
}
/**
* Open a chrome window and return a promise for the window
*

View file

@ -695,11 +695,11 @@ describe("Zotero.ItemTreeView", function() {
file.append(pdfFilename);
pdfPath = file.path;
httpd.registerFile("/" + pdfFilename, file);
Zotero.Prefs.clear('renameAttachmentFiles.automatic');
});
afterEach(() => {
beforeEach(() => {
// Don't run recognize on every file
Zotero.Prefs.set('autoRecognizeFiles', false);
Zotero.Prefs.clear('renameAttachmentFiles.automatic');
});
@ -707,6 +707,9 @@ describe("Zotero.ItemTreeView", function() {
var defer = new Zotero.Promise.defer();
httpd.stop(() => defer.resolve());
yield defer.promise;
Zotero.Prefs.clear('autoRecognizeFiles');
Zotero.Prefs.clear('renameAttachmentFiles.automatic');
});
it("should move a child item from one item to another", function* () {
@ -879,6 +882,62 @@ describe("Zotero.ItemTreeView", function() {
);
});
// Drops a remote PDF URL onto the items view with auto-recognition enabled and
// checks that, once the recognizer dialog reports completion, the added
// attachment is no longer a top-level item.
it("should automatically retrieve metadata for top-level PDF if pref is enabled", async function () {
	Zotero.Prefs.set('autoRecognizeFiles', true);
	
	var promise = waitForItemEvent('add');
	var recognizerPromise = waitForRecognizer();
	
	// Fake recognizer response
	Zotero.HTTP.mock = sinon.FakeXMLHttpRequest;
	try {
		let server = sinon.fakeServer.create();
		server.autoRespond = true;
		setHTTPResponse(
			server,
			ZOTERO_CONFIG.RECOGNIZE_URL,
			{
				method: 'POST',
				url: 'recognize',
				status: 200,
				headers: {
					'Content-Type': 'application/json'
				},
				json: {
					title: 'Test',
					authors: []
				}
			}
		);
		
		// Not awaited here; the test waits on the item 'add' event and on the
		// recognizer dialog instead
		itemsView.drop(0, -1, {
			dropEffect: 'copy',
			effectAllowed: 'copy',
			types: {
				contains: function (type) {
					return type == 'text/x-moz-url';
				}
			},
			getData: function (type) {
				if (type == 'text/x-moz-url') {
					return pdfURL;
				}
			},
			mozItemCount: 1,
		});
		
		let itemIDs = await promise;
		let item = Zotero.Items.get(itemIDs[0]);
		
		let progressWindow = await recognizerPromise;
		progressWindow.close();
		Zotero.RecognizePDF.cancel();
		assert.isFalse(item.isTopLevelItem());
	}
	finally {
		// Always remove the fake XHR, even if an assertion above fails, so it
		// doesn't leak into subsequent tests
		Zotero.HTTP.mock = null;
	}
});
it("should rename a stored child attachment using parent metadata if no existing file attachments and pref enabled", async function () {
var view = zp.itemsView;
var parentTitle = Zotero.Utilities.randomString();

View file

@ -305,17 +305,36 @@ describe("Connector Server", function () {
assert.equal(item.getField('title'), 'Title');
});
it("should save a PDF to the current selected collection", function* () {
var collection = yield createDataObject('collection');
yield waitForItemsLoad(win);
it("should save a PDF to the current selected collection and retrieve metadata", async function () {
var collection = await createDataObject('collection');
await waitForItemsLoad(win);
var file = getTestDataDirectory();
file.append('test.pdf');
httpd.registerFile("/test.pdf", file);
var ids;
var promise = waitForItemEvent('add');
yield Zotero.HTTP.request(
var recognizerPromise = waitForRecognizer();
var origRequest = Zotero.HTTP.request.bind(Zotero.HTTP);
var called = 0;
var stub = sinon.stub(Zotero.HTTP, 'request').callsFake(function (method, url, options) {
// Forward saveSnapshot request
if (url.endsWith('saveSnapshot')) {
return origRequest(...arguments);
}
// Fake recognizer response
return Zotero.Promise.resolve({
getResponseHeader: () => {},
responseText: JSON.stringify({
title: 'Test',
authors: []
})
});
});
await Zotero.HTTP.request(
'POST',
connectorServerPath + "/connector/saveSnapshot",
{
@ -329,13 +348,20 @@ describe("Connector Server", function () {
}
);
var ids = yield promise;
var ids = await promise;
assert.lengthOf(ids, 1);
var item = Zotero.Items.get(ids[0]);
assert.isTrue(item.isImportedAttachment());
assert.equal(item.attachmentContentType, 'application/pdf');
assert.isTrue(collection.hasItem(item.id));
var progressWindow = await recognizerPromise;
progressWindow.close();
Zotero.RecognizePDF.cancel();
assert.isFalse(item.isTopLevelItem());
stub.restore();
});
it("should respond with 500 if a read-only library is selected", function* () {