Automatically download open-access PDFs when saving via the connector

If there's no translated PDF or the translated PDF fails and the item
has a DOI, check Zotero's Unpaywall mirror for possible sources and try
to download one of those.

Unlike with "Add Item by Identifier" and "Find Available PDF" in the
item context menu, this does not try the DOI/URL page, since it would
result in more data leakage and most of the time you'd be saving from
the DOI page already. We could consider offering it as an option, but
for it to be useful, you'd have to have an institutional subscription,
be on campus or connected via VPN (for now), and be saving from
somewhere other than the main page.

A new connector endpoint, sessionProgress, takes the place of
attachmentProgress. Unlike attachmentProgress, sessionProgress can show
new attachments that have been added to the save, and with a little more
work should also be able to show when a parent item has been recognized
for a directly saved PDF.

This also adds support for custom PDF resolvers, available to all PDF
retrieval methods. I'll document those separately.

Closes #1542
This commit is contained in:
Dan Stillman 2018-08-15 03:34:28 -04:00
parent 7a646a292b
commit ce5be0bc75
14 changed files with 1399 additions and 239 deletions

View file

@ -7,6 +7,7 @@
"chrome/content/zotero/include.js", "chrome/content/zotero/include.js",
"chrome/content/zotero/xpcom/citeproc.js", "chrome/content/zotero/xpcom/citeproc.js",
"resource/csl-validator.js", "resource/csl-validator.js",
"resource/jspath.js",
"resource/react.js", "resource/react.js",
"resource/react-dom.js", "resource/react-dom.js",
"resource/bluebird.js", "resource/bluebird.js",

View file

@ -860,10 +860,9 @@ Zotero.Attachments = new function(){
// If the file is supposed to be a PDF directory, fail if it's not // If the file is supposed to be a PDF directory, fail if it's not
let sample = await Zotero.File.getContentsAsync(path, null, 1000); let sample = await Zotero.File.getContentsAsync(path, null, 1000);
if (options.isPDF && Zotero.MIME.sniffForMIMEType(sample) != 'application/pdf') { if (options.isPDF && Zotero.MIME.sniffForMIMEType(sample) != 'application/pdf') {
let errString = "Downloaded PDF was not a PDF"; Zotero.debug("Downloaded PDF was not a PDF", 2);
Zotero.debug(errString, 2);
Zotero.debug(sample, 3); Zotero.debug(sample, 3);
throw new Error(errString); throw new this.InvalidPDFException();
} }
} }
catch (e) { catch (e) {
@ -878,6 +877,13 @@ Zotero.Attachments = new function(){
}; };
/**
 * Error type thrown when a downloaded file that was expected to be a PDF
 * turns out not to be one.
 *
 * Instances inherit from Error.prototype, so `instanceof Error` holds, and
 * they can additionally be told apart from generic errors via
 * `instanceof InvalidPDFException` or the `name` property.
 */
this.InvalidPDFException = function() {
	// Identify the error type in logs and string conversions instead of
	// inheriting the generic "Error" name from Error.prototype
	this.name = "InvalidPDFException";
	this.message = "Downloaded PDF was not a PDF";
	// Capture a stack trace manually, since the Error constructor isn't invoked on `this`
	this.stack = new Error().stack;
};
// Object.create() alone would leave `constructor` pointing at Error, so restore it
// explicitly (non-enumerable, like a built-in prototype's constructor property)
this.InvalidPDFException.prototype = Object.create(Error.prototype, {
	constructor: {
		value: this.InvalidPDFException,
		writable: true,
		configurable: true
	}
});
this.canFindPDFForItem = function (item) { this.canFindPDFForItem = function (item) {
return item.isRegularItem() return item.isRegularItem()
&& (!!item.getField('DOI') || !!item.getField('url')) && (!!item.getField('DOI') || !!item.getField('url'))
@ -889,24 +895,26 @@ Zotero.Attachments = new function(){
* Look for an available PDF for an item and add it as an attachment * Look for an available PDF for an item and add it as an attachment
* *
* @param {Zotero.Item} item * @param {Zotero.Item} item
* @return {Zotero.Item|false} - New attachment item, or false if unsuccessful * @param {String[]} [methods=['doi', 'url', 'oa', 'custom']]
* @return {Object[]} - An array of urlResolvers (see downloadFirstAvailableFile())
*/ */
this.addAvailablePDF = async function (item, modes = ['doi', 'url', 'oa']) { this.getPDFResolvers = function (item, methods = ['doi', 'url', 'oa', 'custom'], automatic) {
Zotero.debug("Looking for available PDFs"); var useDOI = methods.includes('doi');
var useURL = methods.includes('url');
var useOA = methods.includes('oa');
var useCustom = methods.includes('custom');
var useDOI = modes.includes('doi'); var resolvers = [];
var useURL = modes.includes('url'); var doi = item.getField('DOI');
var useOA = modes.includes('oa'); doi = Zotero.Utilities.cleanDOI(doi);
var urlObjects = []; if (useDOI && doi) {
doi = Zotero.Utilities.cleanDOI(doi);
if (useDOI) {
let doi = item.getField('DOI');
if (doi) { if (doi) {
doi = Zotero.Utilities.cleanDOI(doi); resolvers.push({
if (doi) { pageURL: 'https://doi.org/' + doi,
urlObjects.push({ pageURL: 'https://doi.org/' + doi }); accessMethod: 'doi'
} });
} }
} }
@ -915,32 +923,180 @@ Zotero.Attachments = new function(){
if (url) { if (url) {
url = Zotero.Utilities.cleanURL(url); url = Zotero.Utilities.cleanURL(url);
if (url) { if (url) {
urlObjects.push({ pageURL: url }); resolvers.push({
pageURL: url,
accessMethod: 'url'
});
} }
} }
} }
if (useOA) { if (useOA && doi) {
urlObjects.push(async function () { resolvers.push(async function () {
var doi = item.getField('DOI'); let urls = await Zotero.Utilities.Internal.getOpenAccessPDFURLs(doi);
if (!doi) { return urls.map((o) => {
return []; return {
} url: o.url,
try { pageURL: o.pageURL,
return await Zotero.Utilities.Internal.getOpenAccessPDFURLs(doi); articleVersion: o.version,
} accessMethod: 'oa'
catch (e) { };
Zotero.logError(e); });
return [];
}
}); });
} }
if (!urlObjects.length) { if (useCustom && doi) {
return false; let customResolvers;
try {
customResolvers = Zotero.Prefs.get('findPDFs.resolvers');
if (customResolvers) {
customResolvers = JSON.parse(customResolvers);
}
}
catch (e) {
Zotero.debug("Error parsing custom PDF resolvers", 2);
Zotero.debug(e, 2);
}
if (customResolvers) {
// Handle single object instead of array
if (!Array.isArray(customResolvers) && customResolvers.method) {
customResolvers = [customResolvers];
}
if (Array.isArray(customResolvers)) {
// Only include resolvers that have opted into automatic processing
if (automatic) {
customResolvers = customResolvers.filter(r => r.automatic);
}
for (let resolver of customResolvers) {
try {
let {
name,
method,
url,
mode,
selector,
// HTML
attribute,
index,
// JSON
mappings
} = resolver;
if (!name) {
throw new Error("'name' not provided");
}
if (!['GET', 'POST'].includes(method.toUpperCase())) {
throw new Error("'method' must be 'GET' or 'POST'");
}
if (!url) {
throw new Error("'url' not provided");
}
if (!url.includes('{doi}')) {
throw new Error("'url' must include '{doi}'");
}
if (!['html', 'json'].includes(mode.toLowerCase())) {
throw new Error("'mode' must be 'html' or 'json'");
}
if (!selector) {
throw new Error("'selector' not provided");
}
url = url.replace(/\{doi}/, doi);
resolvers.push(async function () {
Zotero.debug(`Looking for PDFs for ${doi} via ${name}`);
var req = await Zotero.HTTP.request(
method.toUpperCase(),
url,
{
responseType: mode == 'json' ? 'json' : 'document',
timeout: 5000
}
);
if (mode == 'html') {
let doc = req.response;
let elem = index
? doc.querySelectorAll(selector).item(index)
: doc.querySelector(selector);
if (!elem) return [];
let val = attribute
? elem.getAttribute(attribute)
: elem.textContent;
if (!val) return [];
return [{
accessMethod: name,
url: val,
referrer: url,
}];
}
else if (mode == 'json') {
let jspath = require('resource://zotero/jspath.js');
let json = req.response;
let results = jspath.apply(selector, json);
// If mappings for 'url' and 'pageURL' are supplied,
// extract properties from each object in the array
if (mappings) {
let mappedResults = [];
for (let result of results) {
if (typeof result != 'object') continue;
let mappedResult = {};
for (let field in mappings) {
if (!['url', 'pageURL'].includes(field)) continue;
if (result[mappings[field]]) {
mappedResult[field] = result[mappings[field]];
}
}
mappedResults.push(mappedResult);
}
results = mappedResults;
}
// Otherwise just treat each array entry as the URL
else {
results = results
.filter(url => typeof url == 'string')
.map(url => ({ url }));
}
return results.map((o) => {
return Object.assign(
o,
{
accessMethod: name,
referrer: url
}
);
});
}
});
}
catch (e) {
Zotero.debug("Error parsing PDF resolver", 2);
Zotero.debug(e, 2);
Zotero.debug(resolver, 2);
}
}
}
}
} }
return this.addPDFFromURLs(item, urlObjects); return resolvers;
};
/**
* Look for an available PDF for an item and add it as an attachment
*
* @param {Zotero.Item} item
* @param {String[]} [methods=['doi', 'url', 'oa', 'custom']]
* @return {Zotero.Item|false} - New Zotero.Item, or false if unsuccessful
*/
this.addAvailablePDF = async function (item, methods = ['doi', 'url', 'oa', 'custom']) {
Zotero.debug("Looking for available PDFs");
return this.addPDFFromURLs(item, this.getPDFResolvers(...arguments));
}; };
@ -948,10 +1104,13 @@ Zotero.Attachments = new function(){
* Try to add a PDF to an item from a set of possible URLs * Try to add a PDF to an item from a set of possible URLs
* *
* @param {Zotero.Item} item * @param {Zotero.Item} item
* @param {(String|Object|Function)[]} urlObjects - See downloadFirstAvailableFile() * @param {(String|Object|Function)[]} urlResolvers - See downloadFirstAvailableFile()
* @return {Zotero.Item|false} - New attachment item, or false if unsuccessful * @param {Object} [options]
* @param {Function} [options.onAccessMethodStart] - Function to run when a new access method
* is started, taking the access method name as an argument
* @return {Zotero.Item|false} - New Zotero.Item, or false if unsuccessful
*/ */
this.addPDFFromURLs = async function (item, urlObjects) { this.addPDFFromURLs = async function (item, urlResolvers, options = {}) {
var fileBaseName = this.getFileBaseNameFromItem(item); var fileBaseName = this.getFileBaseNameFromItem(item);
var tmpDir; var tmpDir;
var tmpFile; var tmpFile;
@ -959,8 +1118,13 @@ Zotero.Attachments = new function(){
try { try {
tmpDir = (await this.createTemporaryStorageDirectory()).path; tmpDir = (await this.createTemporaryStorageDirectory()).path;
tmpFile = OS.Path.join(tmpDir, fileBaseName + '.pdf'); tmpFile = OS.Path.join(tmpDir, fileBaseName + '.pdf');
let { url, index } = await this.downloadFirstAvailableFile( let { url, props } = await this.downloadFirstAvailableFile(
urlObjects, tmpFile, { isPDF: true } urlResolvers,
tmpFile,
{
isPDF: true,
onAccessMethodStart: options.onAccessMethodStart
}
); );
if (url) { if (url) {
attachmentItem = await this.createURLAttachmentFromTemporaryStorageDirectory({ attachmentItem = await this.createURLAttachmentFromTemporaryStorageDirectory({
@ -970,7 +1134,7 @@ Zotero.Attachments = new function(){
url, url,
contentType: 'application/pdf', contentType: 'application/pdf',
parentItemID: item.id, parentItemID: item.id,
articleVersion: urlObjects[index].version articleVersion: props.articleVersion
}); });
} }
else { else {
@ -993,45 +1157,68 @@ Zotero.Attachments = new function(){
* *
* URLs are only tried once. * URLs are only tried once.
* *
* @param {(String|Object|Function)[]} urlObjects - An array of URLs, objects, or functions * @param {(String|Object|Function)[]} urlResolvers - An array of URLs, objects, or functions
* that return arrays of objects. Objects can contain 'url' and/or 'pageURL', which is a * that return arrays of objects. Objects should contain 'url' and/or 'pageURL' (the latter
* webpage that might contain a translatable PDF link. Functions that return promises are * being a webpage that might contain a translatable PDF link), 'accessMethod' (which will
* waited for, and functions aren't called unless a file hasn't yet been found from an * be displayed in the save popup), and an optional 'articleVersion' ('submittedVersion',
* earlier entry. * 'acceptedVersion', or 'publishedVersion'). Functions that return promises are waited for,
* and functions aren't called unless a file hasn't yet been found from an earlier entry.
* @param {String} path - Path to save file to * @param {String} path - Path to save file to
* @param {Object} [options] - Options to pass to this.downloadFile() * @param {Object} [options] - Options to pass to this.downloadFile()
* @return {Object|false} - Object with successful 'url' and 'index' from original array, or * @return {Object|false} - Object with successful 'url' and 'props' from the associated urlResolver,
* false if no file could be downloaded * or false if no file could be downloaded
*/ */
this.downloadFirstAvailableFile = async function (urlObjects, path, options) { this.downloadFirstAvailableFile = async function (urlResolvers, path, options) {
const maxURLs = 6;
const schemeRE = /^(http:)?\/\//;
// Operate on copy, since we might change things // Operate on copy, since we might change things
urlObjects = [...urlObjects]; urlResolvers = [...urlResolvers];
// Don't try the same URL more than once // Don't try the same URL more than once
var triedURLs = new Set(); var triedURLs = new Set();
var triedPages = new Set(); var triedPages = new Set();
for (let i = 0; i < urlObjects.length; i++) { for (let i = 0; i < urlResolvers.length; i++) {
let urlObject = urlObjects[i]; let urlResolver = urlResolvers[i];
if (typeof urlObject == 'function') { if (typeof urlResolver == 'function') {
urlObject = await urlObject(); try {
urlObjects.splice(i, 1, ...urlObject); urlResolver = await urlResolver();
urlObject = urlObjects[i];
// No URLs returned from last function
if (!urlObject) {
break;
} }
catch (e) {
Zotero.logError(e);
urlResolver = [];
}
// Don't allow more than 6 URLs from a given resolver
// Among other things, this ignores Unpaywall rows that have a huge number of
// URLs by mistake (as of August 2018).
if (urlResolver.length > maxURLs) {
Zotero.debug(`Keeping ${maxURLs} URLs`);
urlResolver = urlResolver.slice(0, maxURLs);
}
// Splice any URLs from resolver into the array
urlResolvers.splice(i, 1, ...urlResolver);
i--;
continue;
} }
// Accept URL strings in addition to objects // Accept URL strings in addition to objects
if (typeof urlObject == 'string') { if (typeof urlResolver == 'string') {
urlObject = { url: urlObject }; urlResolver = { url: urlResolver };
} }
let url = urlObject.url; let url = urlResolver.url;
let pageURL = urlObject.pageURL; let pageURL = urlResolver.pageURL;
let fromPage = false; let fromPage = false;
// Force URLs to HTTPS. If a request fails because of that, too bad.
if (!Zotero.test) {
if (url) url = url.replace(schemeRE, 'https://');
if (pageURL) pageURL = pageURL.replace(schemeRE, 'https://');
}
// Ignore URLs we've already tried // Ignore URLs we've already tried
if (url && triedURLs.has(url)) { if (url && triedURLs.has(url)) {
Zotero.debug(`PDF at ${url} was already tried -- skipping`); Zotero.debug(`PDF at ${url} was already tried -- skipping`);
@ -1042,12 +1229,24 @@ Zotero.Attachments = new function(){
pageURL = null; pageURL = null;
} }
if (!url && !pageURL) {
continue;
}
if (urlResolver.referrer) {
options.referrer = urlResolver.referrer;
}
if (options.onAccessMethodStart) {
options.onAccessMethodStart(urlResolver.accessMethod);
delete options.onAccessMethod;
}
// Try URL first if available // Try URL first if available
if (url) { if (url) {
triedURLs.add(url); triedURLs.add(url);
try { try {
await this.downloadFile(url, path, options); await this.downloadFile(url, path, options);
return { url, index: i }; return { url, props: urlResolver };
} }
catch (e) { catch (e) {
Zotero.debug(`Error downloading ${url}: ${e}`); Zotero.debug(`Error downloading ${url}: ${e}`);
@ -1092,7 +1291,7 @@ Zotero.Attachments = new function(){
let downloadOptions = Object.assign({}, options, { referrer: responseURL }); let downloadOptions = Object.assign({}, options, { referrer: responseURL });
try { try {
await this.downloadFile(url, path, downloadOptions); await this.downloadFile(url, path, downloadOptions);
return { url, index: i }; return { url, props: urlResolver };
} }
catch (e) { catch (e) {
Zotero.debug(`Error downloading ${url}: ${e}`); Zotero.debug(`Error downloading ${url}: ${e}`);

View file

@ -152,9 +152,69 @@ Zotero.Server.Connector.SessionManager = {
Zotero.Server.Connector.SaveSession = function (id, action, requestData) { Zotero.Server.Connector.SaveSession = function (id, action, requestData) {
this.id = id; this.id = id;
this.created = new Date(); this.created = new Date();
this.savingDone = false;
this._action = action; this._action = action;
this._requestData = requestData; this._requestData = requestData;
this._items = new Set(); this._items = new Set();
this._progressItems = {};
this._orderedProgressItems = [];
};
/**
 * Record save progress for a top-level item or a child attachment
 *
 * Child items (those with a `parent` property) are stored in the `attachments` array
 * of their parent's progress entry, which must already exist. Top-level items get
 * their own entry, created on first report and kept in first-seen order.
 *
 * @param {Object} item - Item data with `id` and, as applicable, `parent`, `title`,
 *     `itemType`, `mimeType`, and `attachments`
 * @param {Number|Boolean} progress - Progress value to store
 * @param {*} [error] - Currently unused
 * @throws {Error} If `item.id` is missing or the parent progress entry can't be found
 */
Zotero.Server.Connector.SaveSession.prototype.onProgress = function (item, progress, error) {
	const id = item.id;
	if (!id) {
		throw new Error("ID not provided");
	}
	
	const parentID = item.parent;
	
	// Child item: update (or create) its entry within the parent's attachment list
	if (parentID) {
		const parentEntry = this._progressItems[parentID];
		if (!parentEntry) {
			throw new Error(`Parent progress item ${parentID} not found for attachment ${id}`);
		}
		let childEntry = parentEntry.attachments.find(x => x.id == id);
		if (!childEntry) {
			childEntry = { id };
			parentEntry.attachments.push(childEntry);
		}
		Object.assign(childEntry, {
			title: item.title,
			contentType: item.mimeType,
			progress
		});
		return;
	}
	
	// Top-level item: create the entry on first sight and remember its position
	let entry = this._progressItems[id];
	if (!entry) {
		entry = { id };
		this._progressItems[id] = entry;
		this._orderedProgressItems.push(id);
	}
	entry.title = item.title;
	
	const isAttachment = item.itemType == 'attachment';
	// PDF being converted to a top-level item after recognition
	if (entry.itemType == 'attachment' && !isAttachment) {
		delete entry.progress;
		delete entry.contentType;
	}
	entry.itemType = item.itemType;
	entry.attachments = item.attachments;
	// Only standalone attachments carry their own progress value
	if (isAttachment) {
		entry.progress = progress;
	}
};
/**
 * Look up the progress entry stored for a top-level item
 *
 * @param {*} id - Key of the entry in this._progressItems
 * @return {Object|undefined} - The stored progress object, or undefined if nothing is stored under that id
 */
Zotero.Server.Connector.SaveSession.prototype.getProgressItem = function (id) {
return this._progressItems[id];
};
/**
 * Get the progress entries for all top-level items
 *
 * @return {Object[]} - Progress objects from this._progressItems, in the order of the ids
 *     stored in this._orderedProgressItems
 */
Zotero.Server.Connector.SaveSession.prototype.getAllProgress = function () {
return this._orderedProgressItems.map(id => this._progressItems[id]);
}; };
Zotero.Server.Connector.SaveSession.prototype.addItem = async function (item) { Zotero.Server.Connector.SaveSession.prototype.addItem = async function (item) {
@ -315,44 +375,6 @@ Zotero.Server.Connector.SaveSession.prototype._updateRecents = function () {
}; };
Zotero.Server.Connector.AttachmentProgressManager = new function() {
var attachmentsInProgress = new WeakMap(),
attachmentProgress = {},
id = 1;
/**
* Adds attachments to attachment progress manager
*/
this.add = function(attachments) {
for(var i=0; i<attachments.length; i++) {
var attachment = attachments[i];
attachmentsInProgress.set(attachment, (attachment.id = id++));
}
};
/**
* Called on attachment progress
*/
this.onProgress = function(attachment, progress, error) {
attachmentProgress[attachmentsInProgress.get(attachment)] = progress;
};
/**
* Gets progress for a given progressID
*/
this.getProgressForID = function(progressID) {
return progressID in attachmentProgress ? attachmentProgress[progressID] : 0;
};
/**
* Check if we have received progress for a given attachment
*/
this.has = function(attachment) {
return attachmentsInProgress.has(attachment)
&& attachmentsInProgress.get(attachment) in attachmentProgress;
}
};
/** /**
* Lists all available translators, including code for translators that should be run on every page * Lists all available translators, including code for translators that should be run on every page
* *
@ -568,11 +590,11 @@ Zotero.Server.Connector.SavePage.prototype = {
var jsonItems = []; var jsonItems = [];
translate.setHandler("select", function(obj, item, callback) { return me._selectItems(obj, item, callback) }); translate.setHandler("select", function(obj, item, callback) { return me._selectItems(obj, item, callback) });
translate.setHandler("itemDone", function(obj, item, jsonItem) { translate.setHandler("itemDone", function(obj, item, jsonItem) {
Zotero.Server.Connector.AttachmentProgressManager.add(jsonItem.attachments); //Zotero.Server.Connector.AttachmentProgressManager.add(jsonItem.attachments);
jsonItems.push(jsonItem); jsonItems.push(jsonItem);
}); });
translate.setHandler("attachmentProgress", function(obj, attachment, progress, error) { translate.setHandler("attachmentProgress", function(obj, attachment, progress, error) {
Zotero.Server.Connector.AttachmentProgressManager.onProgress(attachment, progress, error); //Zotero.Server.Connector.AttachmentProgressManager.onProgress(attachment, progress, error);
}); });
translate.setHandler("done", function(obj, item) { translate.setHandler("done", function(obj, item) {
Zotero.Browser.deleteHiddenBrowser(me._browser); Zotero.Browser.deleteHiddenBrowser(me._browser);
@ -639,15 +661,36 @@ Zotero.Server.Connector.SaveItems.prototype = {
return new Zotero.Promise((resolve) => { return new Zotero.Promise((resolve) => {
try { try {
this.saveItems( this.saveItems(
session,
targetID, targetID,
requestData, requestData,
function (topLevelItems) { function (topLevelItems) {
// Only return the properties the connector needs
topLevelItems = topLevelItems.map((item) => {
return {
id: item.id,
title: item.title,
itemType: item.itemType,
contentType: item.mimeType,
mimeType: item.mimeType, // TODO: Remove
attachments: item.attachments.map((attachment) => {
return {
id: session.id + '_' + attachment.id, // TODO: Remove prefix
title: attachment.title,
contentType: attachment.contentType,
mimeType: attachment.mimeType, // TODO: Remove
};
})
};
});
resolve([201, "application/json", JSON.stringify({items: topLevelItems})]); resolve([201, "application/json", JSON.stringify({items: topLevelItems})]);
} }
) )
// Add items to session once all attachments have been saved // Add items to session once all attachments have been saved
.then(function (items) { .then(function (items) {
session.addItems(items); session.addItems(items);
// Return 'done: true' so the connector stops checking for updates
session.savingDone = true;
}); });
} }
catch (e) { catch (e) {
@ -657,9 +700,8 @@ Zotero.Server.Connector.SaveItems.prototype = {
}); });
}), }),
saveItems: async function (target, requestData, onTopLevelItemsDone) { saveItems: async function (session, target, requestData, onTopLevelItemsDone) {
var { library, collection, editable } = Zotero.Server.Connector.resolveTarget(target); var { library, collection, editable } = Zotero.Server.Connector.resolveTarget(target);
var data = requestData.data; var data = requestData.data;
var cookieSandbox = data.uri var cookieSandbox = data.uri
? new Zotero.CookieSandbox( ? new Zotero.CookieSandbox(
@ -673,8 +715,29 @@ Zotero.Server.Connector.SaveItems.prototype = {
cookieSandbox.addCookiesFromHeader(data.detailedCookies); cookieSandbox.addCookiesFromHeader(data.detailedCookies);
} }
var id = 1;
for (let item of data.items) { for (let item of data.items) {
Zotero.Server.Connector.AttachmentProgressManager.add(item.attachments); if (!item.id) {
item.id = id++;
}
if (item.attachments) {
for (let attachment of item.attachments) {
attachment.id = id++;
attachment.parent = item.id;
}
}
// Add parent item to session progress without attachments, which are added later if
// they're saved.
let progressItem = Object.assign(
{},
item,
{
attachments: []
}
);
session.onProgress(progressItem, 0);
} }
var proxy = data.proxy && new Zotero.Proxy(data.proxy); var proxy = data.proxy && new Zotero.Proxy(data.proxy);
@ -691,21 +754,10 @@ Zotero.Server.Connector.SaveItems.prototype = {
}); });
return itemSaver.saveItems( return itemSaver.saveItems(
data.items, data.items,
Zotero.Server.Connector.AttachmentProgressManager.onProgress, function (attachment, progress, error) {
function () { session.onProgress(attachment, progress, error);
// Remove attachments from item.attachments that aren't being saved. We have to },
// clone the items so that we don't mutate the data stored in the session. onTopLevelItemsDone
var savedItems = [...data.items.map(item => Object.assign({}, item))];
for (let item of savedItems) {
item.attachments = item.attachments
.filter(attachment => {
return Zotero.Server.Connector.AttachmentProgressManager.has(attachment);
});
}
if (onTopLevelItemsDone) {
onTopLevelItemsDone(savedItems);
}
}
); );
} }
} }
@ -923,6 +975,56 @@ Zotero.Server.Connector.UpdateSession.prototype = {
} }
}; };
/**
 * Connector endpoint that reports the progress of all items in a save session
 *
 * Accepts:
 *     sessionID - ID of the save session to report on
 * Returns:
 *     400 with { error: "SESSION_ID_NOT_PROVIDED" } if no sessionID was passed
 *     400 with { error: "SESSION_NOT_FOUND" } if the session can't be found
 *     200 with { items, done } otherwise, where `items` are the session's progress
 *         entries and `done` is the session's `savingDone` flag
 */
Zotero.Server.Connector.SessionProgress = function() {};
Zotero.Server.Endpoints["/connector/sessionProgress"] = Zotero.Server.Connector.SessionProgress;
Zotero.Server.Connector.SessionProgress.prototype = {
	supportedMethods: ["POST"],
	supportedDataTypes: ["application/json"],
	permitBookmarklet: true,
	
	init: async function (requestData) {
		const { sessionID } = requestData.data;
		
		if (!sessionID) {
			return [400, "application/json", JSON.stringify({ error: "SESSION_ID_NOT_PROVIDED" })];
		}
		
		const session = Zotero.Server.Connector.SessionManager.get(sessionID);
		if (!session) {
			Zotero.debug("Can't find session " + sessionID, 1);
			return [400, "application/json", JSON.stringify({ error: "SESSION_NOT_FOUND" })];
		}
		
		// Work on shallow copies so that the progress data stored in the session isn't modified
		const items = [];
		for (let progressItem of session.getAllProgress()) {
			let copy = Object.assign({}, progressItem);
			if (progressItem.attachments) {
				// Prefix id with 'sessionID_'
				// TODO: Remove this once support for /attachmentProgress is removed and we
				// stop prefixing the ids in the /saveItems response
				copy.attachments = progressItem.attachments.map((attachment) => {
					return Object.assign({}, attachment, { id: session.id + '_' + attachment.id });
				});
			}
			items.push(copy);
		}
		
		const body = JSON.stringify({
			items,
			done: session.savingDone
		});
		return [200, "application/json", body];
	}
};
Zotero.Server.Connector.DelaySync = function () {}; Zotero.Server.Connector.DelaySync = function () {};
Zotero.Server.Endpoints["/connector/delaySync"] = Zotero.Server.Connector.DelaySync; Zotero.Server.Endpoints["/connector/delaySync"] = Zotero.Server.Connector.DelaySync;
Zotero.Server.Connector.DelaySync.prototype = { Zotero.Server.Connector.DelaySync.prototype = {
@ -955,8 +1057,27 @@ Zotero.Server.Connector.Progress.prototype = {
* @param {Function} sendResponseCallback function to send HTTP response * @param {Function} sendResponseCallback function to send HTTP response
*/ */
init: function(data, sendResponseCallback) { init: function(data, sendResponseCallback) {
sendResponseCallback(200, "application/json", sendResponseCallback(
JSON.stringify(data.map(id => Zotero.Server.Connector.AttachmentProgressManager.getProgressForID(id)))); 200,
"application/json",
JSON.stringify(
data.map((id) => {
var [sessionID, progressID] = id.split('_');
var session = Zotero.Server.Connector.SessionManager.get(sessionID);
var items = session.getAllProgress();
for (let item of items) {
for (let attachment of item.attachments) {
// TODO: Change to progressID instead of id once we stop prepending
// the sessionID to support older connector versions
if (attachment.id == progressID) {
return attachment.progress;
}
}
}
return null;
})
)
);
} }
}; };

View file

@ -1625,7 +1625,13 @@ Zotero.Translate.Base.prototype = {
var attachmentsWithProgress = []; var attachmentsWithProgress = [];
function attachmentCallback(attachment, progress, error) { function attachmentCallback(attachment, progress, error) {
var attachmentIndex = this._savingAttachments.indexOf(attachment); // Find by id if available (used in the connector)
if (attachment.id) {
var attachmentIndex = this._savingAttachments.findIndex(x => x.id == attachment.id);
}
else {
var attachmentIndex = this._savingAttachments.indexOf(attachment);
}
if(progress === false || progress === 100) { if(progress === false || progress === 100) {
if(attachmentIndex !== -1) { if(attachmentIndex !== -1) {
this._savingAttachments.splice(attachmentIndex, 1); this._savingAttachments.splice(attachmentIndex, 1);

View file

@ -78,113 +78,278 @@ Zotero.Translate.ItemSaver.ATTACHMENT_MODE_FILE = 2;
Zotero.Translate.ItemSaver.prototype = { Zotero.Translate.ItemSaver.prototype = {
/** /**
* Saves items to Standalone or the server * Saves items to Standalone or the server
* @param items Items in Zotero.Item.toArray() format * @param {Object[]} jsonItems - Items in Zotero.Item.toArray() format
* @param {Function} [attachmentCallback] A callback that receives information about attachment * @param {Function} [attachmentCallback] A callback that receives information about attachment
* save progress. The callback will be called as attachmentCallback(attachment, false, error) * save progress. The callback will be called as attachmentCallback(attachment, false, error)
* on failure or attachmentCallback(attachment, progressPercent) periodically during saving. * on failure or attachmentCallback(attachment, progressPercent) periodically during saving.
* @param {Function} [itemsDoneCallback] A callback that is called once all top-level items are * @param {Function} [itemsDoneCallback] A callback that is called once all top-level items are
* done saving with a list of items. Will include saved notes, but exclude attachments. * done saving with a list of items. Will include saved notes, but exclude attachments.
*/ */
saveItems: Zotero.Promise.coroutine(function* (items, attachmentCallback, itemsDoneCallback) { saveItems: async function (jsonItems, attachmentCallback, itemsDoneCallback) {
let newItems = [], standaloneAttachments = [], childAttachments = []; var items = [];
yield Zotero.DB.executeTransaction(function* () { var standaloneAttachments = [];
for (let iitem=0; iitem<items.length; iitem++) { var childAttachments = [];
let item = items[iitem], newItem, myID; var jsonByItem = new Map();
// Type defaults to "webpage"
let type = (item.itemType ? item.itemType : "webpage"); await Zotero.DB.executeTransaction(async function () {
for (let jsonItem of jsonItems) {
jsonItem = Object.assign({}, jsonItem);
if (type == "note") { // handle notes differently let item;
newItem = yield this._saveNote(item); let itemID;
// Type defaults to "webpage"
let type = jsonItem.itemType || "webpage";
// Handle notes differently
if (type == "note") {
item = await this._saveNote(jsonItem);
} }
// Handle standalone attachments differently // Handle standalone attachments differently
else if (type == "attachment") { else if (type == "attachment") {
if (this._canSaveAttachment(item)) { if (this._canSaveAttachment(jsonItem)) {
standaloneAttachments.push(item); standaloneAttachments.push(jsonItem);
attachmentCallback(item, 0); attachmentCallback(jsonItem, 0);
} }
continue; continue;
} else { }
newItem = new Zotero.Item(type); else {
newItem.libraryID = this._libraryID; item = new Zotero.Item(type);
if (item.creators) this._cleanCreators(item.creators); item.libraryID = this._libraryID;
if(item.tags) item.tags = this._cleanTags(item.tags); if (jsonItem.creators) this._cleanCreators(jsonItem.creators);
if (jsonItem.tags) jsonItem.tags = this._cleanTags(jsonItem.tags);
if (item.accessDate == 'CURRENT_TIMESTAMP') { if (jsonItem.accessDate == 'CURRENT_TIMESTAMP') {
item.accessDate = Zotero.Date.dateToISO(new Date()); jsonItem.accessDate = Zotero.Date.dateToISO(new Date());
} }
// Need to handle these specially. Put them in a separate object to item.fromJSON(this._copyJSONItemForImport(jsonItem));
// avoid a warning from fromJSON()
let specialFields = {
attachments:item.attachments,
notes:item.notes,
seeAlso:item.seeAlso,
id:item.itemID || item.id
};
newItem.fromJSON(this._deleteIrrelevantFields(item));
// deproxify url // deproxify url
if (this._proxy && item.url) { if (this._proxy && jsonItem.url) {
let url = this._proxy.toProper(item.url); let url = this._proxy.toProper(jsonItem.url);
Zotero.debug(`Deproxifying item url ${item.url} with scheme ${this._proxy.scheme} to ${url}`, 5); Zotero.debug(`Deproxifying item url ${jsonItem.url} with scheme ${this._proxy.scheme} to ${url}`, 5);
newItem.setField('url', url); item.setField('url', url);
} }
if (this._collections) { if (this._collections) {
newItem.setCollections(this._collections); item.setCollections(this._collections);
} }
// save item // save item
myID = yield newItem.save(this._saveOptions); itemID = await item.save(this._saveOptions);
// handle notes // handle notes
if (specialFields.notes) { if (jsonItem.notes) {
for (let i=0; i<specialFields.notes.length; i++) { for (let note of jsonItem.notes) {
yield this._saveNote(specialFields.notes[i], myID); await this._saveNote(note, itemID);
} }
item.notes = specialFields.notes;
} }
// handle attachments // handle attachments
if (specialFields.attachments) { if (jsonItem.attachments) {
for (let attachment of specialFields.attachments) { let attachmentsToSave = [];
if (!this._canSaveAttachment(attachment)) { let foundPrimaryPDF = false;
for (let jsonAttachment of jsonItem.attachments) {
if (!this._canSaveAttachment(jsonAttachment)) {
continue; continue;
} }
attachmentCallback(attachment, 0);
childAttachments.push([attachment, myID]); // The first PDF is the primary one. If that one fails to download,
// we might check for an open-access PDF below.
let isPrimaryPDF = false;
if (jsonAttachment.mimeType == 'application/pdf' && !foundPrimaryPDF) {
jsonAttachment.isPrimaryPDF = true;
foundPrimaryPDF = true;
}
attachmentsToSave.push(jsonAttachment);
attachmentCallback(jsonAttachment, 0);
childAttachments.push([jsonAttachment, itemID]);
} }
// Restore the attachments field, since we use it later in jsonItem.attachments = attachmentsToSave;
// translation
item.attachments = specialFields.attachments;
} }
// handle see also // handle see also
this._handleRelated(specialFields, newItem); this._handleRelated(jsonItem, item);
} }
// add to new item list // Add to new item list
newItems.push(newItem); items.push(item);
jsonByItem.set(item, jsonItem);
} }
}.bind(this)); }.bind(this));
// Save standalone attachments
for (let jsonItem of standaloneAttachments) {
let item = await this._saveAttachment(jsonItem, null, attachmentCallback);
if (item) {
items.push(item);
}
}
if (itemsDoneCallback) { if (itemsDoneCallback) {
itemsDoneCallback(newItems.splice()); itemsDoneCallback(items.map(item => jsonByItem.get(item)));
} }
// Handle attachments outside of the transaction, because they can involve downloading // For items with DOIs and without PDFs from the translator, look for possible
for (let item of standaloneAttachments) { // open-access PDFs. There's no guarantee that either translated PDFs or OA PDFs will
let newItem = yield this._saveAttachment(item, null, attachmentCallback); // successfully download, but this lets us update the progress window sooner with
if (newItem) newItems.push(newItem); // possible downloads.
} //
for (let a of childAttachments) { // TODO: Separate pref?
// Workaround for https://bugzilla.mozilla.org/show_bug.cgi?id=449811 (fixed in Fx51?) var pdfResolvers = new Map();
let [item, parentItemID] = a; if (Zotero.Prefs.get('downloadAssociatedFiles')
yield this._saveAttachment(item, parentItemID, attachmentCallback); // TEMP: Limit to dev builds
&& Zotero.isDevBuild) {
for (let item of items) {
let doi = item.getField('DOI');
if (!doi) {
continue;
}
let jsonItem = jsonByItem.get(item);
// Skip items with translated PDF attachments
if (jsonItem.attachments
&& jsonItem.attachments.some(x => x.mimeType == 'application/pdf')) {
continue;
}
try {
let resolvers = this._getPDFResolvers(item);
pdfResolvers.set(item, resolvers);
// If there are possible URLs, create a status line for the PDF
if (resolvers.length) {
let title = Zotero.getString('findPDF.searchingForAvailablePDFs');
let jsonAttachment = this._makeJSONAttachment(jsonItem.id, title);
jsonItem.attachments.push(jsonAttachment);
attachmentCallback(jsonAttachment, 0);
}
}
catch (e) {
Zotero.logError(e);
}
}
} }
return newItems; // Save translated child attachments, and keep track of whether the save was successful
}), var itemIDsWithPDFAttachments = new Set();
for (let [jsonAttachment, parentItemID] of childAttachments) {
let attachment = await this._saveAttachment(
jsonAttachment,
parentItemID,
function (attachment, progress, error) {
// Don't cancel failed primary PDFs until we've tried other methods
if (progress === false && attachment.isPrimaryPDF) {
return;
}
attachmentCallback(...arguments);
}
);
if (attachment && jsonAttachment.isPrimaryPDF) {
itemIDsWithPDFAttachments.add(parentItemID);
}
}
// If a translated PDF attachment wasn't saved successfully, either because there wasn't
// one or there was but it failed, look for another PDF (if enabled)
if (Zotero.Prefs.get('downloadAssociatedFiles')
// TEMP: Limit to dev builds
&& Zotero.isDevBuild) {
for (let item of items) {
// Already have a PDF from translation
if (itemIDsWithPDFAttachments.has(item.id)) {
continue;
}
let jsonItem = jsonByItem.get(item);
// Reuse the existing status line if there is one. This could be a failed
// translator attachment or a possible OA PDF found above.
let jsonAttachment = jsonItem.attachments.find(
x => x.mimeType == 'application/pdf' && x.isPrimaryPDF
);
// We might already have retrieved possible OA URLs above, if there wasn't a PDF
// from the translator. If not, get them now.
let resolvers = pdfResolvers.get(item);
if (!resolvers) {
resolvers = this._getPDFResolvers(item);
}
if (!resolvers.length) {
// If there was an existing status line, use that
if (jsonAttachment) {
attachmentCallback(jsonAttachment, false);
}
continue;
}
// If no status line, add one, since we have something to try
if (!jsonAttachment) {
jsonAttachment = this._makeJSONAttachment(
jsonItem.id, Zotero.getString('findPDF.searchingForAvailablePDFs')
);
}
attachmentCallback(jsonAttachment, 0);
let attachment;
try {
attachment = await Zotero.Attachments.addPDFFromURLs(
item,
resolvers,
{
// When a new access method starts, update the status line
onAccessMethodStart: (method) => {
jsonAttachment.title = this._getPDFTitleForAccessMethod(method);
attachmentCallback(jsonAttachment, 0);
}
}
);
}
catch (e) {
Zotero.logError(e);
attachmentCallback(jsonAttachment, false, e);
continue;
}
if (attachment) {
attachmentCallback(jsonAttachment, 100);
}
else {
attachmentCallback(jsonAttachment, false, "PDF not found");
}
}
}
return items;
},
_makeJSONAttachment: function (parentID, title) {
return {
id: Zotero.Utilities.randomString(),
parent: parentID,
title,
mimeType: 'application/pdf',
isPrimaryPDF: true
};
},
_getPDFTitleForAccessMethod: function (accessMethod) {
if (accessMethod == 'oa') {
return Zotero.getString('findPDF.openAccessPDF');
}
if (accessMethod) {
return Zotero.getString('findPDF.pdfWithMethod', accessMethod);
}
return "PDF";
},
_getPDFResolvers: function (item) {
return Zotero.Attachments.getPDFResolvers(item, ['oa', 'custom']);
},
"saveCollections": Zotero.Promise.coroutine(function* (collections) { "saveCollections": Zotero.Promise.coroutine(function* (collections) {
var collectionsToProcess = collections.slice(); var collectionsToProcess = collections.slice();
@ -240,14 +405,28 @@ Zotero.Translate.ItemSaver.prototype = {
}), }),
/** /**
* Deletes irrelevant fields from an item object to avoid warnings in Item#fromJSON * Create a copy of item JSON without irrelevant fields to avoid warnings in Item#fromJSON
*
* Also delete some things like dateAdded, dateModified, and path that translators * Also delete some things like dateAdded, dateModified, and path that translators
* should not be able to set directly. * should not be able to set directly.
*/ */
"_deleteIrrelevantFields": function(item) { _copyJSONItemForImport: function (item) {
const DELETE_FIELDS = ["attachments", "notes", "dateAdded", "dateModified", "seeAlso", "version", "id", "itemID", "path"]; var newItem = Object.assign({}, item);
for (let i=0; i<DELETE_FIELDS.length; i++) delete item[DELETE_FIELDS[i]]; const fieldsToDelete = [
return item; "attachments",
"notes",
"dateAdded",
"dateModified",
"seeAlso",
"version",
"id",
"itemID",
"path"
];
for (let field of fieldsToDelete) {
delete newItem[field];
}
return newItem;
}, },
@ -290,7 +469,7 @@ Zotero.Translate.ItemSaver.prototype = {
* parameters: translator attachment object, percent completion (integer), * parameters: translator attachment object, percent completion (integer),
* and an optional error object * and an optional error object
* *
* @return {Zotero.Primise<Zotero.Item|False} Flase is returned if attachment * @return {Zotero.Promise<Zotero.Item|false} - False is returned if attachment
* was not saved due to error or user settings. * was not saved due to error or user settings.
*/ */
_saveAttachment: Zotero.Promise.coroutine(function* (attachment, parentItemID, attachmentCallback) { _saveAttachment: Zotero.Promise.coroutine(function* (attachment, parentItemID, attachmentCallback) {
@ -325,6 +504,7 @@ Zotero.Translate.ItemSaver.prototype = {
attachmentCallback(attachment, 100); attachmentCallback(attachment, 100);
return newAttachment; return newAttachment;
} catch(e) { } catch(e) {
Zotero.debug("Saving attachment failed", 2);
Zotero.debug(e, 2); Zotero.debug(e, 2);
attachmentCallback(attachment, false, e); attachmentCallback(attachment, false, e);
return false; return false;

View file

@ -936,9 +936,12 @@ Zotero.Utilities.Internal = {
* Note: This uses a private API. Please use Unpaywall directly for non-Zotero projects. * Note: This uses a private API. Please use Unpaywall directly for non-Zotero projects.
* *
* @param {String} doi * @param {String} doi
* @return {String[]} - An array of PDF URLs * @param {Object} [options]
* @param {Number} [options.timeout] - Request timeout in milliseconds
* @return {Object[]} - An array of objects with 'url' and/or 'pageURL' and 'version'
* ('submittedVersion', 'acceptedVersion', 'publishedVersion')
*/ */
getOpenAccessPDFURLs: async function (doi) { getOpenAccessPDFURLs: async function (doi, options = {}) {
doi = Zotero.Utilities.cleanDOI(doi); doi = Zotero.Utilities.cleanDOI(doi);
if (!doi) { if (!doi) {
throw new Error(`Invalid DOI '${doi}'`); throw new Error(`Invalid DOI '${doi}'`);
@ -946,26 +949,25 @@ Zotero.Utilities.Internal = {
Zotero.debug(`Looking for open-access PDFs for ${doi}`); Zotero.debug(`Looking for open-access PDFs for ${doi}`);
var url = ZOTERO_CONFIG.SERVICES_URL + 'oa/search'; var url = ZOTERO_CONFIG.SERVICES_URL + 'oa/search';
var req = await Zotero.HTTP.request('POST', url, { var req = await Zotero.HTTP.request(
headers: { 'POST',
'Content-Type': 'application/json' url,
}, Object.assign(
body: JSON.stringify({ doi }), {
responseType: 'json' headers: {
}); 'Content-Type': 'application/json'
},
body: JSON.stringify({ doi }),
responseType: 'json'
},
options.timeout && {
timeout: options.timeout
}
)
);
var urls = req.response; var urls = req.response;
Zotero.debug(`Found ${urls.length} open-access PDF ${Zotero.Utilities.pluralize(urls.length, ['URL', 'URLs'])}`); Zotero.debug(`Found ${urls.length} open-access PDF `
// Handle older URL-only format + `${Zotero.Utilities.pluralize(urls.length, ['URL', 'URLs'])}`);
urls = urls.map(o => typeof o == 'string' ? { url: o } : o);
// Only try a small number of URLs, and ignore Unpaywall rows that have a huge number of
// URLs by mistake (as of August 2018)
let maxURLs = 6;
if (urls.length > maxURLs) {
Zotero.debug(`Keeping ${maxURLs} URLs`);
urls = urls.slice(0, maxURLs);
}
return urls; return urls;
}, },

View file

@ -3858,7 +3858,7 @@ var ZoteroPane = new function()
var icon = 'chrome://zotero/skin/treeitem-attachment-pdf.png'; var icon = 'chrome://zotero/skin/treeitem-attachment-pdf.png';
var progressWin = new Zotero.ProgressWindow(); var progressWin = new Zotero.ProgressWindow();
var title = Zotero.getString('findPDF.headline'); var title = Zotero.getString('findPDF.searchingForAvailablePDFs');
progressWin.changeHeadline(title); progressWin.changeHeadline(title);
var itemProgress = new progressWin.ItemProgress( var itemProgress = new progressWin.ItemProgress(
icon, icon,
@ -3891,7 +3891,7 @@ var ZoteroPane = new function()
itemProgress.setText(Zotero.getString('findPDF.pdfsAdded', successful, successful)); itemProgress.setText(Zotero.getString('findPDF.pdfsAdded', successful, successful));
} }
else { else {
itemProgress.setText("No PDFs found") itemProgress.setText(Zotero.getString('findPDF.noPDFsFound'))
} }
progressWin.startCloseTimer(4000); progressWin.startCloseTimer(4000);

View file

@ -602,9 +602,12 @@ ingester.importFile.intoNewCollection = Import into new collection
ingester.lookup.performing = Performing Lookup… ingester.lookup.performing = Performing Lookup…
ingester.lookup.error = An error occurred while performing lookup for this item. ingester.lookup.error = An error occurred while performing lookup for this item.
findPDF.headline = Searching for available PDFs… findPDF.searchingForAvailablePDFs = Searching for available PDFs…
findPDF.checkingItems = Checking %S item;Checking %S items findPDF.checkingItems = Checking %S item;Checking %S items
findPDF.pdfsAdded = %S PDF added;%S PDFs added findPDF.pdfsAdded = %S PDF added;%S PDFs added
findPDF.openAccessPDF = Open-Access PDF
findPDF.pdfWithMethod = PDF (%S)
findPDF.noPDFsFound = No PDFs found
db.dbCorrupted = The Zotero database '%S' appears to have become corrupted. db.dbCorrupted = The Zotero database '%S' appears to have become corrupted.
db.dbCorrupted.restart = Please restart %S to attempt an automatic restore from the last backup. db.dbCorrupted.restart = Please restart %S to attempt an automatic restore from the last backup.

View file

@ -30,6 +30,7 @@ pref("extensions.zotero.openURL.version","1.0");
pref("extensions.zotero.parseEndNoteMIMETypes",true); pref("extensions.zotero.parseEndNoteMIMETypes",true);
pref("extensions.zotero.automaticSnapshots",true); pref("extensions.zotero.automaticSnapshots",true);
pref("extensions.zotero.downloadAssociatedFiles",true); pref("extensions.zotero.downloadAssociatedFiles",true);
pref("extensions.zotero.findPDFs.resolvers", '[]');
pref("extensions.zotero.reportTranslationFailure",true); pref("extensions.zotero.reportTranslationFailure",true);
pref("extensions.zotero.automaticTags",true); pref("extensions.zotero.automaticTags",true);
pref("extensions.zotero.fontSize", "1.0"); pref("extensions.zotero.fontSize", "1.0");

6
package-lock.json generated
View file

@ -3136,6 +3136,12 @@
"integrity": "sha1-T9kss04OnbPInIYi7PUfm5eMbLk=", "integrity": "sha1-T9kss04OnbPInIYi7PUfm5eMbLk=",
"dev": true "dev": true
}, },
"jspath": {
"version": "0.4.0",
"resolved": "https://registry.npmjs.org/jspath/-/jspath-0.4.0.tgz",
"integrity": "sha512-2/R8wkot8NCXrppBT/onp+4mcAUAZqtPxsW6aSJU3hrFAVqKqtFYcat2XJZ7inN4RtATUxfv0UQSYOmvJKiIGA==",
"dev": true
},
"jsprim": { "jsprim": {
"version": "1.4.0", "version": "1.4.0",
"resolved": "https://registry.npmjs.org/jsprim/-/jsprim-1.4.0.tgz", "resolved": "https://registry.npmjs.org/jsprim/-/jsprim-1.4.0.tgz",

View file

@ -41,6 +41,7 @@
"eslint-plugin-react": "^7.5.1", "eslint-plugin-react": "^7.5.1",
"fs-extra": "^3.0.1", "fs-extra": "^3.0.1",
"globby": "^6.1.0", "globby": "^6.1.0",
"jspath": "^0.4.0",
"mocha": "^3.5.3", "mocha": "^3.5.3",
"multimatch": "^2.1.0", "multimatch": "^2.1.0",
"node-sass": "^4.9.0", "node-sass": "^4.9.0",

1
resource/jspath.js Symbolic link
View file

@ -0,0 +1 @@
../node_modules/jspath/lib/jspath.js

View file

@ -347,6 +347,8 @@ describe("Zotero.Attachments", function() {
var pageURL2 = 'http://website/article2'; var pageURL2 = 'http://website/article2';
var pageURL3 = 'http://website/article3'; var pageURL3 = 'http://website/article3';
var pageURL4 = 'http://website/article4'; var pageURL4 = 'http://website/article4';
var pageURL5 = `http://website/${doi4}`;
var pageURL6 = `http://website/${doi4}/json`;
Components.utils.import("resource://zotero-unit/httpd.js"); Components.utils.import("resource://zotero-unit/httpd.js");
var httpd; var httpd;
@ -354,12 +356,12 @@ describe("Zotero.Attachments", function() {
var baseURL = `http://localhost:${port}/`; var baseURL = `http://localhost:${port}/`;
var pdfURL = `${baseURL}article1/pdf`; var pdfURL = `${baseURL}article1/pdf`;
var pdfSize; var pdfSize;
var stub; var requestStub;
before(async function () { before(async function () {
var origFunc = Zotero.HTTP.request.bind(Zotero.HTTP); var origFunc = Zotero.HTTP.request.bind(Zotero.HTTP);
stub = sinon.stub(Zotero.HTTP, 'request'); requestStub = sinon.stub(Zotero.HTTP, 'request');
stub.callsFake(function (method, url, options) { requestStub.callsFake(function (method, url, options) {
// Page responses // Page responses
var routes = [ var routes = [
// Page 1 contains a PDF // Page 1 contains a PDF
@ -400,6 +402,44 @@ describe("Zotero.Attachments", function() {
}; };
} }
// HTML page with PDF download link
if (url == pageURL5) {
var html = `<html>
<head>
<title>Page Title</title>
</head>
<body>
<a id="pdf-link" href="${pdfURL}">Download PDF</a>
</body>
</html>`;
let parser = new DOMParser();
let doc = parser.parseFromString(html, 'text/html');
doc = Zotero.HTTP.wrapDocument(doc, pageURL5);
return {
status: 200,
response: doc,
responseURL: pageURL5
};
}
// JSON response with PDF download links
if (url == pageURL6) {
return {
status: 200,
response: {
oa_locations: [
{
url_for_landing_page: pageURL1
},
{
url_for_pdf: pdfURL
}
]
},
responseURL: pageURL6
};
}
// OA PDF lookup // OA PDF lookup
if (url.startsWith(ZOTERO_CONFIG.SERVICES_URL)) { if (url.startsWith(ZOTERO_CONFIG.SERVICES_URL)) {
let json = JSON.parse(options.body); let json = JSON.parse(options.body);
@ -427,6 +467,8 @@ describe("Zotero.Attachments", function() {
pdfSize = await OS.File.stat( pdfSize = await OS.File.stat(
OS.Path.join(getTestDataDirectory().path, 'test.pdf') OS.Path.join(getTestDataDirectory().path, 'test.pdf')
).size; ).size;
Zotero.Prefs.clear('findPDFs.resolvers');
}); });
beforeEach(async function () { beforeEach(async function () {
@ -439,10 +481,11 @@ describe("Zotero.Attachments", function() {
}); });
afterEach(async function () { afterEach(async function () {
stub.resetHistory(); requestStub.resetHistory();
await new Promise((resolve) => { await new Promise((resolve) => {
httpd.stop(() => resolve()); httpd.stop(() => resolve());
}); });
Zotero.Prefs.clear('findPDFs.resolvers');
}.bind(this)); }.bind(this));
after(() => { after(() => {
@ -457,8 +500,8 @@ describe("Zotero.Attachments", function() {
await item.saveTx(); await item.saveTx();
var attachment = await Zotero.Attachments.addAvailablePDF(item); var attachment = await Zotero.Attachments.addAvailablePDF(item);
assert.isTrue(stub.calledOnce); assert.isTrue(requestStub.calledOnce);
assert.isTrue(stub.calledWith('GET', 'https://doi.org/' + doi)); assert.isTrue(requestStub.calledWith('GET', 'https://doi.org/' + doi));
assert.ok(attachment); assert.ok(attachment);
var json = attachment.toJSON(); var json = attachment.toJSON();
assert.equal(json.url, pdfURL); assert.equal(json.url, pdfURL);
@ -475,8 +518,8 @@ describe("Zotero.Attachments", function() {
await item.saveTx(); await item.saveTx();
var attachment = await Zotero.Attachments.addAvailablePDF(item); var attachment = await Zotero.Attachments.addAvailablePDF(item);
assert.isTrue(stub.calledOnce); assert.isTrue(requestStub.calledOnce);
assert.isTrue(stub.calledWith('GET', url)); assert.isTrue(requestStub.calledWith('GET', url));
assert.ok(attachment); assert.ok(attachment);
var json = attachment.toJSON(); var json = attachment.toJSON();
assert.equal(json.url, pdfURL); assert.equal(json.url, pdfURL);
@ -493,10 +536,10 @@ describe("Zotero.Attachments", function() {
await item.saveTx(); await item.saveTx();
var attachment = await Zotero.Attachments.addAvailablePDF(item); var attachment = await Zotero.Attachments.addAvailablePDF(item);
assert.isTrue(stub.calledTwice); assert.isTrue(requestStub.calledTwice);
var call1 = stub.getCall(0); var call1 = requestStub.getCall(0);
assert.isTrue(call1.calledWith('GET', 'https://doi.org/' + doi)); assert.isTrue(call1.calledWith('GET', 'https://doi.org/' + doi));
var call2 = stub.getCall(1); var call2 = requestStub.getCall(1);
assert.isTrue(call2.calledWith('POST', ZOTERO_CONFIG.SERVICES_URL + 'oa/search')); assert.isTrue(call2.calledWith('POST', ZOTERO_CONFIG.SERVICES_URL + 'oa/search'));
assert.ok(attachment); assert.ok(attachment);
@ -515,15 +558,15 @@ describe("Zotero.Attachments", function() {
await item.saveTx(); await item.saveTx();
var attachment = await Zotero.Attachments.addAvailablePDF(item); var attachment = await Zotero.Attachments.addAvailablePDF(item);
assert.isTrue(stub.calledThrice); assert.isTrue(requestStub.calledThrice);
// Check the DOI (and get nothing) // Check the DOI (and get nothing)
var call1 = stub.getCall(0); var call1 = requestStub.getCall(0);
assert.isTrue(call1.calledWith('GET', 'https://doi.org/' + doi)); assert.isTrue(call1.calledWith('GET', 'https://doi.org/' + doi));
// Check the OA resolver and get page 3 // Check the OA resolver and get page 3
var call2 = stub.getCall(1); var call2 = requestStub.getCall(1);
assert.isTrue(call2.calledWith('POST', ZOTERO_CONFIG.SERVICES_URL + 'oa/search')); assert.isTrue(call2.calledWith('POST', ZOTERO_CONFIG.SERVICES_URL + 'oa/search'));
// Check page 3 and find the download URL // Check page 3 and find the download URL
var call3 = stub.getCall(2); var call3 = requestStub.getCall(2);
assert.isTrue(call3.calledWith('GET', pageURL3)); assert.isTrue(call3.calledWith('GET', pageURL3));
assert.ok(attachment); assert.ok(attachment);
@ -543,14 +586,123 @@ describe("Zotero.Attachments", function() {
await item.saveTx(); await item.saveTx();
var attachment = await Zotero.Attachments.addAvailablePDF(item); var attachment = await Zotero.Attachments.addAvailablePDF(item);
assert.isTrue(stub.calledTwice); assert.isTrue(requestStub.calledTwice);
var call1 = stub.getCall(0); var call1 = requestStub.getCall(0);
assert.isTrue(call1.calledWith('GET', 'https://doi.org/' + doi)); assert.isTrue(call1.calledWith('GET', 'https://doi.org/' + doi));
var call2 = stub.getCall(1); var call2 = requestStub.getCall(1);
assert.isTrue(call2.calledWith('POST', ZOTERO_CONFIG.SERVICES_URL + 'oa/search')); assert.isTrue(call2.calledWith('POST', ZOTERO_CONFIG.SERVICES_URL + 'oa/search'));
assert.isFalse(attachment); assert.isFalse(attachment);
}); });
// A custom resolver in 'html' mode: the resolver page is fetched and the PDF URL is read
// from the attribute of the element matched by the configured selector
it("should handle a custom resolver in HTML mode", async function () {
	var doi = doi4;
	var item = createUnsavedDataObject('item', { itemType: 'journalArticle' });
	item.setField('title', 'Test');
	item.setField('DOI', doi);
	await item.saveTx();
	
	var resolvers = [{
		name: 'Custom',
		method: 'get',
		url: 'http://website/{doi}',
		mode: 'html',
		selector: '#pdf-link',
		attribute: 'href'
	}];
	Zotero.Prefs.set('findPDFs.resolvers', JSON.stringify(resolvers));
	
	var attachment = await Zotero.Attachments.addAvailablePDF(item);
	
	// Expected request order: DOI page, OA lookup, then the custom resolver page
	assert.isTrue(requestStub.calledThrice);
	var call1 = requestStub.getCall(0);
	assert.isTrue(call1.calledWith('GET', 'https://doi.org/' + doi));
	var call2 = requestStub.getCall(1);
	assert.isTrue(call2.calledWith('POST', ZOTERO_CONFIG.SERVICES_URL + 'oa/search'));
	var call3 = requestStub.getCall(2);
	assert.isTrue(call3.calledWith('GET', pageURL5));
	
	assert.ok(attachment);
	var json = attachment.toJSON();
	assert.equal(json.url, pdfURL);
	assert.equal(json.contentType, 'application/pdf');
	assert.equal(json.filename, 'Test.pdf');
	
	// Member access binds tighter than `await`, so `await stat(...).size` reads `.size`
	// off the promise and yields undefined. Parenthesize the await so real sizes are
	// compared, and compute the expected size here rather than relying on the shared
	// `pdfSize`, which is set up with the same unparenthesized expression.
	// NOTE(review): assumes pdfURL serves the test.pdf fixture, as the suite's pdfSize
	// setup implies -- confirm against the httpd registration
	var expectedSize = (await OS.File.stat(
		OS.Path.join(getTestDataDirectory().path, 'test.pdf')
	)).size;
	var actualSize = (await OS.File.stat(attachment.getFilePath())).size;
	assert.equal(actualSize, expectedSize);
});
// A custom resolver in 'json' mode whose selector yields plain URL strings
it("should handle a custom resolver in JSON mode with URL strings", async function () {
	var doi = doi4;
	var item = createUnsavedDataObject('item', { itemType: 'journalArticle' });
	item.setField('title', 'Test');
	item.setField('DOI', doi);
	await item.saveTx();
	
	var resolvers = [{
		name: 'Custom',
		method: 'get',
		url: 'http://website/{doi}/json',
		mode: 'json',
		selector: '.oa_locations.url_for_pdf'
	}];
	Zotero.Prefs.set('findPDFs.resolvers', JSON.stringify(resolvers));
	
	var attachment = await Zotero.Attachments.addAvailablePDF(item);
	
	// Expected request order: DOI page, OA lookup, then the custom resolver's JSON endpoint
	assert.isTrue(requestStub.calledThrice);
	var call1 = requestStub.getCall(0);
	assert.isTrue(call1.calledWith('GET', 'https://doi.org/' + doi));
	var call2 = requestStub.getCall(1);
	assert.isTrue(call2.calledWith('POST', ZOTERO_CONFIG.SERVICES_URL + 'oa/search'));
	var call3 = requestStub.getCall(2);
	assert.isTrue(call3.calledWith('GET', pageURL6));
	
	assert.ok(attachment);
	var json = attachment.toJSON();
	assert.equal(json.url, pdfURL);
	assert.equal(json.contentType, 'application/pdf');
	assert.equal(json.filename, 'Test.pdf');
	
	// Member access binds tighter than `await`, so `await stat(...).size` reads `.size`
	// off the promise and yields undefined. Parenthesize the await so real sizes are
	// compared, and compute the expected size here rather than relying on the shared
	// `pdfSize`, which is set up with the same unparenthesized expression.
	// NOTE(review): assumes pdfURL serves the test.pdf fixture, as the suite's pdfSize
	// setup implies -- confirm against the httpd registration
	var expectedSize = (await OS.File.stat(
		OS.Path.join(getTestDataDirectory().path, 'test.pdf')
	)).size;
	var actualSize = (await OS.File.stat(attachment.getFilePath())).size;
	assert.equal(actualSize, expectedSize);
});
// A custom resolver in 'json' mode whose selector yields objects, with `mappings`
// naming the properties that hold the PDF URL and the landing-page URL
it("should handle a custom resolver in JSON mode with mapped properties", async function () {
	var doi = doi4;
	var item = createUnsavedDataObject('item', { itemType: 'journalArticle' });
	item.setField('title', 'Test');
	item.setField('DOI', doi);
	await item.saveTx();
	
	var resolvers = [{
		name: 'Custom',
		method: 'get',
		url: 'http://website/{doi}/json',
		mode: 'json',
		selector: '.oa_locations',
		mappings: {
			url: 'url_for_pdf',
			pageURL: 'url_for_landing_page',
		}
	}];
	Zotero.Prefs.set('findPDFs.resolvers', JSON.stringify(resolvers));
	
	var attachment = await Zotero.Attachments.addAvailablePDF(item);
	
	// Expected request order: DOI page, OA lookup, the resolver's JSON endpoint, and
	// then the landing page taken from the mapped results
	assert.equal(requestStub.callCount, 4);
	var call1 = requestStub.getCall(0);
	assert.isTrue(call1.calledWith('GET', 'https://doi.org/' + doi));
	var call2 = requestStub.getCall(1);
	assert.isTrue(call2.calledWith('POST', ZOTERO_CONFIG.SERVICES_URL + 'oa/search'));
	var call3 = requestStub.getCall(2);
	assert.isTrue(call3.calledWith('GET', pageURL6));
	var call4 = requestStub.getCall(3);
	assert.isTrue(call4.calledWith('GET', pageURL1));
	
	assert.ok(attachment);
	var json = attachment.toJSON();
	assert.equal(json.url, pdfURL);
	assert.equal(json.contentType, 'application/pdf');
	assert.equal(json.filename, 'Test.pdf');
	
	// Member access binds tighter than `await`, so `await stat(...).size` reads `.size`
	// off the promise and yields undefined. Parenthesize the await so real sizes are
	// compared, and compute the expected size here rather than relying on the shared
	// `pdfSize`, which is set up with the same unparenthesized expression.
	// NOTE(review): assumes pdfURL serves the test.pdf fixture, as the suite's pdfSize
	// setup implies -- confirm against the httpd registration
	var expectedSize = (await OS.File.stat(
		OS.Path.join(getTestDataDirectory().path, 'test.pdf')
	)).size;
	var actualSize = (await OS.File.stat(attachment.getFilePath())).size;
	assert.equal(actualSize, expectedSize);
});
}); });
describe("#getBaseDirectoryRelativePath()", function () { describe("#getBaseDirectoryRelativePath()", function () {

View file

@ -258,6 +258,493 @@ describe("Connector Server", function () {
var item = Zotero.Items.get(ids[0]); var item = Zotero.Items.get(ids[0]);
assert.equal(item.getField('url'), 'https://www.example.com/path'); assert.equal(item.getField('url'), 'https://www.example.com/path');
}); });
// With automatic snapshots disabled, the text/html attachment on the submitted item
// shouldn't be listed in the saveItems response
it("shouldn't return an attachment that isn't being saved", async function () {
	Zotero.Prefs.set('automaticSnapshots', false);
	
	var req;
	// Restore the pref even if setup or the request throws, so a failure here can't
	// change the behavior of later tests
	try {
		await selectLibrary(win, Zotero.Libraries.userLibraryID);
		await waitForItemsLoad(win);
		
		var body = {
			items: [
				{
					itemType: "webpage",
					title: "Title",
					creators: [],
					attachments: [
						{
							url: "http://example.com/",
							mimeType: "text/html"
						}
					],
					url: "http://example.com/"
				}
			],
			uri: "http://example.com/"
		};
		
		req = await Zotero.HTTP.request(
			'POST',
			connectorServerPath + "/connector/saveItems",
			{
				headers: {
					"Content-Type": "application/json"
				},
				body: JSON.stringify(body),
				responseType: 'json'
			}
		);
	}
	finally {
		Zotero.Prefs.clear('automaticSnapshots');
	}
	
	assert.equal(req.status, 201);
	assert.lengthOf(req.response.items, 1);
	assert.lengthOf(req.response.items[0].attachments, 0);
});
describe("PDF retrieval", function () {
var oaDOI = '10.1111/abcd';
var nonOADOI = '10.2222/bcde';
var pdfURL;
var badPDFURL;
var stub;
// Replace Zotero.HTTP.request with a stub that answers OA lookups locally and passes
// every other request through to the real implementation
before(function () {
	var realRequest = Zotero.HTTP.request.bind(Zotero.HTTP);
	stub = sinon.stub(Zotero.HTTP, 'request');
	stub.callsFake(function (method, url, options) {
		// Not an OA PDF lookup: hand off with the original arguments untouched
		if (!url.startsWith(ZOTERO_CONFIG.SERVICES_URL)) {
			return realRequest(...arguments);
		}
		
		// OA PDF lookup: only the open-access DOI has a result
		let { doi } = JSON.parse(options.body);
		let response = doi == oaDOI
			? [{ url: pdfURL, version: 'submittedVersion' }]
			: [];
		return {
			status: 200,
			response
		};
	});
});
// Register the two test-server paths used by the PDF tests
beforeEach(() => {
	// Resolve a file in the test data directory to the file object registerFile() is given
	let fixture = filename => Zotero.File.pathToFile(
		OS.Path.join(getTestDataDirectory().path, filename)
	);
	
	pdfURL = testServerPath + '/pdf';
	badPDFURL = testServerPath + '/badpdf';
	
	// A real PDF
	httpd.registerFile(pdfURL.substr(testServerPath.length), fixture('test.pdf'));
	// PDF URL that's actually an HTML page
	httpd.registerFile(badPDFURL.substr(testServerPath.length), fixture('test.html'));
});
// Forget the calls recorded during the test that just ran
afterEach(function () {
	stub.resetHistory();
});

// Remove the stub from Zotero.HTTP.request once the suite is finished
after(function () {
	stub.restore();
});
// End-to-end check of a save with a translated PDF attachment: posts the item to
// /connector/saveItems, then follows the attachment through /connector/sessionProgress
// and the legacy /connector/attachmentProgress endpoint until it has been saved
it("should download a translated PDF", async function () {
var collection = await createDataObject('collection');
await waitForItemsLoad(win);
// Session ID sent with saveItems and reused for the sessionProgress polls below
var sessionID = Zotero.Utilities.randomString();
// Save item
var itemAddPromise = waitForItemEvent('add');
var saveItemsReq = await Zotero.HTTP.request(
'POST',
connectorServerPath + "/connector/saveItems",
{
headers: {
"Content-Type": "application/json"
},
body: JSON.stringify({
sessionID,
items: [
{
itemType: 'journalArticle',
title: 'Title',
DOI: nonOADOI,
attachments: [
{
title: "PDF",
url: pdfURL,
mimeType: 'application/pdf'
}
]
}
],
uri: 'http://website/article'
}),
responseType: 'json'
}
);
assert.equal(saveItemsReq.status, 201);
assert.lengthOf(saveItemsReq.response.items, 1);
// Translated attachment should show up in the initial response
assert.lengthOf(saveItemsReq.response.items[0].attachments, 1);
// The response item shouldn't carry a DOI, nor the attachment a progress value
assert.notProperty(saveItemsReq.response.items[0], 'DOI');
assert.notProperty(saveItemsReq.response.items[0].attachments[0], 'progress');
// Check parent item
var ids = await itemAddPromise;
assert.lengthOf(ids, 1);
var item = Zotero.Items.get(ids[0]);
assert.equal(Zotero.ItemTypes.getName(item.itemTypeID), 'journalArticle');
assert.isTrue(collection.hasItem(item.id));
// Legacy endpoint should show 0
let attachmentProgressReq = await Zotero.HTTP.request(
'POST',
connectorServerPath + "/connector/attachmentProgress",
{
headers: {
"Content-Type": "application/json"
},
body: JSON.stringify([saveItemsReq.response.items[0].attachments[0].id]),
responseType: 'json'
}
);
assert.equal(attachmentProgressReq.status, 200);
let progress = attachmentProgressReq.response;
assert.sameOrderedMembers(progress, [0]);
// Wait for the attachment to finish saving
itemAddPromise = waitForItemEvent('add');
// `i` is the stage reached so far; it only advances when a poll response matches the
// current stage, and the loop ends once all three stages have been seen
var i = 0;
while (i < 3) {
let sessionProgressReq = await Zotero.HTTP.request(
'POST',
connectorServerPath + "/connector/sessionProgress",
{
headers: {
"Content-Type": "application/json"
},
body: JSON.stringify({ sessionID }),
responseType: 'json'
}
);
assert.equal(sessionProgressReq.status, 200);
let response = sessionProgressReq.response;
assert.lengthOf(response.items, 1);
let item = response.items[0];
if (item.attachments.length) {
let attachments = item.attachments;
assert.lengthOf(attachments, 1);
let attachment = attachments[0];
// Each `continue` below restarts the while loop right away, skipping the delay
// at the bottom, so polling is immediate while an attachment is listed
switch (i) {
// Translated PDF in progress
case 0:
if (attachment.title == "PDF"
&& Number.isInteger(attachment.progress)
&& attachment.progress < 100) {
assert.isFalse(response.done);
i++;
}
continue;
// Translated PDF finished
case 1:
if (attachment.title == "PDF" && attachment.progress == 100) {
i++;
}
continue;
// done: true
case 2:
if (response.done) {
i++;
}
continue;
}
}
// Only reached while the response lists no attachments
await Zotero.Promise.delay(10);
}
// Legacy endpoint should show 100
attachmentProgressReq = await Zotero.HTTP.request(
'POST',
connectorServerPath + "/connector/attachmentProgress",
{
headers: {
"Content-Type": "application/json"
},
body: JSON.stringify([saveItemsReq.response.items[0].attachments[0].id]),
responseType: 'json'
}
);
assert.equal(attachmentProgressReq.status, 200);
progress = attachmentProgressReq.response;
assert.sameOrderedMembers(progress, [100]);
// Check attachment
var ids = await itemAddPromise;
assert.lengthOf(ids, 1);
item = Zotero.Items.get(ids[0]);
assert.isTrue(item.isImportedAttachment());
assert.equal(item.getField('title'), 'PDF');
});
// End-to-end check of the open-access fallback: posts an item that has the OA DOI and
// an empty attachments array, then polls /connector/sessionProgress until the session
// reports done and checks that a PDF attachment was added
it("should download open-access PDF if no PDF provided", async function () {
var collection = await createDataObject('collection');
await waitForItemsLoad(win);
// Session ID sent with saveItems and reused for the sessionProgress polls below
var sessionID = Zotero.Utilities.randomString();
// Save item
var itemAddPromise = waitForItemEvent('add');
var saveItemsReq = await Zotero.HTTP.request(
'POST',
connectorServerPath + "/connector/saveItems",
{
headers: {
"Content-Type": "application/json"
},
body: JSON.stringify({
sessionID,
items: [
{
itemType: 'journalArticle',
title: 'Title',
DOI: oaDOI,
attachments: []
}
],
uri: 'http://website/article'
}),
responseType: 'json'
}
);
assert.equal(saveItemsReq.status, 201);
assert.lengthOf(saveItemsReq.response.items, 1);
// Attachment shouldn't show up in the initial response
assert.lengthOf(saveItemsReq.response.items[0].attachments, 0);
// Check parent item
var ids = await itemAddPromise;
assert.lengthOf(ids, 1);
var item = Zotero.Items.get(ids[0]);
assert.equal(Zotero.ItemTypes.getName(item.itemTypeID), 'journalArticle');
assert.isTrue(collection.hasItem(item.id));
// Wait for the attachment to finish saving
itemAddPromise = waitForItemEvent('add');
// Track whether a progress of 0 and of 100 has been observed for the attachment
var wasZero = false;
var was100 = false;
// Poll until the session reports done; the loop is only left via the `break`s below
while (true) {
let sessionProgressReq = await Zotero.HTTP.request(
'POST',
connectorServerPath + "/connector/sessionProgress",
{
headers: {
"Content-Type": "application/json"
},
body: JSON.stringify({ sessionID }),
responseType: 'json'
}
);
assert.equal(sessionProgressReq.status, 200);
let response = sessionProgressReq.response;
assert.typeOf(response.items, 'array');
assert.lengthOf(response.items, 1);
let item = response.items[0];
if (item.attachments.length) {
// 'progress' should have started at 0
if (item.attachments[0].progress === 0) {
wasZero = true;
}
// First time 100 is seen: stop if the session is already done, otherwise
// remember it and keep polling for done
else if (!was100 && item.attachments[0].progress == 100) {
if (response.done) {
break;
}
was100 = true;
}
else if (response.done) {
break;
}
}
// Any response that didn't break out above must not be marked done yet
assert.isFalse(response.done);
await Zotero.Promise.delay(10);
}
assert.isTrue(wasZero);
// Check attachment
var ids = await itemAddPromise;
assert.lengthOf(ids, 1);
item = Zotero.Items.get(ids[0]);
assert.isTrue(item.isImportedAttachment());
assert.equal(item.getField('title'), 'Title.pdf');
});
it("should download open-access PDF if a translated PDF fails", async function () {
	// Saves a journal article with a DOI and one translated PDF attachment
	// pointing at `badPDFURL`, then follows /connector/sessionProgress through
	// four stages: translated PDF in progress, open-access PDF in progress,
	// open-access PDF at 100, and finally `done`. The legacy
	// /connector/attachmentProgress endpoint is checked before (0) and after (100).
	
	// POST `payload` as JSON to a connector endpoint and parse the JSON reply
	const postJSON = (endpoint, payload) => Zotero.HTTP.request(
		'POST',
		connectorServerPath + endpoint,
		{
			headers: {
				"Content-Type": "application/json"
			},
			body: JSON.stringify(payload),
			responseType: 'json'
		}
	);
	// Title the session reports for the open-access attachment
	const openAccessTitle = () => Zotero.getString('findPDF.openAccessPDF');
	// True while an attachment reports an integer progress below 100
	const isDownloading = attachment => Number.isInteger(attachment.progress)
		&& attachment.progress < 100;
	
	const collection = await createDataObject('collection');
	await waitForItemsLoad(win);
	
	const sessionID = Zotero.Utilities.randomString();
	
	// Start listening for the parent item before issuing the save request
	let addedPromise = waitForItemEvent('add');
	const saveReq = await postJSON("/connector/saveItems", {
		sessionID,
		items: [
			{
				itemType: 'journalArticle',
				title: 'Title',
				DOI: oaDOI,
				attachments: [
					{
						title: "PDF",
						url: badPDFURL,
						mimeType: 'application/pdf'
					}
				]
			}
		],
		uri: 'http://website/article'
	});
	assert.equal(saveReq.status, 201);
	assert.lengthOf(saveReq.response.items, 1);
	const savedItem = saveReq.response.items[0];
	// Translated attachment should show up in the initial response
	assert.lengthOf(savedItem.attachments, 1);
	assert.notProperty(savedItem, 'DOI');
	assert.notProperty(savedItem.attachments[0], 'progress');
	
	// Query the legacy endpoint for the attachment id from the save response
	const fetchLegacyProgress = () => postJSON(
		"/connector/attachmentProgress",
		[savedItem.attachments[0].id]
	);
	
	// The parent item should exist and be filed in the new collection
	const parentIDs = await addedPromise;
	assert.lengthOf(parentIDs, 1);
	const parentItem = Zotero.Items.get(parentIDs[0]);
	assert.equal(Zotero.ItemTypes.getName(parentItem.itemTypeID), 'journalArticle');
	assert.isTrue(collection.hasItem(parentItem.id));
	
	// Legacy endpoint should show 0
	let legacyReq = await fetchLegacyProgress();
	assert.equal(legacyReq.status, 200);
	assert.sameOrderedMembers(legacyReq.response, [0]);
	
	// Wait for the attachment to finish saving, advancing one stage at a time
	addedPromise = waitForItemEvent('add');
	let stage = 0;
	while (stage < 4) {
		const progressReq = await postJSON("/connector/sessionProgress", { sessionID });
		assert.equal(progressReq.status, 200);
		const payload = progressReq.response;
		assert.lengthOf(payload.items, 1);
		
		const attachments = payload.items[0].attachments;
		if (!attachments.length) {
			// Nothing listed yet: pause briefly before asking again
			await Zotero.Promise.delay(10);
			continue;
		}
		assert.lengthOf(attachments, 1);
		const attachment = attachments[0];
		
		// Once an attachment is listed, the next poll follows immediately
		// (no delay) regardless of whether the stage advanced
		if (stage === 0) {
			// Translated PDF in progress
			if (attachment.title == "PDF" && isDownloading(attachment)) {
				assert.isFalse(payload.done);
				stage++;
			}
		}
		else if (stage === 1) {
			// OA PDF in progress
			if (attachment.title == openAccessTitle() && isDownloading(attachment)) {
				assert.isFalse(payload.done);
				stage++;
			}
		}
		else if (stage === 2) {
			// OA PDF finished
			if (attachment.progress === 100) {
				assert.equal(attachment.title, openAccessTitle());
				stage++;
			}
		}
		else if (payload.done) {
			// done: true
			stage++;
		}
	}
	
	// Legacy endpoint should show 100
	legacyReq = await fetchLegacyProgress();
	assert.equal(legacyReq.status, 200);
	assert.sameOrderedMembers(legacyReq.response, [100]);
	
	// Check the attachment that was added during polling
	const attachmentIDs = await addedPromise;
	assert.lengthOf(attachmentIDs, 1);
	const attachmentItem = Zotero.Items.get(attachmentIDs[0]);
	assert.isTrue(attachmentItem.isImportedAttachment());
	assert.equal(attachmentItem.getField('title'), 'Title.pdf');
});
});
}); });
describe("/connector/saveSnapshot", function () { describe("/connector/saveSnapshot", function () {