Automatically download open-access PDFs when saving via the connector

If there's no translated PDF or the translated PDF fails and the item
has a DOI, check Zotero's Unpaywall mirror for possible sources and try
to download one of those.

Unlike with "Add Item by Identifier" and "Find Available PDF" in the
item context menu, this does not try the DOI/URL page, since it would
result in more data leakage and most of the time you'd be saving from
the DOI page already. We could consider offering it as an option, but
for it to be useful, you'd have to have an institutional subscription,
be on-campus or connected via VPN (for now), and be saving from
somewhere other than the main page.

A new connector endpoint, sessionProgress, takes the place of
attachmentProgress. Unlike attachmentProgress, sessionProgress can show
new attachments that have been added to the save, and with a little more
work should also be able to show when a parent item has been recognized
for a directly saved PDF.

This also adds support for custom PDF resolvers, available to all PDF
retrieval methods. I'll document those separately.

Closes #1542
This commit is contained in:
Dan Stillman 2018-08-15 03:34:28 -04:00
parent 7a646a292b
commit ce5be0bc75
14 changed files with 1399 additions and 239 deletions

View file

@ -7,6 +7,7 @@
"chrome/content/zotero/include.js",
"chrome/content/zotero/xpcom/citeproc.js",
"resource/csl-validator.js",
"resource/jspath.js",
"resource/react.js",
"resource/react-dom.js",
"resource/bluebird.js",

View file

@ -860,10 +860,9 @@ Zotero.Attachments = new function(){
// If the file is supposed to be a PDF, fail if it's not
let sample = await Zotero.File.getContentsAsync(path, null, 1000);
if (options.isPDF && Zotero.MIME.sniffForMIMEType(sample) != 'application/pdf') {
let errString = "Downloaded PDF was not a PDF";
Zotero.debug(errString, 2);
Zotero.debug("Downloaded PDF was not a PDF", 2);
Zotero.debug(sample, 3);
throw new Error(errString);
throw new this.InvalidPDFException();
}
}
catch (e) {
@ -878,6 +877,13 @@ Zotero.Attachments = new function(){
};
/**
 * Error thrown when a download that was requested as a PDF doesn't contain PDF data
 *
 * Inherits from Error, so `instanceof Error` checks and generic error handling keep working.
 */
this.InvalidPDFException = function () {
	this.message = "Downloaded PDF was not a PDF";
	// Error's constructor isn't run for this object, so capture a stack trace manually
	this.stack = new Error().stack;
};
// Replacing the prototype drops the default 'constructor' link and would leave 'name' as
// "Error", so define both (non-enumerable, like the built-in error types)
this.InvalidPDFException.prototype = Object.create(Error.prototype, {
	constructor: {
		value: this.InvalidPDFException,
		writable: true,
		configurable: true
	},
	name: {
		value: "InvalidPDFException",
		writable: true,
		configurable: true
	}
});
this.canFindPDFForItem = function (item) {
return item.isRegularItem()
&& (!!item.getField('DOI') || !!item.getField('url'))
@ -889,24 +895,26 @@ Zotero.Attachments = new function(){
* Look for an available PDF for an item and add it as an attachment
*
* @param {Zotero.Item} item
* @return {Zotero.Item|false} - New attachment item, or false if unsuccessful
* @param {String[]} [methods=['doi', 'url', 'oa', 'custom']]
* @return {Object[]} - An array of urlResolvers (see downloadFirstAvailableFile())
*/
this.addAvailablePDF = async function (item, modes = ['doi', 'url', 'oa']) {
Zotero.debug("Looking for available PDFs");
this.getPDFResolvers = function (item, methods = ['doi', 'url', 'oa', 'custom'], automatic) {
var useDOI = methods.includes('doi');
var useURL = methods.includes('url');
var useOA = methods.includes('oa');
var useCustom = methods.includes('custom');
var useDOI = modes.includes('doi');
var useURL = modes.includes('url');
var useOA = modes.includes('oa');
var resolvers = [];
var doi = item.getField('DOI');
doi = Zotero.Utilities.cleanDOI(doi);
var urlObjects = [];
if (useDOI) {
let doi = item.getField('DOI');
if (doi) {
if (useDOI && doi) {
doi = Zotero.Utilities.cleanDOI(doi);
if (doi) {
urlObjects.push({ pageURL: 'https://doi.org/' + doi });
}
resolvers.push({
pageURL: 'https://doi.org/' + doi,
accessMethod: 'doi'
});
}
}
@ -915,32 +923,180 @@ Zotero.Attachments = new function(){
if (url) {
url = Zotero.Utilities.cleanURL(url);
if (url) {
urlObjects.push({ pageURL: url });
resolvers.push({
pageURL: url,
accessMethod: 'url'
});
}
}
}
if (useOA) {
urlObjects.push(async function () {
var doi = item.getField('DOI');
if (!doi) {
return [];
}
try {
return await Zotero.Utilities.Internal.getOpenAccessPDFURLs(doi);
}
catch (e) {
Zotero.logError(e);
return [];
}
if (useOA && doi) {
resolvers.push(async function () {
let urls = await Zotero.Utilities.Internal.getOpenAccessPDFURLs(doi);
return urls.map((o) => {
return {
url: o.url,
pageURL: o.pageURL,
articleVersion: o.version,
accessMethod: 'oa'
};
});
});
}
if (!urlObjects.length) {
return false;
if (useCustom && doi) {
let customResolvers;
try {
customResolvers = Zotero.Prefs.get('findPDFs.resolvers');
if (customResolvers) {
customResolvers = JSON.parse(customResolvers);
}
}
catch (e) {
Zotero.debug("Error parsing custom PDF resolvers", 2);
Zotero.debug(e, 2);
}
if (customResolvers) {
// Handle single object instead of array
if (!Array.isArray(customResolvers) && customResolvers.method) {
customResolvers = [customResolvers];
}
if (Array.isArray(customResolvers)) {
// Only include resolvers that have opted into automatic processing
if (automatic) {
customResolvers = customResolvers.filter(r => r.automatic);
}
return this.addPDFFromURLs(item, urlObjects);
// Validate each custom resolver and queue a lookup function for it. A resolver that fails
// validation is logged and skipped without affecting the others.
for (let resolver of customResolvers) {
	try {
		let {
			name,
			method,
			url,
			mode,
			selector,
			// HTML
			attribute,
			index,
			// JSON
			mappings
		} = resolver;
		if (!name) {
			throw new Error("'name' not provided");
		}
		// Normalize case once, so that the comparisons in the lookup function below agree
		// with the validation here. Otherwise a mode such as 'JSON' would pass validation
		// but match neither mode branch, and the resolver would return nothing.
		if (typeof method == 'string') {
			method = method.toUpperCase();
		}
		if (typeof mode == 'string') {
			mode = mode.toLowerCase();
		}
		if (!['GET', 'POST'].includes(method)) {
			throw new Error("'method' must be 'GET' or 'POST'");
		}
		if (!url) {
			throw new Error("'url' not provided");
		}
		if (!url.includes('{doi}')) {
			throw new Error("'url' must include '{doi}'");
		}
		if (!['html', 'json'].includes(mode)) {
			throw new Error("'mode' must be 'html' or 'json'");
		}
		if (!selector) {
			throw new Error("'selector' not provided");
		}
		// Fill in every placeholder, using a replacer function so that '$' sequences in
		// the DOI aren't interpreted as replacement patterns
		url = url.replace(/\{doi}/g, () => doi);
		
		// The request isn't made until the resolver function is called
		resolvers.push(async function () {
			Zotero.debug(`Looking for PDFs for ${doi} via ${name}`);
			
			var req = await Zotero.HTTP.request(
				method,
				url,
				{
					responseType: mode == 'json' ? 'json' : 'document',
					timeout: 5000
				}
			);
			
			if (mode == 'html') {
				let doc = req.response;
				// Use the element at 'index' among the matches if one was given, or else
				// the first match
				let elem = index
					? doc.querySelectorAll(selector).item(index)
					: doc.querySelector(selector);
				if (!elem) return [];
				// Read the URL from the given attribute, or else from the element's text
				let val = attribute
					? elem.getAttribute(attribute)
					: elem.textContent;
				if (!val) return [];
				return [{
					accessMethod: name,
					url: val,
					referrer: url,
				}];
			}
			else if (mode == 'json') {
				let jspath = require('resource://zotero/jspath.js');
				let json = req.response;
				let results = jspath.apply(selector, json);
				// If mappings for 'url' and 'pageURL' are supplied,
				// extract properties from each object in the array
				if (mappings) {
					let mappedResults = [];
					for (let result of results) {
						if (typeof result != 'object') continue;
						let mappedResult = {};
						for (let field in mappings) {
							if (!['url', 'pageURL'].includes(field)) continue;
							if (result[mappings[field]]) {
								mappedResult[field] = result[mappings[field]];
							}
						}
						mappedResults.push(mappedResult);
					}
					results = mappedResults;
				}
				// Otherwise just treat each array entry as the URL
				else {
					results = results
						.filter(val => typeof val == 'string')
						.map(val => ({ url: val }));
				}
				// Tag each result with the resolver name and the URL it came from
				return results.map((o) => {
					return Object.assign(
						o,
						{
							accessMethod: name,
							referrer: url
						}
					);
				});
			}
		});
	}
	catch (e) {
		Zotero.debug("Error parsing PDF resolver", 2);
		Zotero.debug(e, 2);
		Zotero.debug(resolver, 2);
	}
}
}
}
}
return resolvers;
};
/**
* Look for an available PDF for an item and add it as an attachment
*
* @param {Zotero.Item} item
* @param {String[]} [methods=['doi', 'url', 'oa', 'custom']]
* @return {Zotero.Item|false} - New Zotero.Item, or false if unsuccessful
*/
this.addAvailablePDF = async function (item, methods = ['doi', 'url', 'oa', 'custom']) {
Zotero.debug("Looking for available PDFs");
return this.addPDFFromURLs(item, this.getPDFResolvers(...arguments));
};
@ -948,10 +1104,13 @@ Zotero.Attachments = new function(){
* Try to add a PDF to an item from a set of possible URLs
*
* @param {Zotero.Item} item
* @param {(String|Object|Function)[]} urlObjects - See downloadFirstAvailableFile()
* @return {Zotero.Item|false} - New attachment item, or false if unsuccessful
* @param {(String|Object|Function)[]} urlResolvers - See downloadFirstAvailableFile()
* @param {Object} [options]
* @param {Function} [options.onAccessMethodStart] - Function to run when a new access method
* is started, taking the access method name as an argument
* @return {Zotero.Item|false} - New Zotero.Item, or false if unsuccessful
*/
this.addPDFFromURLs = async function (item, urlObjects) {
this.addPDFFromURLs = async function (item, urlResolvers, options = {}) {
var fileBaseName = this.getFileBaseNameFromItem(item);
var tmpDir;
var tmpFile;
@ -959,8 +1118,13 @@ Zotero.Attachments = new function(){
try {
tmpDir = (await this.createTemporaryStorageDirectory()).path;
tmpFile = OS.Path.join(tmpDir, fileBaseName + '.pdf');
let { url, index } = await this.downloadFirstAvailableFile(
urlObjects, tmpFile, { isPDF: true }
let { url, props } = await this.downloadFirstAvailableFile(
urlResolvers,
tmpFile,
{
isPDF: true,
onAccessMethodStart: options.onAccessMethodStart
}
);
if (url) {
attachmentItem = await this.createURLAttachmentFromTemporaryStorageDirectory({
@ -970,7 +1134,7 @@ Zotero.Attachments = new function(){
url,
contentType: 'application/pdf',
parentItemID: item.id,
articleVersion: urlObjects[index].version
articleVersion: props.articleVersion
});
}
else {
@ -993,45 +1157,68 @@ Zotero.Attachments = new function(){
*
* URLs are only tried once.
*
* @param {(String|Object|Function)[]} urlObjects - An array of URLs, objects, or functions
* that return arrays of objects. Objects can contain 'url' and/or 'pageURL', which is a
* webpage that might contain a translatable PDF link. Functions that return promises are
* waited for, and functions aren't called unless a file hasn't yet been found from an
* earlier entry.
* @param {(String|Object|Function)[]} urlResolvers - An array of URLs, objects, or functions
* that return arrays of objects. Objects should contain 'url' and/or 'pageURL' (the latter
* being a webpage that might contain a translatable PDF link), 'accessMethod' (which will
* be displayed in the save popup), and an optional 'articleVersion' ('submittedVersion',
* 'acceptedVersion', or 'publishedVersion'). Functions that return promises are waited for,
* and functions aren't called unless a file hasn't yet been found from an earlier entry.
* @param {String} path - Path to save file to
* @param {Object} [options] - Options to pass to this.downloadFile()
* @return {Object|false} - Object with successful 'url' and 'index' from original array, or
* false if no file could be downloaded
* @return {Object|false} - Object with successful 'url' and 'props' from the associated urlResolver,
* or false if no file could be downloaded
*/
this.downloadFirstAvailableFile = async function (urlObjects, path, options) {
this.downloadFirstAvailableFile = async function (urlResolvers, path, options) {
const maxURLs = 6;
const schemeRE = /^(http:)?\/\//;
// Operate on copy, since we might change things
urlObjects = [...urlObjects];
urlResolvers = [...urlResolvers];
// Don't try the same URL more than once
var triedURLs = new Set();
var triedPages = new Set();
for (let i = 0; i < urlObjects.length; i++) {
let urlObject = urlObjects[i];
for (let i = 0; i < urlResolvers.length; i++) {
let urlResolver = urlResolvers[i];
if (typeof urlObject == 'function') {
urlObject = await urlObject();
urlObjects.splice(i, 1, ...urlObject);
urlObject = urlObjects[i];
// No URLs returned from last function
if (!urlObject) {
break;
if (typeof urlResolver == 'function') {
try {
urlResolver = await urlResolver();
}
catch (e) {
Zotero.logError(e);
urlResolver = [];
}
// Don't allow more than 6 URLs from a given resolver
// Among other things, this ignores Unpaywall rows that have a huge number of
// URLs by mistake (as of August 2018).
if (urlResolver.length > maxURLs) {
Zotero.debug(`Keeping ${maxURLs} URLs`);
urlResolver = urlResolver.slice(0, maxURLs);
}
// Splice any URLs from resolver into the array
urlResolvers.splice(i, 1, ...urlResolver);
i--;
continue;
}
// Accept URL strings in addition to objects
if (typeof urlObject == 'string') {
urlObject = { url: urlObject };
if (typeof urlResolver == 'string') {
urlResolver = { url: urlResolver };
}
let url = urlObject.url;
let pageURL = urlObject.pageURL;
let url = urlResolver.url;
let pageURL = urlResolver.pageURL;
let fromPage = false;
// Force URLs to HTTPS. If a request fails because of that, too bad.
if (!Zotero.test) {
if (url) url = url.replace(schemeRE, 'https://');
if (pageURL) pageURL = pageURL.replace(schemeRE, 'https://');
}
// Ignore URLs we've already tried
if (url && triedURLs.has(url)) {
Zotero.debug(`PDF at ${url} was already tried -- skipping`);
@ -1042,12 +1229,24 @@ Zotero.Attachments = new function(){
pageURL = null;
}
if (!url && !pageURL) {
continue;
}
if (urlResolver.referrer) {
options.referrer = urlResolver.referrer;
}
if (options.onAccessMethodStart) {
options.onAccessMethodStart(urlResolver.accessMethod);
delete options.onAccessMethod;
}
// Try URL first if available
if (url) {
triedURLs.add(url);
try {
await this.downloadFile(url, path, options);
return { url, index: i };
return { url, props: urlResolver };
}
catch (e) {
Zotero.debug(`Error downloading ${url}: ${e}`);
@ -1092,7 +1291,7 @@ Zotero.Attachments = new function(){
let downloadOptions = Object.assign({}, options, { referrer: responseURL });
try {
await this.downloadFile(url, path, downloadOptions);
return { url, index: i };
return { url, props: urlResolver };
}
catch (e) {
Zotero.debug(`Error downloading ${url}: ${e}`);

View file

@ -152,9 +152,69 @@ Zotero.Server.Connector.SessionManager = {
Zotero.Server.Connector.SaveSession = function (id, action, requestData) {
this.id = id;
this.created = new Date();
this.savingDone = false;
this._action = action;
this._requestData = requestData;
this._items = new Set();
this._progressItems = {};
this._orderedProgressItems = [];
};
/**
 * Record the latest progress for a top-level item or a child attachment in this session
 *
 * @param {Object} item - JSON item with an 'id'. If 'parent' is set, the entry is stored in the
 *     'attachments' array of that parent's progress item, which must already exist.
 * @param {Number|false} progress - Progress value to store
 * @param {*} [error] - Not used by this function
 */
Zotero.Server.Connector.SaveSession.prototype.onProgress = function (item, progress, error) {
	if (!item.id) {
		throw new Error("ID not provided");
	}
	
	// Child item: update (or create) its entry under the parent's progress item
	if (item.parent) {
		let parentEntry = this._progressItems[item.parent];
		if (!parentEntry) {
			throw new Error(`Parent progress item ${item.parent} not found `
				+ `for attachment ${item.id}`);
		}
		let childEntry = parentEntry.attachments.find(x => x.id == item.id);
		if (!childEntry) {
			childEntry = { id: item.id };
			parentEntry.attachments.push(childEntry);
		}
		Object.assign(childEntry, {
			title: item.title,
			contentType: item.mimeType,
			progress: progress
		});
		return;
	}
	
	// Top-level item: create the entry on first sight, remembering insertion order
	var entry = this._progressItems[item.id];
	if (!entry) {
		entry = { id: item.id };
		this._progressItems[item.id] = entry;
		this._orderedProgressItems.push(item.id);
	}
	var isAttachment = item.itemType == 'attachment';
	entry.title = item.title;
	// PDF being converted to a top-level item after recognition
	if (entry.itemType == 'attachment' && !isAttachment) {
		delete entry.progress;
		delete entry.contentType;
	}
	entry.itemType = item.itemType;
	entry.attachments = item.attachments;
	// Only top-level attachments carry their own progress value
	if (isAttachment) {
		entry.progress = progress;
	}
};
/**
 * Look up the progress entry stored for a top-level item
 *
 * @param id - 'id' of the top-level item, as passed to onProgress()
 * @return {Object|undefined} - The stored progress entry, or undefined if there is none
 */
Zotero.Server.Connector.SaveSession.prototype.getProgressItem = function (id) {
	var entry = this._progressItems[id];
	return entry;
};
/**
 * Get the progress entries for all top-level items, in the order they were first reported
 *
 * @return {Object[]}
 */
Zotero.Server.Connector.SaveSession.prototype.getAllProgress = function () {
	var entries = [];
	for (let id of this._orderedProgressItems) {
		entries.push(this._progressItems[id]);
	}
	return entries;
};
Zotero.Server.Connector.SaveSession.prototype.addItem = async function (item) {
@ -315,44 +375,6 @@ Zotero.Server.Connector.SaveSession.prototype._updateRecents = function () {
};
Zotero.Server.Connector.AttachmentProgressManager = new function() {
var attachmentsInProgress = new WeakMap(),
attachmentProgress = {},
id = 1;
/**
* Adds attachments to attachment progress manager
*/
this.add = function(attachments) {
for(var i=0; i<attachments.length; i++) {
var attachment = attachments[i];
attachmentsInProgress.set(attachment, (attachment.id = id++));
}
};
/**
* Called on attachment progress
*/
this.onProgress = function(attachment, progress, error) {
attachmentProgress[attachmentsInProgress.get(attachment)] = progress;
};
/**
* Gets progress for a given progressID
*/
this.getProgressForID = function(progressID) {
return progressID in attachmentProgress ? attachmentProgress[progressID] : 0;
};
/**
* Check if we have received progress for a given attachment
*/
this.has = function(attachment) {
return attachmentsInProgress.has(attachment)
&& attachmentsInProgress.get(attachment) in attachmentProgress;
}
};
/**
* Lists all available translators, including code for translators that should be run on every page
*
@ -568,11 +590,11 @@ Zotero.Server.Connector.SavePage.prototype = {
var jsonItems = [];
translate.setHandler("select", function(obj, item, callback) { return me._selectItems(obj, item, callback) });
translate.setHandler("itemDone", function(obj, item, jsonItem) {
Zotero.Server.Connector.AttachmentProgressManager.add(jsonItem.attachments);
//Zotero.Server.Connector.AttachmentProgressManager.add(jsonItem.attachments);
jsonItems.push(jsonItem);
});
translate.setHandler("attachmentProgress", function(obj, attachment, progress, error) {
Zotero.Server.Connector.AttachmentProgressManager.onProgress(attachment, progress, error);
//Zotero.Server.Connector.AttachmentProgressManager.onProgress(attachment, progress, error);
});
translate.setHandler("done", function(obj, item) {
Zotero.Browser.deleteHiddenBrowser(me._browser);
@ -639,15 +661,36 @@ Zotero.Server.Connector.SaveItems.prototype = {
return new Zotero.Promise((resolve) => {
try {
this.saveItems(
session,
targetID,
requestData,
function (topLevelItems) {
// Only return the properties the connector needs
topLevelItems = topLevelItems.map((item) => {
return {
id: item.id,
title: item.title,
itemType: item.itemType,
contentType: item.mimeType,
mimeType: item.mimeType, // TODO: Remove
attachments: item.attachments.map((attachment) => {
return {
id: session.id + '_' + attachment.id, // TODO: Remove prefix
title: attachment.title,
contentType: attachment.contentType,
mimeType: attachment.mimeType, // TODO: Remove
};
})
};
});
resolve([201, "application/json", JSON.stringify({items: topLevelItems})]);
}
)
// Add items to session once all attachments have been saved
.then(function (items) {
session.addItems(items);
// Return 'done: true' so the connector stops checking for updates
session.savingDone = true;
});
}
catch (e) {
@ -657,9 +700,8 @@ Zotero.Server.Connector.SaveItems.prototype = {
});
}),
saveItems: async function (target, requestData, onTopLevelItemsDone) {
saveItems: async function (session, target, requestData, onTopLevelItemsDone) {
var { library, collection, editable } = Zotero.Server.Connector.resolveTarget(target);
var data = requestData.data;
var cookieSandbox = data.uri
? new Zotero.CookieSandbox(
@ -673,8 +715,29 @@ Zotero.Server.Connector.SaveItems.prototype = {
cookieSandbox.addCookiesFromHeader(data.detailedCookies);
}
var id = 1;
for (let item of data.items) {
Zotero.Server.Connector.AttachmentProgressManager.add(item.attachments);
if (!item.id) {
item.id = id++;
}
if (item.attachments) {
for (let attachment of item.attachments) {
attachment.id = id++;
attachment.parent = item.id;
}
}
// Add parent item to session progress without attachments, which are added later if
// they're saved.
let progressItem = Object.assign(
{},
item,
{
attachments: []
}
);
session.onProgress(progressItem, 0);
}
var proxy = data.proxy && new Zotero.Proxy(data.proxy);
@ -691,21 +754,10 @@ Zotero.Server.Connector.SaveItems.prototype = {
});
return itemSaver.saveItems(
data.items,
Zotero.Server.Connector.AttachmentProgressManager.onProgress,
function () {
// Remove attachments from item.attachments that aren't being saved. We have to
// clone the items so that we don't mutate the data stored in the session.
var savedItems = [...data.items.map(item => Object.assign({}, item))];
for (let item of savedItems) {
item.attachments = item.attachments
.filter(attachment => {
return Zotero.Server.Connector.AttachmentProgressManager.has(attachment);
});
}
if (onTopLevelItemsDone) {
onTopLevelItemsDone(savedItems);
}
}
function (attachment, progress, error) {
session.onProgress(attachment, progress, error);
},
onTopLevelItemsDone
);
}
}
@ -923,6 +975,56 @@ Zotero.Server.Connector.UpdateSession.prototype = {
}
};
/**
 * Connector endpoint that reports the progress of every item in a save session
 *
 * Accepts:
 *     sessionID - ID of the save session
 * Returns:
 *     200 with { items, done }, where 'items' are the session's progress items and 'done' is
 *     the session's 'savingDone' flag, or 400 with an 'error' code if the session ID is
 *     missing or unknown
 */
Zotero.Server.Connector.SessionProgress = function () {};
Zotero.Server.Endpoints["/connector/sessionProgress"] = Zotero.Server.Connector.SessionProgress;
Zotero.Server.Connector.SessionProgress.prototype = {
	supportedMethods: ["POST"],
	supportedDataTypes: ["application/json"],
	permitBookmarklet: true,
	
	init: async function (requestData) {
		var { sessionID } = requestData.data;
		
		if (!sessionID) {
			return [400, "application/json", JSON.stringify({ error: "SESSION_ID_NOT_PROVIDED" })];
		}
		
		var session = Zotero.Server.Connector.SessionManager.get(sessionID);
		if (!session) {
			Zotero.debug("Can't find session " + sessionID, 1);
			return [400, "application/json", JSON.stringify({ error: "SESSION_NOT_FOUND" })];
		}
		
		// Copy an attachment entry, prefixing its id with 'sessionID_'
		// TODO: Remove this once support for /attachmentProgress is removed and we stop
		// prefixing the ids in the /saveItems response
		var withPrefixedID = function (attachment) {
			return Object.assign({}, attachment, { id: session.id + '_' + attachment.id });
		};
		
		// Work on copies so the progress data stored in the session isn't modified
		var items = session.getAllProgress().map(function (item) {
			var copy = Object.assign({}, item);
			if (item.attachments) {
				copy.attachments = item.attachments.map(a => withPrefixedID(a));
			}
			return copy;
		});
		
		var body = JSON.stringify({
			items: items,
			done: session.savingDone
		});
		return [200, "application/json", body];
	}
};
Zotero.Server.Connector.DelaySync = function () {};
Zotero.Server.Endpoints["/connector/delaySync"] = Zotero.Server.Connector.DelaySync;
Zotero.Server.Connector.DelaySync.prototype = {
@ -955,8 +1057,27 @@ Zotero.Server.Connector.Progress.prototype = {
* @param {Function} sendResponseCallback function to send HTTP response
*/
init: function(data, sendResponseCallback) {
sendResponseCallback(200, "application/json",
JSON.stringify(data.map(id => Zotero.Server.Connector.AttachmentProgressManager.getProgressForID(id))));
// Respond with one progress value per requested id, or null for any id that can't be resolved
sendResponseCallback(
	200,
	"application/json",
	JSON.stringify(
		data.map((id) => {
			// IDs are expected in the form 'sessionID_progressID'. Coerce to a string so
			// that an id in another form yields null instead of throwing.
			var [sessionID, progressID] = String(id).split('_');
			var session = Zotero.Server.Connector.SessionManager.get(sessionID);
			// Unknown session: report no progress rather than failing the whole request
			if (!session) {
				return null;
			}
			for (let item of session.getAllProgress()) {
				// Progress items aren't guaranteed to have an 'attachments' array
				for (let attachment of item.attachments || []) {
					// TODO: Change to progressID instead of id once we stop prepending
					// the sessionID to support older connector versions
					if (attachment.id == progressID) {
						return attachment.progress;
					}
				}
			}
			return null;
		})
	)
);
}
};

View file

@ -1625,7 +1625,13 @@ Zotero.Translate.Base.prototype = {
var attachmentsWithProgress = [];
function attachmentCallback(attachment, progress, error) {
// Find by id if available (used in the connector)
if (attachment.id) {
var attachmentIndex = this._savingAttachments.findIndex(x => x.id == attachment.id);
}
else {
var attachmentIndex = this._savingAttachments.indexOf(attachment);
}
if(progress === false || progress === 100) {
if(attachmentIndex !== -1) {
this._savingAttachments.splice(attachmentIndex, 1);

View file

@ -78,113 +78,278 @@ Zotero.Translate.ItemSaver.ATTACHMENT_MODE_FILE = 2;
Zotero.Translate.ItemSaver.prototype = {
/**
* Saves items to Standalone or the server
* @param items Items in Zotero.Item.toArray() format
* @param {Object[]} jsonItems - Items in Zotero.Item.toArray() format
* @param {Function} [attachmentCallback] A callback that receives information about attachment
* save progress. The callback will be called as attachmentCallback(attachment, false, error)
* on failure or attachmentCallback(attachment, progressPercent) periodically during saving.
* @param {Function} [itemsDoneCallback] A callback that is called once all top-level items are
* done saving with a list of items. Will include saved notes, but exclude attachments.
*/
saveItems: Zotero.Promise.coroutine(function* (items, attachmentCallback, itemsDoneCallback) {
let newItems = [], standaloneAttachments = [], childAttachments = [];
yield Zotero.DB.executeTransaction(function* () {
for (let iitem=0; iitem<items.length; iitem++) {
let item = items[iitem], newItem, myID;
// Type defaults to "webpage"
let type = (item.itemType ? item.itemType : "webpage");
saveItems: async function (jsonItems, attachmentCallback, itemsDoneCallback) {
var items = [];
var standaloneAttachments = [];
var childAttachments = [];
var jsonByItem = new Map();
if (type == "note") { // handle notes differently
newItem = yield this._saveNote(item);
await Zotero.DB.executeTransaction(async function () {
for (let jsonItem of jsonItems) {
jsonItem = Object.assign({}, jsonItem);
let item;
let itemID;
// Type defaults to "webpage"
let type = jsonItem.itemType || "webpage";
// Handle notes differently
if (type == "note") {
item = await this._saveNote(jsonItem);
}
// Handle standalone attachments differently
else if (type == "attachment") {
if (this._canSaveAttachment(item)) {
standaloneAttachments.push(item);
attachmentCallback(item, 0);
if (this._canSaveAttachment(jsonItem)) {
standaloneAttachments.push(jsonItem);
attachmentCallback(jsonItem, 0);
}
continue;
} else {
newItem = new Zotero.Item(type);
newItem.libraryID = this._libraryID;
if (item.creators) this._cleanCreators(item.creators);
if(item.tags) item.tags = this._cleanTags(item.tags);
}
else {
item = new Zotero.Item(type);
item.libraryID = this._libraryID;
if (jsonItem.creators) this._cleanCreators(jsonItem.creators);
if (jsonItem.tags) jsonItem.tags = this._cleanTags(jsonItem.tags);
if (item.accessDate == 'CURRENT_TIMESTAMP') {
item.accessDate = Zotero.Date.dateToISO(new Date());
if (jsonItem.accessDate == 'CURRENT_TIMESTAMP') {
jsonItem.accessDate = Zotero.Date.dateToISO(new Date());
}
// Need to handle these specially. Put them in a separate object to
// avoid a warning from fromJSON()
let specialFields = {
attachments:item.attachments,
notes:item.notes,
seeAlso:item.seeAlso,
id:item.itemID || item.id
};
newItem.fromJSON(this._deleteIrrelevantFields(item));
item.fromJSON(this._copyJSONItemForImport(jsonItem));
// deproxify url
if (this._proxy && item.url) {
let url = this._proxy.toProper(item.url);
Zotero.debug(`Deproxifying item url ${item.url} with scheme ${this._proxy.scheme} to ${url}`, 5);
newItem.setField('url', url);
if (this._proxy && jsonItem.url) {
let url = this._proxy.toProper(jsonItem.url);
Zotero.debug(`Deproxifying item url ${jsonItem.url} with scheme ${this._proxy.scheme} to ${url}`, 5);
item.setField('url', url);
}
if (this._collections) {
newItem.setCollections(this._collections);
item.setCollections(this._collections);
}
// save item
myID = yield newItem.save(this._saveOptions);
itemID = await item.save(this._saveOptions);
// handle notes
if (specialFields.notes) {
for (let i=0; i<specialFields.notes.length; i++) {
yield this._saveNote(specialFields.notes[i], myID);
if (jsonItem.notes) {
for (let note of jsonItem.notes) {
await this._saveNote(note, itemID);
}
item.notes = specialFields.notes;
}
// handle attachments
if (specialFields.attachments) {
for (let attachment of specialFields.attachments) {
if (!this._canSaveAttachment(attachment)) {
if (jsonItem.attachments) {
let attachmentsToSave = [];
let foundPrimaryPDF = false;
for (let jsonAttachment of jsonItem.attachments) {
if (!this._canSaveAttachment(jsonAttachment)) {
continue;
}
attachmentCallback(attachment, 0);
childAttachments.push([attachment, myID]);
// The first PDF is the primary one. If that one fails to download,
// we might check for an open-access PDF below.
let isPrimaryPDF = false;
if (jsonAttachment.mimeType == 'application/pdf' && !foundPrimaryPDF) {
jsonAttachment.isPrimaryPDF = true;
foundPrimaryPDF = true;
}
// Restore the attachments field, since we use it later in
// translation
item.attachments = specialFields.attachments;
attachmentsToSave.push(jsonAttachment);
attachmentCallback(jsonAttachment, 0);
childAttachments.push([jsonAttachment, itemID]);
}
jsonItem.attachments = attachmentsToSave;
}
// handle see also
this._handleRelated(specialFields, newItem);
this._handleRelated(jsonItem, item);
}
// add to new item list
newItems.push(newItem);
// Add to new item list
items.push(item);
jsonByItem.set(item, jsonItem);
}
}.bind(this));
// Save standalone attachments
for (let jsonItem of standaloneAttachments) {
let item = await this._saveAttachment(jsonItem, null, attachmentCallback);
if (item) {
items.push(item);
}
}
if (itemsDoneCallback) {
itemsDoneCallback(newItems.splice());
itemsDoneCallback(items.map(item => jsonByItem.get(item)));
}
// Handle attachments outside of the transaction, because they can involve downloading
for (let item of standaloneAttachments) {
let newItem = yield this._saveAttachment(item, null, attachmentCallback);
if (newItem) newItems.push(newItem);
}
for (let a of childAttachments) {
// Workaround for https://bugzilla.mozilla.org/show_bug.cgi?id=449811 (fixed in Fx51?)
let [item, parentItemID] = a;
yield this._saveAttachment(item, parentItemID, attachmentCallback);
// For items with DOIs and without PDFs from the translator, look for possible
// open-access PDFs. There's no guarantee that either translated PDFs or OA PDFs will
// successfully download, but this lets us update the progress window sooner with
// possible downloads.
//
// TODO: Separate pref?
var pdfResolvers = new Map();
if (Zotero.Prefs.get('downloadAssociatedFiles')
// TEMP: Limit to dev builds
&& Zotero.isDevBuild) {
for (let item of items) {
let doi = item.getField('DOI');
if (!doi) {
continue;
}
return newItems;
}),
let jsonItem = jsonByItem.get(item);
// Skip items with translated PDF attachments
if (jsonItem.attachments
&& jsonItem.attachments.some(x => x.mimeType == 'application/pdf')) {
continue;
}
try {
let resolvers = this._getPDFResolvers(item);
pdfResolvers.set(item, resolvers);
// If there are possible URLs, create a status line for the PDF
if (resolvers.length) {
let title = Zotero.getString('findPDF.searchingForAvailablePDFs');
let jsonAttachment = this._makeJSONAttachment(jsonItem.id, title);
jsonItem.attachments.push(jsonAttachment);
attachmentCallback(jsonAttachment, 0);
}
}
catch (e) {
Zotero.logError(e);
}
}
}
// Save translated child attachments, and keep track of whether the save was successful
var itemIDsWithPDFAttachments = new Set();
for (let [jsonAttachment, parentItemID] of childAttachments) {
let attachment = await this._saveAttachment(
jsonAttachment,
parentItemID,
function (attachment, progress, error) {
// Don't cancel failed primary PDFs until we've tried other methods
if (progress === false && attachment.isPrimaryPDF) {
return;
}
attachmentCallback(...arguments);
}
);
if (attachment && jsonAttachment.isPrimaryPDF) {
itemIDsWithPDFAttachments.add(parentItemID);
}
}
// If a translated PDF attachment wasn't saved successfully, either because there wasn't
// one or there was but it failed, look for another PDF (if enabled)
if (Zotero.Prefs.get('downloadAssociatedFiles')
// TEMP: Limit to dev builds
&& Zotero.isDevBuild) {
for (let item of items) {
// Already have a PDF from translation
if (itemIDsWithPDFAttachments.has(item.id)) {
continue;
}
let jsonItem = jsonByItem.get(item);
// Reuse the existing status line if there is one. This could be a failed
// translator attachment or a possible OA PDF found above.
let jsonAttachment = jsonItem.attachments.find(
x => x.mimeType == 'application/pdf' && x.isPrimaryPDF
);
// We might already have retrieved possible OA URLs above, if there wasn't a PDF
// from the translator. If not, get them now.
let resolvers = pdfResolvers.get(item);
if (!resolvers) {
resolvers = this._getPDFResolvers(item);
}
if (!resolvers.length) {
// If there was an existing status line, use that
if (jsonAttachment) {
attachmentCallback(jsonAttachment, false);
}
continue;
}
// If no status line, add one, since we have something to try
if (!jsonAttachment) {
jsonAttachment = this._makeJSONAttachment(
jsonItem.id, Zotero.getString('findPDF.searchingForAvailablePDFs')
);
}
attachmentCallback(jsonAttachment, 0);
let attachment;
try {
attachment = await Zotero.Attachments.addPDFFromURLs(
item,
resolvers,
{
// When a new access method starts, update the status line
onAccessMethodStart: (method) => {
jsonAttachment.title = this._getPDFTitleForAccessMethod(method);
attachmentCallback(jsonAttachment, 0);
}
}
);
}
catch (e) {
Zotero.logError(e);
attachmentCallback(jsonAttachment, false, e);
continue;
}
if (attachment) {
attachmentCallback(jsonAttachment, 100);
}
else {
attachmentCallback(jsonAttachment, false, "PDF not found");
}
}
}
return items;
},
_makeJSONAttachment: function (parentID, title) {
return {
id: Zotero.Utilities.randomString(),
parent: parentID,
title,
mimeType: 'application/pdf',
isPrimaryPDF: true
};
},
_getPDFTitleForAccessMethod: function (accessMethod) {
if (accessMethod == 'oa') {
return Zotero.getString('findPDF.openAccessPDF');
}
if (accessMethod) {
return Zotero.getString('findPDF.pdfWithMethod', accessMethod);
}
return "PDF";
},
_getPDFResolvers: function (item) {
return Zotero.Attachments.getPDFResolvers(item, ['oa', 'custom']);
},
"saveCollections": Zotero.Promise.coroutine(function* (collections) {
var collectionsToProcess = collections.slice();
@ -240,14 +405,28 @@ Zotero.Translate.ItemSaver.prototype = {
}),
/**
* Deletes irrelevant fields from an item object to avoid warnings in Item#fromJSON
* Create a copy of item JSON without irrelevant fields to avoid warnings in Item#fromJSON
*
* Also delete some things like dateAdded, dateModified, and path that translators
* should not be able to set directly.
*/
"_deleteIrrelevantFields": function(item) {
const DELETE_FIELDS = ["attachments", "notes", "dateAdded", "dateModified", "seeAlso", "version", "id", "itemID", "path"];
for (let i=0; i<DELETE_FIELDS.length; i++) delete item[DELETE_FIELDS[i]];
return item;
_copyJSONItemForImport: function (item) {
var newItem = Object.assign({}, item);
const fieldsToDelete = [
"attachments",
"notes",
"dateAdded",
"dateModified",
"seeAlso",
"version",
"id",
"itemID",
"path"
];
for (let field of fieldsToDelete) {
delete newItem[field];
}
return newItem;
},
@ -290,7 +469,7 @@ Zotero.Translate.ItemSaver.prototype = {
* parameters: translator attachment object, percent completion (integer),
* and an optional error object
*
* @return {Zotero.Primise<Zotero.Item|False} Flase is returned if attachment
* @return {Zotero.Promise<Zotero.Item|false>} - False is returned if attachment
* was not saved due to error or user settings.
*/
_saveAttachment: Zotero.Promise.coroutine(function* (attachment, parentItemID, attachmentCallback) {
@ -325,6 +504,7 @@ Zotero.Translate.ItemSaver.prototype = {
attachmentCallback(attachment, 100);
return newAttachment;
} catch(e) {
Zotero.debug("Saving attachment failed", 2);
Zotero.debug(e, 2);
attachmentCallback(attachment, false, e);
return false;

View file

@ -936,9 +936,12 @@ Zotero.Utilities.Internal = {
* Note: This uses a private API. Please use Unpaywall directly for non-Zotero projects.
*
* @param {String} doi
* @return {String[]} - An array of PDF URLs
* @param {Object} [options]
* @param {Number} [options.timeout] - Request timeout in milliseconds
* @return {Object[]} - An array of objects with 'url' and/or 'pageURL' and 'version'
* ('submittedVersion', 'acceptedVersion', 'publishedVersion')
*/
getOpenAccessPDFURLs: async function (doi) {
getOpenAccessPDFURLs: async function (doi, options = {}) {
doi = Zotero.Utilities.cleanDOI(doi);
if (!doi) {
throw new Error(`Invalid DOI '${doi}'`);
@ -946,26 +949,25 @@ Zotero.Utilities.Internal = {
Zotero.debug(`Looking for open-access PDFs for ${doi}`);
var url = ZOTERO_CONFIG.SERVICES_URL + 'oa/search';
var req = await Zotero.HTTP.request('POST', url, {
var req = await Zotero.HTTP.request(
'POST',
url,
Object.assign(
{
headers: {
'Content-Type': 'application/json'
},
body: JSON.stringify({ doi }),
responseType: 'json'
});
var urls = req.response;
Zotero.debug(`Found ${urls.length} open-access PDF ${Zotero.Utilities.pluralize(urls.length, ['URL', 'URLs'])}`);
// Handle older URL-only format
urls = urls.map(o => typeof o == 'string' ? { url: o } : o);
// Only try a small number of URLs, and ignore Unpaywall rows that have a huge number of
// URLs by mistake (as of August 2018)
let maxURLs = 6;
if (urls.length > maxURLs) {
Zotero.debug(`Keeping ${maxURLs} URLs`);
urls = urls.slice(0, maxURLs);
},
options.timeout && {
timeout: options.timeout
}
)
);
var urls = req.response;
Zotero.debug(`Found ${urls.length} open-access PDF `
+ `${Zotero.Utilities.pluralize(urls.length, ['URL', 'URLs'])}`);
return urls;
},

View file

@ -3858,7 +3858,7 @@ var ZoteroPane = new function()
var icon = 'chrome://zotero/skin/treeitem-attachment-pdf.png';
var progressWin = new Zotero.ProgressWindow();
var title = Zotero.getString('findPDF.headline');
var title = Zotero.getString('findPDF.searchingForAvailablePDFs');
progressWin.changeHeadline(title);
var itemProgress = new progressWin.ItemProgress(
icon,
@ -3891,7 +3891,7 @@ var ZoteroPane = new function()
itemProgress.setText(Zotero.getString('findPDF.pdfsAdded', successful, successful));
}
else {
itemProgress.setText("No PDFs found")
itemProgress.setText(Zotero.getString('findPDF.noPDFsFound'))
}
progressWin.startCloseTimer(4000);

View file

@ -602,9 +602,12 @@ ingester.importFile.intoNewCollection = Import into new collection
ingester.lookup.performing = Performing Lookup…
ingester.lookup.error = An error occurred while performing lookup for this item.
findPDF.headline = Searching for available PDFs…
findPDF.searchingForAvailablePDFs = Searching for available PDFs…
findPDF.checkingItems = Checking %S item;Checking %S items
findPDF.pdfsAdded = %S PDF added;%S PDFs added
findPDF.openAccessPDF = Open-Access PDF
findPDF.pdfWithMethod = PDF (%S)
findPDF.noPDFsFound = No PDFs found
db.dbCorrupted = The Zotero database '%S' appears to have become corrupted.
db.dbCorrupted.restart = Please restart %S to attempt an automatic restore from the last backup.

View file

@ -30,6 +30,7 @@ pref("extensions.zotero.openURL.version","1.0");
pref("extensions.zotero.parseEndNoteMIMETypes",true);
pref("extensions.zotero.automaticSnapshots",true);
pref("extensions.zotero.downloadAssociatedFiles",true);
pref("extensions.zotero.findPDFs.resolvers", '[]');
pref("extensions.zotero.reportTranslationFailure",true);
pref("extensions.zotero.automaticTags",true);
pref("extensions.zotero.fontSize", "1.0");

6
package-lock.json generated
View file

@ -3136,6 +3136,12 @@
"integrity": "sha1-T9kss04OnbPInIYi7PUfm5eMbLk=",
"dev": true
},
"jspath": {
"version": "0.4.0",
"resolved": "https://registry.npmjs.org/jspath/-/jspath-0.4.0.tgz",
"integrity": "sha512-2/R8wkot8NCXrppBT/onp+4mcAUAZqtPxsW6aSJU3hrFAVqKqtFYcat2XJZ7inN4RtATUxfv0UQSYOmvJKiIGA==",
"dev": true
},
"jsprim": {
"version": "1.4.0",
"resolved": "https://registry.npmjs.org/jsprim/-/jsprim-1.4.0.tgz",

View file

@ -41,6 +41,7 @@
"eslint-plugin-react": "^7.5.1",
"fs-extra": "^3.0.1",
"globby": "^6.1.0",
"jspath": "^0.4.0",
"mocha": "^3.5.3",
"multimatch": "^2.1.0",
"node-sass": "^4.9.0",

1
resource/jspath.js Symbolic link
View file

@ -0,0 +1 @@
../node_modules/jspath/lib/jspath.js

View file

@ -347,6 +347,8 @@ describe("Zotero.Attachments", function() {
var pageURL2 = 'http://website/article2';
var pageURL3 = 'http://website/article3';
var pageURL4 = 'http://website/article4';
var pageURL5 = `http://website/${doi4}`;
var pageURL6 = `http://website/${doi4}/json`;
Components.utils.import("resource://zotero-unit/httpd.js");
var httpd;
@ -354,12 +356,12 @@ describe("Zotero.Attachments", function() {
var baseURL = `http://localhost:${port}/`;
var pdfURL = `${baseURL}article1/pdf`;
var pdfSize;
var stub;
var requestStub;
before(async function () {
var origFunc = Zotero.HTTP.request.bind(Zotero.HTTP);
stub = sinon.stub(Zotero.HTTP, 'request');
stub.callsFake(function (method, url, options) {
requestStub = sinon.stub(Zotero.HTTP, 'request');
requestStub.callsFake(function (method, url, options) {
// Page responses
var routes = [
// Page 1 contains a PDF
@ -400,6 +402,44 @@ describe("Zotero.Attachments", function() {
};
}
// HTML page with PDF download link
if (url == pageURL5) {
var html = `<html>
<head>
<title>Page Title</title>
</head>
<body>
<a id="pdf-link" href="${pdfURL}">Download PDF</a>
</body>
</html>`;
let parser = new DOMParser();
let doc = parser.parseFromString(html, 'text/html');
doc = Zotero.HTTP.wrapDocument(doc, pageURL5);
return {
status: 200,
response: doc,
responseURL: pageURL5
};
}
// JSON response with PDF download links
if (url == pageURL6) {
return {
status: 200,
response: {
oa_locations: [
{
url_for_landing_page: pageURL1
},
{
url_for_pdf: pdfURL
}
]
},
responseURL: pageURL6
};
}
// OA PDF lookup
if (url.startsWith(ZOTERO_CONFIG.SERVICES_URL)) {
let json = JSON.parse(options.body);
@ -427,6 +467,8 @@ describe("Zotero.Attachments", function() {
pdfSize = await OS.File.stat(
OS.Path.join(getTestDataDirectory().path, 'test.pdf')
).size;
Zotero.Prefs.clear('findPDFs.resolvers');
});
beforeEach(async function () {
@ -439,10 +481,11 @@ describe("Zotero.Attachments", function() {
});
afterEach(async function () {
stub.resetHistory();
requestStub.resetHistory();
await new Promise((resolve) => {
httpd.stop(() => resolve());
});
Zotero.Prefs.clear('findPDFs.resolvers');
}.bind(this));
after(() => {
@ -457,8 +500,8 @@ describe("Zotero.Attachments", function() {
await item.saveTx();
var attachment = await Zotero.Attachments.addAvailablePDF(item);
assert.isTrue(stub.calledOnce);
assert.isTrue(stub.calledWith('GET', 'https://doi.org/' + doi));
assert.isTrue(requestStub.calledOnce);
assert.isTrue(requestStub.calledWith('GET', 'https://doi.org/' + doi));
assert.ok(attachment);
var json = attachment.toJSON();
assert.equal(json.url, pdfURL);
@ -475,8 +518,8 @@ describe("Zotero.Attachments", function() {
await item.saveTx();
var attachment = await Zotero.Attachments.addAvailablePDF(item);
assert.isTrue(stub.calledOnce);
assert.isTrue(stub.calledWith('GET', url));
assert.isTrue(requestStub.calledOnce);
assert.isTrue(requestStub.calledWith('GET', url));
assert.ok(attachment);
var json = attachment.toJSON();
assert.equal(json.url, pdfURL);
@ -493,10 +536,10 @@ describe("Zotero.Attachments", function() {
await item.saveTx();
var attachment = await Zotero.Attachments.addAvailablePDF(item);
assert.isTrue(stub.calledTwice);
var call1 = stub.getCall(0);
assert.isTrue(requestStub.calledTwice);
var call1 = requestStub.getCall(0);
assert.isTrue(call1.calledWith('GET', 'https://doi.org/' + doi));
var call2 = stub.getCall(1);
var call2 = requestStub.getCall(1);
assert.isTrue(call2.calledWith('POST', ZOTERO_CONFIG.SERVICES_URL + 'oa/search'));
assert.ok(attachment);
@ -515,15 +558,15 @@ describe("Zotero.Attachments", function() {
await item.saveTx();
var attachment = await Zotero.Attachments.addAvailablePDF(item);
assert.isTrue(stub.calledThrice);
assert.isTrue(requestStub.calledThrice);
// Check the DOI (and get nothing)
var call1 = stub.getCall(0);
var call1 = requestStub.getCall(0);
assert.isTrue(call1.calledWith('GET', 'https://doi.org/' + doi));
// Check the OA resolver and get page 3
var call2 = stub.getCall(1);
var call2 = requestStub.getCall(1);
assert.isTrue(call2.calledWith('POST', ZOTERO_CONFIG.SERVICES_URL + 'oa/search'));
// Check page 3 and find the download URL
var call3 = stub.getCall(2);
var call3 = requestStub.getCall(2);
assert.isTrue(call3.calledWith('GET', pageURL3));
assert.ok(attachment);
@ -543,14 +586,123 @@ describe("Zotero.Attachments", function() {
await item.saveTx();
var attachment = await Zotero.Attachments.addAvailablePDF(item);
assert.isTrue(stub.calledTwice);
var call1 = stub.getCall(0);
assert.isTrue(requestStub.calledTwice);
var call1 = requestStub.getCall(0);
assert.isTrue(call1.calledWith('GET', 'https://doi.org/' + doi));
var call2 = stub.getCall(1);
var call2 = requestStub.getCall(1);
assert.isTrue(call2.calledWith('POST', ZOTERO_CONFIG.SERVICES_URL + 'oa/search'));
assert.isFalse(attachment);
});
it("should handle a custom resolver in HTML mode", async function () {
	var doi = doi4;
	var item = createUnsavedDataObject('item', { itemType: 'journalArticle' });
	item.setField('title', 'Test');
	item.setField('DOI', doi);
	await item.saveTx();

	// Custom resolver that loads an HTML page and reads the PDF URL from a link's 'href'
	var resolvers = [{
		name: 'Custom',
		method: 'get',
		url: 'http://website/{doi}',
		mode: 'html',
		selector: '#pdf-link',
		attribute: 'href'
	}];
	Zotero.Prefs.set('findPDFs.resolvers', JSON.stringify(resolvers));

	var attachment = await Zotero.Attachments.addAvailablePDF(item);

	// DOI page, then the OA lookup, then the custom resolver page
	assert.isTrue(requestStub.calledThrice);
	var call1 = requestStub.getCall(0);
	assert.isTrue(call1.calledWith('GET', 'https://doi.org/' + doi));
	var call2 = requestStub.getCall(1);
	assert.isTrue(call2.calledWith('POST', ZOTERO_CONFIG.SERVICES_URL + 'oa/search'));
	var call3 = requestStub.getCall(2);
	assert.isTrue(call3.calledWith('GET', pageURL5));

	assert.ok(attachment);
	var json = attachment.toJSON();
	assert.equal(json.url, pdfURL);
	assert.equal(json.contentType, 'application/pdf');
	assert.equal(json.filename, 'Test.pdf');
	// `await promise.size` awaits the promise's (undefined) 'size' property rather than the
	// stat result, which made this comparison undefined == undefined. Await the stat first.
	// NOTE(review): assumes the test server serves test.pdf at pdfURL, as the suite's
	// pdfSize setup implies -- confirm
	var expectedSize = (await OS.File.stat(
		OS.Path.join(getTestDataDirectory().path, 'test.pdf')
	)).size;
	var actualSize = (await OS.File.stat(attachment.getFilePath())).size;
	assert.equal(actualSize, expectedSize);
});
it("should handle a custom resolver in JSON mode with URL strings", async function () {
	var doi = doi4;
	var item = createUnsavedDataObject('item', { itemType: 'journalArticle' });
	item.setField('title', 'Test');
	item.setField('DOI', doi);
	await item.saveTx();

	// Custom resolver whose selector points directly at URL strings in the JSON response
	var resolvers = [{
		name: 'Custom',
		method: 'get',
		url: 'http://website/{doi}/json',
		mode: 'json',
		selector: '.oa_locations.url_for_pdf'
	}];
	Zotero.Prefs.set('findPDFs.resolvers', JSON.stringify(resolvers));

	var attachment = await Zotero.Attachments.addAvailablePDF(item);

	// DOI page, then the OA lookup, then the custom resolver's JSON endpoint
	assert.isTrue(requestStub.calledThrice);
	var call1 = requestStub.getCall(0);
	assert.isTrue(call1.calledWith('GET', 'https://doi.org/' + doi));
	var call2 = requestStub.getCall(1);
	assert.isTrue(call2.calledWith('POST', ZOTERO_CONFIG.SERVICES_URL + 'oa/search'));
	var call3 = requestStub.getCall(2);
	assert.isTrue(call3.calledWith('GET', pageURL6));

	assert.ok(attachment);
	var json = attachment.toJSON();
	assert.equal(json.url, pdfURL);
	assert.equal(json.contentType, 'application/pdf');
	assert.equal(json.filename, 'Test.pdf');
	// `await promise.size` awaits the promise's (undefined) 'size' property rather than the
	// stat result, which made this comparison undefined == undefined. Await the stat first.
	// NOTE(review): assumes the test server serves test.pdf at pdfURL, as the suite's
	// pdfSize setup implies -- confirm
	var expectedSize = (await OS.File.stat(
		OS.Path.join(getTestDataDirectory().path, 'test.pdf')
	)).size;
	var actualSize = (await OS.File.stat(attachment.getFilePath())).size;
	assert.equal(actualSize, expectedSize);
});
it("should handle a custom resolver in JSON mode with mapped properties", async function () {
	var doi = doi4;
	var item = createUnsavedDataObject('item', { itemType: 'journalArticle' });
	item.setField('title', 'Test');
	item.setField('DOI', doi);
	await item.saveTx();

	// Custom resolver whose selector returns objects, with 'mappings' naming the response
	// properties to use as 'url' and 'pageURL'
	var resolvers = [{
		name: 'Custom',
		method: 'get',
		url: 'http://website/{doi}/json',
		mode: 'json',
		selector: '.oa_locations',
		mappings: {
			url: 'url_for_pdf',
			pageURL: 'url_for_landing_page',
		}
	}];
	Zotero.Prefs.set('findPDFs.resolvers', JSON.stringify(resolvers));

	var attachment = await Zotero.Attachments.addAvailablePDF(item);

	// DOI page, the OA lookup, the custom resolver's JSON endpoint, and then the landing
	// page from the first returned location
	assert.equal(requestStub.callCount, 4);
	var call1 = requestStub.getCall(0);
	assert.isTrue(call1.calledWith('GET', 'https://doi.org/' + doi));
	var call2 = requestStub.getCall(1);
	assert.isTrue(call2.calledWith('POST', ZOTERO_CONFIG.SERVICES_URL + 'oa/search'));
	var call3 = requestStub.getCall(2);
	assert.isTrue(call3.calledWith('GET', pageURL6));
	var call4 = requestStub.getCall(3);
	assert.isTrue(call4.calledWith('GET', pageURL1));

	assert.ok(attachment);
	var json = attachment.toJSON();
	assert.equal(json.url, pdfURL);
	assert.equal(json.contentType, 'application/pdf');
	assert.equal(json.filename, 'Test.pdf');
	// `await promise.size` awaits the promise's (undefined) 'size' property rather than the
	// stat result, which made this comparison undefined == undefined. Await the stat first.
	// NOTE(review): assumes the test server serves test.pdf at pdfURL, as the suite's
	// pdfSize setup implies -- confirm
	var expectedSize = (await OS.File.stat(
		OS.Path.join(getTestDataDirectory().path, 'test.pdf')
	)).size;
	var actualSize = (await OS.File.stat(attachment.getFilePath())).size;
	assert.equal(actualSize, expectedSize);
});
});
describe("#getBaseDirectoryRelativePath()", function () {

View file

@ -258,6 +258,493 @@ describe("Connector Server", function () {
var item = Zotero.Items.get(ids[0]);
assert.equal(item.getField('url'), 'https://www.example.com/path');
});
it("shouldn't return an attachment that isn't being saved", async function () {
	// With automatic snapshots disabled, the translated text/html attachment shouldn't be
	// listed in the saveItems response
	Zotero.Prefs.set('automaticSnapshots', false);

	var req;
	try {
		await selectLibrary(win, Zotero.Libraries.userLibraryID);
		await waitForItemsLoad(win);

		var body = {
			items: [
				{
					itemType: "webpage",
					title: "Title",
					creators: [],
					attachments: [
						{
							url: "http://example.com/",
							mimeType: "text/html"
						}
					],
					url: "http://example.com/"
				}
			],
			uri: "http://example.com/"
		};

		req = await Zotero.HTTP.request(
			'POST',
			connectorServerPath + "/connector/saveItems",
			{
				headers: {
					"Content-Type": "application/json"
				},
				body: JSON.stringify(body),
				responseType: 'json'
			}
		);
	}
	finally {
		// Restore the pref even if setup or the request throws, so that a failure here
		// doesn't leave snapshots disabled for later tests
		Zotero.Prefs.clear('automaticSnapshots');
	}

	assert.equal(req.status, 201);
	assert.lengthOf(req.response.items, 1);
	assert.lengthOf(req.response.items[0].attachments, 0);
});
describe("PDF retrieval", function () {
	var oaDOI = '10.1111/abcd';
	var nonOADOI = '10.2222/bcde';
	var pdfURL;
	var badPDFURL;
	var stub;

	/**
	 * POST a JSON body to a connector endpoint and request a parsed JSON response
	 *
	 * @param {String} endpoint - Path appended to connectorServerPath
	 * @param {Object|Array} body - Value to send as the JSON request body
	 * @return {Promise} - Result of Zotero.HTTP.request()
	 */
	function postJSON(endpoint, body) {
		return Zotero.HTTP.request(
			'POST',
			connectorServerPath + endpoint,
			{
				headers: {
					"Content-Type": "application/json"
				},
				body: JSON.stringify(body),
				responseType: 'json'
			}
		);
	}

	/**
	 * Query the legacy attachmentProgress endpoint, assert a 200 status, and return the
	 * response body
	 */
	async function getLegacyProgress(attachmentIDs) {
		var req = await postJSON("/connector/attachmentProgress", attachmentIDs);
		assert.equal(req.status, 200);
		return req.response;
	}

	/**
	 * Query the sessionProgress endpoint, assert a 200 status, and return the response body
	 */
	async function getSessionProgress(sessionID) {
		var req = await postJSON("/connector/sessionProgress", { sessionID });
		assert.equal(req.status, 200);
		return req.response;
	}

	before(function () {
		var origFunc = Zotero.HTTP.request.bind(Zotero.HTTP);
		stub = sinon.stub(Zotero.HTTP, 'request');
		stub.callsFake(function (method, url, options) {
			// OA PDF lookup
			if (url.startsWith(ZOTERO_CONFIG.SERVICES_URL)) {
				let json = JSON.parse(options.body);
				let response = [];
				if (json.doi == oaDOI) {
					response.push({
						url: pdfURL,
						version: 'submittedVersion'
					});
				}
				return {
					status: 200,
					response
				};
			}

			// Everything else goes through to the real request function
			return origFunc(...arguments);
		});
	});

	beforeEach(() => {
		pdfURL = testServerPath + '/pdf';
		badPDFURL = testServerPath + '/badpdf';

		httpd.registerFile(
			pdfURL.substr(testServerPath.length),
			Zotero.File.pathToFile(OS.Path.join(getTestDataDirectory().path, 'test.pdf'))
		);
		// PDF URL that's actually an HTML page
		httpd.registerFile(
			badPDFURL.substr(testServerPath.length),
			Zotero.File.pathToFile(OS.Path.join(getTestDataDirectory().path, 'test.html'))
		);
	});

	afterEach(() => {
		stub.resetHistory();
	});

	after(() => {
		stub.restore();
	});

	it("should download a translated PDF", async function () {
		var collection = await createDataObject('collection');
		await waitForItemsLoad(win);

		var sessionID = Zotero.Utilities.randomString();

		// Save item
		var itemAddPromise = waitForItemEvent('add');
		var saveItemsReq = await postJSON("/connector/saveItems", {
			sessionID,
			items: [
				{
					itemType: 'journalArticle',
					title: 'Title',
					DOI: nonOADOI,
					attachments: [
						{
							title: "PDF",
							url: pdfURL,
							mimeType: 'application/pdf'
						}
					]
				}
			],
			uri: 'http://website/article'
		});
		assert.equal(saveItemsReq.status, 201);
		assert.lengthOf(saveItemsReq.response.items, 1);
		// Translated attachment should show up in the initial response
		assert.lengthOf(saveItemsReq.response.items[0].attachments, 1);
		assert.notProperty(saveItemsReq.response.items[0], 'DOI');
		assert.notProperty(saveItemsReq.response.items[0].attachments[0], 'progress');

		// Check parent item
		var ids = await itemAddPromise;
		assert.lengthOf(ids, 1);
		var item = Zotero.Items.get(ids[0]);
		assert.equal(Zotero.ItemTypes.getName(item.itemTypeID), 'journalArticle');
		assert.isTrue(collection.hasItem(item.id));

		// Legacy endpoint should show 0
		var progress = await getLegacyProgress(
			[saveItemsReq.response.items[0].attachments[0].id]
		);
		assert.sameOrderedMembers(progress, [0]);

		// Wait for the attachment to finish saving
		itemAddPromise = waitForItemEvent('add');
		var i = 0;
		while (i < 3) {
			let response = await getSessionProgress(sessionID);
			assert.lengthOf(response.items, 1);
			let attachments = response.items[0].attachments;
			if (attachments.length) {
				assert.lengthOf(attachments, 1);
				let attachment = attachments[0];
				// Once an attachment is listed, poll again immediately (the 'continue'
				// statements skip the delay below) until the expected state shows up
				switch (i) {
					// Translated PDF in progress
					case 0:
						if (attachment.title == "PDF"
								&& Number.isInteger(attachment.progress)
								&& attachment.progress < 100) {
							assert.isFalse(response.done);
							i++;
						}
						continue;

					// Translated PDF finished
					case 1:
						if (attachment.title == "PDF" && attachment.progress == 100) {
							i++;
						}
						continue;

					// done: true
					case 2:
						if (response.done) {
							i++;
						}
						continue;
				}
			}
			await Zotero.Promise.delay(10);
		}

		// Legacy endpoint should show 100
		progress = await getLegacyProgress(
			[saveItemsReq.response.items[0].attachments[0].id]
		);
		assert.sameOrderedMembers(progress, [100]);

		// Check attachment
		ids = await itemAddPromise;
		assert.lengthOf(ids, 1);
		item = Zotero.Items.get(ids[0]);
		assert.isTrue(item.isImportedAttachment());
		assert.equal(item.getField('title'), 'PDF');
	});

	it("should download open-access PDF if no PDF provided", async function () {
		var collection = await createDataObject('collection');
		await waitForItemsLoad(win);

		var sessionID = Zotero.Utilities.randomString();

		// Save item
		var itemAddPromise = waitForItemEvent('add');
		var saveItemsReq = await postJSON("/connector/saveItems", {
			sessionID,
			items: [
				{
					itemType: 'journalArticle',
					title: 'Title',
					DOI: oaDOI,
					attachments: []
				}
			],
			uri: 'http://website/article'
		});
		assert.equal(saveItemsReq.status, 201);
		assert.lengthOf(saveItemsReq.response.items, 1);
		// Attachment shouldn't show up in the initial response
		assert.lengthOf(saveItemsReq.response.items[0].attachments, 0);

		// Check parent item
		var ids = await itemAddPromise;
		assert.lengthOf(ids, 1);
		var item = Zotero.Items.get(ids[0]);
		assert.equal(Zotero.ItemTypes.getName(item.itemTypeID), 'journalArticle');
		assert.isTrue(collection.hasItem(item.id));

		// Wait for the attachment to finish saving
		itemAddPromise = waitForItemEvent('add');
		var wasZero = false;
		var was100 = false;
		while (true) {
			let response = await getSessionProgress(sessionID);
			assert.typeOf(response.items, 'array');
			assert.lengthOf(response.items, 1);
			let progressItem = response.items[0];
			if (progressItem.attachments.length) {
				// 'progress' should have started at 0
				if (progressItem.attachments[0].progress === 0) {
					wasZero = true;
				}
				else if (!was100 && progressItem.attachments[0].progress == 100) {
					if (response.done) {
						break;
					}
					was100 = true;
				}
				else if (response.done) {
					break;
				}
			}
			// Any response that didn't end the loop above must not be marked as done
			assert.isFalse(response.done);
			await Zotero.Promise.delay(10);
		}
		assert.isTrue(wasZero);

		// Check attachment
		ids = await itemAddPromise;
		assert.lengthOf(ids, 1);
		item = Zotero.Items.get(ids[0]);
		assert.isTrue(item.isImportedAttachment());
		assert.equal(item.getField('title'), 'Title.pdf');
	});

	it("should download open-access PDF if a translated PDF fails", async function () {
		var collection = await createDataObject('collection');
		await waitForItemsLoad(win);

		var sessionID = Zotero.Utilities.randomString();

		// Save item
		var itemAddPromise = waitForItemEvent('add');
		var saveItemsReq = await postJSON("/connector/saveItems", {
			sessionID,
			items: [
				{
					itemType: 'journalArticle',
					title: 'Title',
					DOI: oaDOI,
					attachments: [
						{
							title: "PDF",
							url: badPDFURL,
							mimeType: 'application/pdf'
						}
					]
				}
			],
			uri: 'http://website/article'
		});
		assert.equal(saveItemsReq.status, 201);
		assert.lengthOf(saveItemsReq.response.items, 1);
		// Translated attachment should show up in the initial response
		assert.lengthOf(saveItemsReq.response.items[0].attachments, 1);
		assert.notProperty(saveItemsReq.response.items[0], 'DOI');
		assert.notProperty(saveItemsReq.response.items[0].attachments[0], 'progress');

		// Check parent item
		var ids = await itemAddPromise;
		assert.lengthOf(ids, 1);
		var item = Zotero.Items.get(ids[0]);
		assert.equal(Zotero.ItemTypes.getName(item.itemTypeID), 'journalArticle');
		assert.isTrue(collection.hasItem(item.id));

		// Legacy endpoint should show 0
		var progress = await getLegacyProgress(
			[saveItemsReq.response.items[0].attachments[0].id]
		);
		assert.sameOrderedMembers(progress, [0]);

		// Wait for the attachment to finish saving
		itemAddPromise = waitForItemEvent('add');
		var i = 0;
		while (i < 4) {
			let response = await getSessionProgress(sessionID);
			assert.lengthOf(response.items, 1);
			let attachments = response.items[0].attachments;
			if (attachments.length) {
				assert.lengthOf(attachments, 1);
				let attachment = attachments[0];
				// Once an attachment is listed, poll again immediately (the 'continue'
				// statements skip the delay below) until the expected state shows up
				switch (i) {
					// Translated PDF in progress
					case 0:
						if (attachment.title == "PDF"
								&& Number.isInteger(attachment.progress)
								&& attachment.progress < 100) {
							assert.isFalse(response.done);
							i++;
						}
						continue;

					// OA PDF in progress
					case 1:
						if (attachment.title == Zotero.getString('findPDF.openAccessPDF')
								&& Number.isInteger(attachment.progress)
								&& attachment.progress < 100) {
							assert.isFalse(response.done);
							i++;
						}
						continue;

					// OA PDF finished
					case 2:
						if (attachment.progress === 100) {
							assert.equal(attachment.title, Zotero.getString('findPDF.openAccessPDF'));
							i++;
						}
						continue;

					// done: true
					case 3:
						if (response.done) {
							i++;
						}
						continue;
				}
			}
			await Zotero.Promise.delay(10);
		}

		// Legacy endpoint should show 100
		progress = await getLegacyProgress(
			[saveItemsReq.response.items[0].attachments[0].id]
		);
		assert.sameOrderedMembers(progress, [100]);

		// Check attachment
		ids = await itemAddPromise;
		assert.lengthOf(ids, 1);
		item = Zotero.Items.get(ids[0]);
		assert.isTrue(item.isImportedAttachment());
		assert.equal(item.getField('title'), 'Title.pdf');
	});
});
});
describe("/connector/saveSnapshot", function () {