Automatically delay between PDF retrieval requests to the same domain
Delay requests to the same domain by 1 second, respect a Retry-After header if present for 429 and 503, and delay for 10 seconds on 429 or 5xx otherwise.
This commit is contained in:
parent
536c07e9f4
commit
d899134e7c
3 changed files with 590 additions and 100 deletions
|
@ -909,7 +909,11 @@ Zotero.Attachments = new function(){
|
|||
* @param {Boolean} [automatic=false] - Only include custom resolvers with `automatic: true`
|
||||
* @return {Object[]} - An array of urlResolvers (see downloadFirstAvailableFile())
|
||||
*/
|
||||
this.getPDFResolvers = function (item, methods = ['doi', 'url', 'oa', 'custom'], automatic) {
|
||||
this.getPDFResolvers = function (item, methods, automatic) {
|
||||
if (!methods) {
|
||||
methods = ['doi', 'url', 'oa', 'custom'];
|
||||
}
|
||||
|
||||
var useDOI = methods.includes('doi');
|
||||
var useURL = methods.includes('url');
|
||||
var useOA = methods.includes('oa');
|
||||
|
@ -1098,16 +1102,253 @@ Zotero.Attachments = new function(){
|
|||
};
|
||||
|
||||
|
||||
/**
 * Look for available PDFs for a set of items, spacing out requests to the same domain
 *
 * Items are processed one at a time from a queue. When an item is about to make a request
 * to a domain that was contacted too recently (or that asked us to back off), the item is
 * parked and the next item in the queue gets a turn. Once the end of the queue is reached,
 * processing sleeps until the earliest parked item is allowed to continue.
 *
 * @param {Zotero.Item[]} items
 * @param {Object} [options]
 * @param {String[]} [options.methods] - See getPDFResolvers()
 * @param {Function} [options.onProgress] - Called with (completed, total) each time an item
 *     has been fully processed
 * @param {Number} [options.sameDomainRequestDelay=1000] - Minimum number of milliseconds
 *     between requests to the same domain (used in tests)
 * @return {Promise<Array>} - One entry per item, in the same order as `items`: the value
 *     addPDFFromURLs() resolved with for that item, or false if the item was skipped or
 *     processing it failed
 */
this.addAvailablePDFs = async function (items, options = {}) {
	const MAX_CONSECUTIVE_DOMAIN_FAILURES = 5;
	const SAME_DOMAIN_REQUEST_DELAY = options.sameDomainRequestDelay || 1000;
	// Delay, in milliseconds, after a 429 or 5xx that didn't come with a Retry-After header
	const DEFAULT_ERROR_DELAY = 10000;
	// Longest Retry-After, in seconds, that we're willing to wait for
	const MAX_RETRY_AFTER = 3600;
	
	// domain => { nextRequestTime, consecutiveFailures }
	var domains = new Map();
	var queue = items.map((item) => {
		return {
			item,
			urlResolvers: this.getPDFResolvers(item, options.methods),
			// Domain the item is waiting on (set when one of its requests is delayed)
			domain: null,
			// Function that resumes the delayed request
			continuation: null,
			// null while the item is pending; otherwise the outcome for the item
			result: null,
		};
	});
	
	/**
	 * Get the throttling state for a domain, creating it on first use
	 *
	 * @param {String} domain
	 * @return {Object} - Object with 'nextRequestTime' (ms timestamp, 0 if no request has
	 *     been made yet) and 'consecutiveFailures'
	 */
	function getDomainInfo(domain) {
		var domainInfo = domains.get(domain);
		if (!domainInfo) {
			domainInfo = {
				nextRequestTime: 0,
				consecutiveFailures: 0
			};
			domains.set(domain, domainInfo);
		}
		return domainInfo;
	}
	
	// Single definition of the cut-off so that the immediate and the resumed code paths
	// can't disagree about when to give up on a domain
	function hasTooManyFailures(domainInfo) {
		return domainInfo.consecutiveFailures > MAX_CONSECUTIVE_DOMAIN_FAILURES;
	}
	
	/**
	 * Convert a Retry-After header value into an absolute time
	 *
	 * @param {String} value - Header value: either a number of seconds or an HTTP date
	 * @param {Number} now - Current time as a ms timestamp
	 * @return {Number|null} - ms timestamp of the earliest allowed retry, or null if the
	 *     value couldn't be understood
	 */
	function parseRetryAfter(value, now) {
		var trimmed = value.trim();
		if (/^\d+$/.test(trimmed)) {
			return now + parseInt(trimmed, 10) * 1000;
		}
		if (Zotero.Date.isHTTPDate(trimmed)) {
			return new Date(trimmed).getTime();
		}
		return null;
	}
	
	var completed = 0;
	var i = 0;
	
	//
	// Process items in the queue
	//
	await new Promise((resolve) => {
		// Arrow function, so `this` is still the object addAvailablePDFs() was called on
		var processNextItem = () => {
			// All items processed
			if (completed == queue.length) {
				resolve();
				return;
			}
			
			var current = queue[i++];
			
			// If we got to the end of the queue, wait until the next time a pending request
			// is ready to process
			if (!current) {
				let pendingTimes = queue
					.filter(x => x.result === null && x.domain)
					.map(x => getDomainInfo(x.domain).nextRequestTime)
					.filter(time => time);
				// Fall back to "now" rather than throwing if nothing is waiting on a domain
				let nextStart = pendingTimes.length ? Math.min(...pendingTimes) : Date.now();
				
				i = 0;
				setTimeout(processNextItem, Math.max(0, nextStart - Date.now()));
				return;
			}
			
			// If item was already processed, skip
			if (current.result !== null) {
				processNextItem();
				return;
			}
			
			// If processing for a domain was paused and not enough time has passed, skip ahead
			if (current.domain && getDomainInfo(current.domain).nextRequestTime > Date.now()) {
				processNextItem();
				return;
			}
			
			// Resume paused item
			if (current.continuation) {
				current.continuation();
				return;
			}
			
			if (!this.canFindPDFForItem(current.item)) {
				current.result = false;
				completed++;
				if (options.onProgress) {
					options.onProgress(completed, queue.length);
				}
				processNextItem();
				return;
			}
			
			// Process item
			this.addPDFFromURLs(
				current.item,
				current.urlResolvers,
				{
					// Runs before each request. Resolves when the request may proceed and
					// rejects if the domain has failed too many times.
					onBeforeRequest: async function (url, noDelay) {
						var domain = urlToDomain(url);
						
						// Don't delay between subsequent requests to the DOI resolver or
						// to localhost in tests
						if (['doi.org', 'localhost'].includes(domain)) {
							return;
						}
						
						var domainInfo = getDomainInfo(domain);
						
						// If too many requests have failed, stop trying
						if (hasTooManyFailures(domainInfo)) {
							current.result = false;
							throw new Error(`Too many failed requests for ${domain}`);
						}
						
						// If enough time hasn't passed since the last attempt for this domain,
						// skip for now and process more items
						let nextRequestTime = domainInfo.nextRequestTime;
						if (!noDelay && nextRequestTime > Date.now()) {
							return new Promise((resolve, reject) => {
								Zotero.debug(`Delaying request to ${domain} for ${nextRequestTime - Date.now()} ms`);
								current.domain = domain;
								current.continuation = () => {
									if (hasTooManyFailures(domainInfo)) {
										reject(new Error(`Too many failed requests for ${domain}`));
										return;
									}
									// The resumed request is itself a request to this domain,
									// so other items waiting on the domain have to wait again
									domainInfo.nextRequestTime = Date.now() + SAME_DOMAIN_REQUEST_DELAY;
									resolve();
								};
								processNextItem();
							});
						}
						
						domainInfo.nextRequestTime = Date.now() + SAME_DOMAIN_REQUEST_DELAY;
					},
					
					// Reset consecutive failures on successful request
					// NOTE(review): this only has an effect if addPDFFromURLs() forwards
					// onAfterRequest to the downloader -- confirm that it does
					onAfterRequest: function (url) {
						var domain = urlToDomain(url);
						
						// Ignore localhost in tests
						if (domain == 'localhost') {
							return;
						}
						
						var domainInfo = getDomainInfo(domain);
						domainInfo.consecutiveFailures = 0;
					},
					
					// Runs when a request fails. Returns true if the request should be
					// retried (after the delay recorded for the domain) and false to skip it.
					onRequestError: function (e) {
						// Only HTTP status errors are candidates for a retry
						if (!(e instanceof Zotero.HTTP.UnexpectedStatusException)) {
							current.result = false;
							return false;
						}
						
						let domainInfo = getDomainInfo(urlToDomain(e.url));
						domainInfo.consecutiveFailures++;
						
						let status = e.status;
						let now = Date.now();
						
						// Retry-After
						if (status == 429 || status == 503) {
							let retryAfter = e.xmlhttp.getResponseHeader('Retry-After');
							if (retryAfter) {
								Zotero.debug("Got Retry-After: " + retryAfter);
								let retryTime = parseRetryAfter(retryAfter, now);
								if (retryTime === null) {
									Zotero.debug("Invalid Retry-After value -- skipping request");
									return false;
								}
								if (retryTime > now + MAX_RETRY_AFTER * 1000) {
									Zotero.debug("Retry-After is too long -- skipping request");
									return false;
								}
								domainInfo.nextRequestTime = retryTime;
								return true;
							}
						}
						
						// If not specified, wait 10 seconds before next request to domain
						if (status == 429 || e.is5xx()) {
							domainInfo.nextRequestTime = now + DEFAULT_ERROR_DELAY;
							return true;
						}
						
						current.result = false;
						return false;
					}
				}
			)
			.then((attachment) => {
				current.result = attachment;
			})
			.catch((e) => {
				Zotero.logError(e);
				current.result = false;
			})
			// finally() isn't implemented until Firefox 58, but then() is the same here
			//.finally(() => {
			.then(function () {
				completed++;
				if (options.onProgress) {
					options.onProgress(completed, queue.length);
				}
				processNextItem();
			});
		};
		
		processNextItem();
	});
	
	return queue.map(x => x.result);
};
|
||||
|
||||
|
||||
/**
 * Get the host of a URL
 *
 * @param {String} url
 * @return {String} - The `host` of the URI object that Services.io.newURI() creates for
 *     the URL
 */
function urlToDomain(url) {
	var uri = Services.io.newURI(url, null, null);
	return uri.host;
}
|
||||
|
||||
|
||||
/**
|
||||
* Look for an available PDF for an item and add it as an attachment
|
||||
*
|
||||
* @param {Zotero.Item} item
|
||||
* @param {String[]} [methods=['doi', 'url', 'oa', 'custom']]
|
||||
* @param {Object} [options]
|
||||
* @param {String[]} [options.methods] - See getPDFResolvers()
|
||||
* @return {Zotero.Item|false} - New Zotero.Item, or false if unsuccessful
|
||||
*/
|
||||
this.addAvailablePDF = async function (item, methods = ['doi', 'url', 'oa', 'custom']) {
|
||||
this.addAvailablePDF = async function (item, options = {}) {
|
||||
Zotero.debug("Looking for available PDFs");
|
||||
return this.addPDFFromURLs(item, this.getPDFResolvers(...arguments));
|
||||
return this.addPDFFromURLs(item, this.getPDFResolvers(item, options.methods));
|
||||
};
|
||||
|
||||
|
||||
|
@ -1134,7 +1375,9 @@ Zotero.Attachments = new function(){
|
|||
tmpFile,
|
||||
{
|
||||
isPDF: true,
|
||||
onAccessMethodStart: options.onAccessMethodStart
|
||||
onAccessMethodStart: options.onAccessMethodStart,
|
||||
onBeforeRequest: options.onBeforeRequest,
|
||||
onRequestError: options.onRequestError
|
||||
}
|
||||
);
|
||||
if (url) {
|
||||
|
@ -1198,7 +1441,11 @@ Zotero.Attachments = new function(){
|
|||
* 'acceptedVersion', or 'publishedVersion'). Functions that return promises are waited for,
|
||||
* and functions aren't called unless a file hasn't yet been found from an earlier entry.
|
||||
* @param {String} path - Path to save file to
|
||||
* @param {Object} [options] - Options to pass to this.downloadFile()
|
||||
* @param {Object} [options]
|
||||
* @param {Function} [options.onBeforeRequest] - Async function that runs before a request
|
||||
* @param {Function} [options.onAfterRequest] - Function that runs after a request
|
||||
* @param {Function} [options.onRequestError] - Function that runs when a request fails.
|
||||
* Return true to retry request and false to skip.
|
||||
* @return {Object|false} - Object with successful 'url' and 'props' from the associated urlResolver,
|
||||
* or false if no file could be downloaded
|
||||
*/
|
||||
|
@ -1209,12 +1456,42 @@ Zotero.Attachments = new function(){
|
|||
// Operate on copy, since we might change things
|
||||
urlResolvers = [...urlResolvers];
|
||||
|
||||
// Don't try the same URL more than once
|
||||
// Don't try the same normalized URL more than once
|
||||
var triedURLs = new Set();
|
||||
var triedPages = new Set();
|
||||
/**
 * Reduce a URL to the form used for "already tried" comparisons by dropping everything
 * from the first '?' onward
 *
 * @param {String} url
 * @return {String}
 */
function normalizeURL(url) {
	var queryString = /\?.*/;
	return url.replace(queryString, '');
}
|
||||
/**
 * Check whether the normalized form of a URL has already been recorded in triedURLs
 *
 * @param {String} url
 * @return {Boolean}
 */
function isTriedURL(url) {
	var normalized = normalizeURL(url);
	return triedURLs.has(normalized);
}
|
||||
/**
 * Record the normalized form of a URL in triedURLs so that it isn't tried again
 *
 * @param {String} url
 */
function addTriedURL(url) {
	var normalized = normalizeURL(url);
	triedURLs.add(normalized);
}
|
||||
|
||||
/**
 * Check a URL against options.onBeforeRequest(), which can delay or cancel the request
 *
 * Does nothing if no onBeforeRequest handler was provided.
 *
 * @param {String} url - URL that is about to be requested
 * @param {Boolean} [noDelay] - Passed through to the handler unchanged
 * @return {Promise} - Settles once the handler, if any, has settled
 */
async function beforeRequest(url, noDelay) {
	if (!options.onBeforeRequest) {
		return;
	}
	await options.onBeforeRequest(url, noDelay);
}
|
||||
|
||||
/**
 * Notify options.onAfterRequest(), if provided, about a request that was made
 *
 * @param {String} url - URL that was requested
 */
function afterRequest(url) {
	if (!options.onAfterRequest) {
		return;
	}
	options.onAfterRequest(url);
}
|
||||
|
||||
/**
 * Pass a request error to options.onRequestError(), if provided
 *
 * @param {Error} e
 * @return {*} - Whatever the handler returned, or undefined if there is no handler
 */
function handleRequestError(e) {
	if (!options.onRequestError) {
		return undefined;
	}
	return options.onRequestError(e);
}
|
||||
|
||||
for (let i = 0; i < urlResolvers.length; i++) {
|
||||
let urlResolver = urlResolvers[i];
|
||||
|
||||
// If resolver is a function, run it and then replace it in the resolvers list with
|
||||
// the results
|
||||
if (typeof urlResolver == 'function') {
|
||||
try {
|
||||
urlResolver = await urlResolver();
|
||||
|
@ -1254,11 +1531,11 @@ Zotero.Attachments = new function(){
|
|||
}
|
||||
|
||||
// Ignore URLs we've already tried
|
||||
if (url && triedURLs.has(url)) {
|
||||
if (url && isTriedURL(url)) {
|
||||
Zotero.debug(`PDF at ${url} was already tried -- skipping`);
|
||||
url = null;
|
||||
}
|
||||
if (pageURL && triedPages.has(pageURL)) {
|
||||
if (pageURL && isTriedURL(pageURL)) {
|
||||
Zotero.debug(`Page at ${pageURL} was already tried -- skipping`);
|
||||
pageURL = null;
|
||||
}
|
||||
|
@ -1277,33 +1554,94 @@ Zotero.Attachments = new function(){
|
|||
|
||||
// Try URL first if available
|
||||
if (url) {
|
||||
triedURLs.add(url);
|
||||
try {
|
||||
await this.downloadFile(url, path, options);
|
||||
return { url, props: urlResolver };
|
||||
}
|
||||
catch (e) {
|
||||
Zotero.debug(`Error downloading ${url}: ${e}`);
|
||||
addTriedURL(url);
|
||||
// Backoff loop
|
||||
let tries = 3;
|
||||
while (tries-- >= 0) {
|
||||
try {
|
||||
await beforeRequest(url);
|
||||
await this.downloadFile(url, path, options);
|
||||
afterRequest(url);
|
||||
return { url, props: urlResolver };
|
||||
}
|
||||
catch (e) {
|
||||
Zotero.debug(`Error downloading ${url}: ${e}\n\n${e.stack}`);
|
||||
if (handleRequestError(e)) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// If URL wasn't available or failed, try to get a URL from a page
|
||||
if (pageURL) {
|
||||
triedPages.add(pageURL);
|
||||
addTriedURL(pageURL);
|
||||
url = null;
|
||||
let responseURL;
|
||||
try {
|
||||
Zotero.debug(`Looking for PDF on ${pageURL}`);
|
||||
|
||||
// TODO: Handle redirects manually so we can avoid loading a page we've already
|
||||
// tried
|
||||
let req = await Zotero.HTTP.request("GET", pageURL, { responseType: 'blob' });
|
||||
let redirects = 0;
|
||||
let nextURL = pageURL;
|
||||
let req;
|
||||
let skip = false;
|
||||
let domains = new Set();
|
||||
while (true) {
|
||||
let domain = urlToDomain(nextURL);
|
||||
let noDelay = domains.has(domain);
|
||||
domains.add(domain);
|
||||
|
||||
// Backoff loop
|
||||
let tries = 3;
|
||||
while (tries-- >= 0) {
|
||||
try {
|
||||
await beforeRequest(nextURL, noDelay);
|
||||
req = await Zotero.HTTP.request(
|
||||
'GET',
|
||||
nextURL,
|
||||
{
|
||||
responseType: 'blob',
|
||||
followRedirects: false
|
||||
}
|
||||
);
|
||||
}
|
||||
catch (e) {
|
||||
if (handleRequestError(e)) {
|
||||
// Even if this was initially a same-domain redirect, we should
|
||||
// now obey delays, since we just set one
|
||||
noDelay = false;
|
||||
continue;
|
||||
}
|
||||
throw e;
|
||||
}
|
||||
break;
|
||||
}
|
||||
afterRequest(nextURL);
|
||||
if ([301, 302, 303, 307].includes(req.status)) {
|
||||
let location = req.getResponseHeader('Location');
|
||||
if (!location) {
|
||||
throw new Error("Location header not provided");
|
||||
}
|
||||
nextURL = Services.io.newURI(nextURL, null, null).resolve(location);
|
||||
if (isTriedURL(nextURL)) {
|
||||
Zotero.debug("Redirect URL has already been tried -- skipping");
|
||||
skip = true;
|
||||
break;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
}
|
||||
if (skip) {
|
||||
continue;
|
||||
}
|
||||
let blob = req.response;
|
||||
responseURL = req.responseURL;
|
||||
if (pageURL != responseURL) {
|
||||
Zotero.debug("Redirected to " + responseURL);
|
||||
}
|
||||
triedPages.add(responseURL);
|
||||
addTriedURL(responseURL);
|
||||
|
||||
let contentType = req.getResponseHeader('Content-Type');
|
||||
// If DOI resolves directly to a PDF, save it to disk
|
||||
|
@ -1320,27 +1658,37 @@ Zotero.Attachments = new function(){
|
|||
}
|
||||
}
|
||||
catch (e) {
|
||||
Zotero.debug(`Error getting PDF from ${pageURL}: ${e}`);
|
||||
Zotero.debug(`Error getting PDF from ${pageURL}: ${e}\n\n${e.stack}`);
|
||||
continue;
|
||||
}
|
||||
if (!url) {
|
||||
Zotero.debug(`No PDF found on ${responseURL}`);
|
||||
continue;
|
||||
}
|
||||
if (triedURLs.has(url)) {
|
||||
if (isTriedURL(url)) {
|
||||
Zotero.debug(`PDF at ${url} was already tried -- skipping`);
|
||||
continue;
|
||||
}
|
||||
triedURLs.add(url);
|
||||
addTriedURL(url);
|
||||
|
||||
// Use the page we loaded as the referrer
|
||||
let downloadOptions = Object.assign({}, options, { referrer: responseURL });
|
||||
try {
|
||||
await this.downloadFile(url, path, downloadOptions);
|
||||
return { url, props: urlResolver };
|
||||
}
|
||||
catch (e) {
|
||||
Zotero.debug(`Error downloading ${url}: ${e}`);
|
||||
// Backoff loop
|
||||
let tries = 3;
|
||||
while (tries-- >= 0) {
|
||||
try {
|
||||
await beforeRequest(url);
|
||||
await this.downloadFile(url, path, downloadOptions);
|
||||
afterRequest(url);
|
||||
return { url, props: urlResolver };
|
||||
}
|
||||
catch (e) {
|
||||
Zotero.debug(`Error downloading ${url}: ${e}\n\n${e.stack}`);
|
||||
if (handleRequestError(e)) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -3708,27 +3708,19 @@ var ZoteroPane = new function()
|
|||
);
|
||||
progressWin.show();
|
||||
|
||||
var successful = 0;
|
||||
|
||||
for (let i = 0; i < items.length; i++) {
|
||||
let item = items[i];
|
||||
if (Zotero.Attachments.canFindPDFForItem(item)) {
|
||||
try {
|
||||
let attachment = await Zotero.Attachments.addAvailablePDF(item);
|
||||
if (attachment) {
|
||||
successful++;
|
||||
}
|
||||
}
|
||||
catch (e) {
|
||||
Zotero.logError(e);
|
||||
var results = await Zotero.Attachments.addAvailablePDFs(
|
||||
items,
|
||||
{
|
||||
onProgress: (progress, progressMax) => {
|
||||
itemProgress.setProgress((progress / progressMax) * 100);
|
||||
}
|
||||
}
|
||||
itemProgress.setProgress(((i + 1) / items.length) * 100);
|
||||
}
|
||||
);
|
||||
|
||||
itemProgress.setProgress(100);
|
||||
itemProgress.setIcon(icon);
|
||||
|
||||
var successful = results.filter(x => x).length;
|
||||
if (successful) {
|
||||
itemProgress.setText(Zotero.getString('findPDF.pdfsAdded', successful, successful));
|
||||
}
|
||||
|
|
|
@ -337,13 +337,14 @@ describe("Zotero.Attachments", function() {
|
|||
});
|
||||
});
|
||||
|
||||
describe("#addAvailablePDF()", function () {
|
||||
describe("PDF Retrieval", function () {
|
||||
var doiPrefix = 'https://doi.org/';
|
||||
var doi1 = '10.1111/abcd';
|
||||
var doi2 = '10.2222/bcde';
|
||||
var doi3 = '10.3333/cdef';
|
||||
var doi4 = '10.4444/defg';
|
||||
var doi5 = '10.5555/efgh';
|
||||
var doi6 = '10.6666/fghi';
|
||||
var pageURL1 = 'http://website/article1';
|
||||
var pageURL2 = 'http://website/article2';
|
||||
var pageURL3 = 'http://website/article3';
|
||||
|
@ -351,6 +352,8 @@ describe("Zotero.Attachments", function() {
|
|||
var pageURL5 = `http://website/${doi4}`;
|
||||
var pageURL6 = `http://website/${doi4}/json`;
|
||||
var pageURL7 = doiPrefix + doi5;
|
||||
var pageURL8 = 'http://website2/article8';
|
||||
var pageURL9 = 'http://website/article9';
|
||||
|
||||
Components.utils.import("resource://zotero-unit/httpd.js");
|
||||
var httpd;
|
||||
|
@ -360,16 +363,30 @@ describe("Zotero.Attachments", function() {
|
|||
var pdfURL = `${baseURL}article1/pdf`;
|
||||
var pdfSize;
|
||||
var requestStub;
|
||||
var requestStubCallTimes = [];
|
||||
var return429 = true;
|
||||
|
||||
function makeGetResponseHeader(headers) {
|
||||
return function (header) {
|
||||
if (headers[header] !== undefined) {
|
||||
return headers[header];
|
||||
}
|
||||
throw new Error("Unimplemented");
|
||||
throw new Error(`Unimplemented header '${header}'`);
|
||||
};
|
||||
}
|
||||
|
||||
function getHTMLPage(includePDF) {
|
||||
return `<html>
|
||||
<head>
|
||||
<title>Page Title</title>
|
||||
<link rel="schema.DC" href="http://purl.org/dc/elements/1.1/" />
|
||||
<meta name="citation_title" content="Title"/>
|
||||
<meta name="${includePDF ? 'citation_pdf_url' : 'ignore'}" content="${pdfURL}"/>
|
||||
</head>
|
||||
<body>Body</body>
|
||||
</html>`;
|
||||
}
|
||||
|
||||
function makeHTMLResponseFromType(html, responseType, responseURL) {
|
||||
var response;
|
||||
if (responseType == 'document') {
|
||||
|
@ -403,38 +420,44 @@ describe("Zotero.Attachments", function() {
|
|||
requestStub = sinon.stub(Zotero.HTTP, 'request');
|
||||
requestStub.callsFake(function (method, url, options) {
|
||||
Zotero.debug("Intercepting " + method + " " + url);
|
||||
requestStubCallTimes.push(new Date());
|
||||
|
||||
// Page responses
|
||||
var routes = [
|
||||
// Page 1 contains a PDF
|
||||
[pageURL1, pageURL1, true],
|
||||
// DOI 1 redirects to page 1, which contains a PDF
|
||||
[doiPrefix + doi1, pageURL1, true],
|
||||
[pageURL1, pageURL1, true],
|
||||
// DOI 2 redirects to page 2, which doesn't contain a PDF, but DOI 2 has an
|
||||
// OA entry for the PDF URL
|
||||
[doiPrefix + doi2, pageURL2, false],
|
||||
[pageURL2, pageURL2, false],
|
||||
// DOI 3 redirects to page 2, which doesn't contain a PDF, but DOI 3 contains
|
||||
// an OA entry for page 3, which contains a PDF)
|
||||
[doiPrefix + doi3, pageURL2, false],
|
||||
[pageURL3, pageURL3, true],
|
||||
// DOI 4 redirects to page 4, which doesn't contain a PDF
|
||||
[doiPrefix + doi4, pageURL4, false],
|
||||
[pageURL4, pageURL4, false],
|
||||
// DOI 6 redirects to page 8, which is on a different domain and has a PDF
|
||||
[doiPrefix + doi6, pageURL8, true],
|
||||
[pageURL8, pageURL8, true],
|
||||
];
|
||||
for (let route of routes) {
|
||||
let [expectedURL, responseURL, includePDF] = route;
|
||||
|
||||
if (url != expectedURL) continue;
|
||||
|
||||
let html = `<html>
|
||||
<head>
|
||||
<title>Page Title</title>
|
||||
<link rel="schema.DC" href="http://purl.org/dc/elements/1.1/" />
|
||||
<meta name="citation_title" content="Title"/>
|
||||
<meta name="${includePDF ? 'citation_pdf_url' : 'ignore'}" content="${pdfURL}"/>
|
||||
</head>
|
||||
<body>Body</body>
|
||||
</html>`;
|
||||
// Return explicit 302 if not following redirects
|
||||
if (expectedURL != responseURL && options.followRedirects === false) {
|
||||
return {
|
||||
status: 302,
|
||||
getResponseHeader: makeGetResponseHeader({
|
||||
Location: responseURL
|
||||
})
|
||||
};
|
||||
}
|
||||
|
||||
let html = getHTMLPage(includePDF);
|
||||
return makeHTMLResponseFromType(html, options.responseType, responseURL);
|
||||
}
|
||||
|
||||
|
@ -486,6 +509,31 @@ describe("Zotero.Attachments", function() {
|
|||
};
|
||||
}
|
||||
|
||||
// Returns a 429 every other call
|
||||
if (url.startsWith(pageURL9)) {
|
||||
if (return429) {
|
||||
return429 = false;
|
||||
throw new Zotero.HTTP.UnexpectedStatusException(
|
||||
{
|
||||
status: 429,
|
||||
response: '',
|
||||
responseURL: pageURL9,
|
||||
getResponseHeader: makeGetResponseHeader({
|
||||
'Content-Type': 'text/plain',
|
||||
'Retry-After': '2',
|
||||
})
|
||||
},
|
||||
pageURL9,
|
||||
'Failing with 429'
|
||||
);
|
||||
}
|
||||
else {
|
||||
return429 = true;
|
||||
let html = getHTMLPage(true);
|
||||
return makeHTMLResponseFromType(html, options.responseType, pageURL9);
|
||||
}
|
||||
}
|
||||
|
||||
// OA PDF lookup
|
||||
if (url.startsWith(ZOTERO_CONFIG.SERVICES_URL)) {
|
||||
let json = JSON.parse(options.body);
|
||||
|
@ -525,6 +573,8 @@ describe("Zotero.Attachments", function() {
|
|||
pdfURL.substr(baseURL.length - 1),
|
||||
Zotero.File.pathToFile(OS.Path.join(getTestDataDirectory().path, 'test.pdf'))
|
||||
);
|
||||
|
||||
requestStubCallTimes = [];
|
||||
});
|
||||
|
||||
afterEach(async function () {
|
||||
|
@ -547,8 +597,8 @@ describe("Zotero.Attachments", function() {
|
|||
await item.saveTx();
|
||||
var attachment = await Zotero.Attachments.addAvailablePDF(item);
|
||||
|
||||
assert.isTrue(requestStub.calledOnce);
|
||||
assert.isTrue(requestStub.calledWith('GET', 'https://doi.org/' + doi));
|
||||
assert.isTrue(requestStub.calledTwice);
|
||||
assert.isTrue(requestStub.getCall(0).calledWith('GET', 'https://doi.org/' + doi));
|
||||
assert.ok(attachment);
|
||||
var json = attachment.toJSON();
|
||||
assert.equal(json.url, pdfURL);
|
||||
|
@ -583,8 +633,8 @@ describe("Zotero.Attachments", function() {
|
|||
await item.saveTx();
|
||||
var attachment = await Zotero.Attachments.addAvailablePDF(item);
|
||||
|
||||
assert.isTrue(requestStub.calledOnce);
|
||||
assert.isTrue(requestStub.calledWith('GET', 'https://doi.org/' + doi));
|
||||
assert.isTrue(requestStub.calledTwice);
|
||||
assert.isTrue(requestStub.getCall(0).calledWith('GET', 'https://doi.org/' + doi));
|
||||
assert.ok(attachment);
|
||||
var json = attachment.toJSON();
|
||||
assert.equal(json.url, pdfURL);
|
||||
|
@ -619,11 +669,13 @@ describe("Zotero.Attachments", function() {
|
|||
await item.saveTx();
|
||||
var attachment = await Zotero.Attachments.addAvailablePDF(item);
|
||||
|
||||
assert.isTrue(requestStub.calledTwice);
|
||||
assert.isTrue(requestStub.calledThrice);
|
||||
var call1 = requestStub.getCall(0);
|
||||
assert.isTrue(call1.calledWith('GET', 'https://doi.org/' + doi));
|
||||
var call2 = requestStub.getCall(1);
|
||||
assert.isTrue(call2.calledWith('POST', ZOTERO_CONFIG.SERVICES_URL + 'oa/search'));
|
||||
assert.isTrue(call2.calledWith('GET', pageURL2));
|
||||
var call3 = requestStub.getCall(2);
|
||||
assert.isTrue(call3.calledWith('POST', ZOTERO_CONFIG.SERVICES_URL + 'oa/search'));
|
||||
|
||||
assert.ok(attachment);
|
||||
var json = attachment.toJSON();
|
||||
|
@ -641,16 +693,18 @@ describe("Zotero.Attachments", function() {
|
|||
await item.saveTx();
|
||||
var attachment = await Zotero.Attachments.addAvailablePDF(item);
|
||||
|
||||
assert.isTrue(requestStub.calledThrice);
|
||||
assert.equal(requestStub.callCount, 4);
|
||||
// Check the DOI (and get nothing)
|
||||
var call1 = requestStub.getCall(0);
|
||||
assert.isTrue(call1.calledWith('GET', 'https://doi.org/' + doi));
|
||||
var call = requestStub.getCall(0);
|
||||
assert.isTrue(call.calledWith('GET', 'https://doi.org/' + doi));
|
||||
call = requestStub.getCall(1);
|
||||
assert.isTrue(call.calledWith('GET', pageURL2));
|
||||
// Check the OA resolver and get page 3
|
||||
var call2 = requestStub.getCall(1);
|
||||
assert.isTrue(call2.calledWith('POST', ZOTERO_CONFIG.SERVICES_URL + 'oa/search'));
|
||||
call = requestStub.getCall(2);
|
||||
assert.isTrue(call.calledWith('POST', ZOTERO_CONFIG.SERVICES_URL + 'oa/search'));
|
||||
// Check page 3 and find the download URL
|
||||
var call3 = requestStub.getCall(2);
|
||||
assert.isTrue(call3.calledWith('GET', pageURL3));
|
||||
call = requestStub.getCall(3);
|
||||
assert.isTrue(call.calledWith('GET', pageURL3));
|
||||
|
||||
assert.ok(attachment);
|
||||
var json = attachment.toJSON();
|
||||
|
@ -669,15 +723,105 @@ describe("Zotero.Attachments", function() {
|
|||
await item.saveTx();
|
||||
var attachment = await Zotero.Attachments.addAvailablePDF(item);
|
||||
|
||||
assert.isTrue(requestStub.calledTwice);
|
||||
var call1 = requestStub.getCall(0);
|
||||
assert.isTrue(call1.calledWith('GET', 'https://doi.org/' + doi));
|
||||
var call2 = requestStub.getCall(1);
|
||||
assert.isTrue(call2.calledWith('POST', ZOTERO_CONFIG.SERVICES_URL + 'oa/search'));
|
||||
assert.equal(requestStub.callCount, 3);
|
||||
var call = requestStub.getCall(0);
|
||||
assert.isTrue(call.calledWith('GET', 'https://doi.org/' + doi));
|
||||
call = requestStub.getCall(1);
|
||||
assert.isTrue(call.calledWith('GET', pageURL4));
|
||||
call = requestStub.getCall(2);
|
||||
assert.isTrue(call.calledWith('POST', ZOTERO_CONFIG.SERVICES_URL + 'oa/search'));
|
||||
|
||||
assert.isFalse(attachment);
|
||||
});
|
||||
|
||||
it("should wait between requests to the same domain", async function () {
|
||||
var url1 = pageURL1;
|
||||
var item1 = createUnsavedDataObject('item', { itemType: 'journalArticle' });
|
||||
item1.setField('title', 'Test');
|
||||
item1.setField('url', url1);
|
||||
await item1.saveTx();
|
||||
|
||||
var url2 = pageURL3;
|
||||
var item2 = createUnsavedDataObject('item', { itemType: 'journalArticle' });
|
||||
item2.setField('title', 'Test');
|
||||
item2.setField('url', url2);
|
||||
await item2.saveTx();
|
||||
|
||||
var attachments = await Zotero.Attachments.addAvailablePDFs([item1, item2]);
|
||||
|
||||
assert.isTrue(requestStub.calledTwice);
|
||||
assert.isAbove(requestStubCallTimes[1] - requestStubCallTimes[0], 1000);
|
||||
// Make sure there's an attachment for every item
|
||||
assert.lengthOf(attachments.filter(x => x), 2);
|
||||
});
|
||||
|
||||
it("should wait between requests that resolve to the same domain", async function () {
|
||||
// DOI URL resolves to 'website' domain with PDF
|
||||
var url1 = doiPrefix + doi1;
|
||||
var item1 = createUnsavedDataObject('item', { itemType: 'journalArticle' });
|
||||
item1.setField('title', 'Test');
|
||||
item1.setField('url', url1);
|
||||
await item1.saveTx();
|
||||
|
||||
// DOI URL resolves to 'website' domain without PDF
|
||||
var url2 = doiPrefix + doi4;
|
||||
var item2 = createUnsavedDataObject('item', { itemType: 'journalArticle' });
|
||||
item2.setField('title', 'Test');
|
||||
item2.setField('url', url2);
|
||||
await item2.saveTx();
|
||||
|
||||
// DOI URL resolves to 'website2' domain without PDF
|
||||
var url3 = doiPrefix + doi6;
|
||||
var item3 = createUnsavedDataObject('item', { itemType: 'journalArticle' });
|
||||
item3.setField('title', 'Test');
|
||||
item3.setField('url', url3);
|
||||
await item3.saveTx();
|
||||
|
||||
var attachments = await Zotero.Attachments.addAvailablePDFs([item1, item2, item3]);
|
||||
|
||||
assert.equal(requestStub.callCount, 6);
|
||||
assert.equal(requestStub.getCall(0).args[1], doiPrefix + doi1);
|
||||
assert.equal(requestStub.getCall(1).args[1], pageURL1);
|
||||
assert.equal(requestStub.getCall(2).args[1], doiPrefix + doi4);
|
||||
// Should skip ahead to the next DOI
|
||||
assert.equal(requestStub.getCall(3).args[1], doiPrefix + doi6);
|
||||
// which is on a new domain
|
||||
assert.equal(requestStub.getCall(4).args[1], pageURL8);
|
||||
// and then return to make 'website' request for DOI 4
|
||||
assert.equal(requestStub.getCall(5).args[1], pageURL4);
|
||||
|
||||
// 'website' requests should be a second apart
|
||||
assert.isAbove(requestStubCallTimes[5] - requestStubCallTimes[1], 1000);
|
||||
|
||||
assert.instanceOf(attachments[0], Zotero.Item);
|
||||
assert.isFalse(attachments[1]);
|
||||
assert.instanceOf(attachments[2], Zotero.Item);
|
||||
});
|
||||
|
||||
it("should wait between requests to the same domain after a 429", async function () {
|
||||
var url1 = pageURL9;
|
||||
var item1 = createUnsavedDataObject('item', { itemType: 'journalArticle' });
|
||||
item1.setField('title', 'Test');
|
||||
item1.setField('url', url1);
|
||||
await item1.saveTx();
|
||||
|
||||
var url2 = pageURL3;
|
||||
var item2 = createUnsavedDataObject('item', { itemType: 'journalArticle' });
|
||||
item2.setField('title', 'Test');
|
||||
item2.setField('url', url2);
|
||||
await item2.saveTx();
|
||||
|
||||
var attachments = await Zotero.Attachments.addAvailablePDFs([item1, item2]);
|
||||
|
||||
assert.isTrue(requestStub.calledThrice);
|
||||
assert.equal(requestStub.getCall(0).args[1], pageURL9);
|
||||
assert.equal(requestStub.getCall(1).args[1], pageURL9);
|
||||
assert.equal(requestStub.getCall(2).args[1], pageURL3);
|
||||
assert.isAbove(requestStubCallTimes[1] - requestStubCallTimes[0], 2000);
|
||||
// Make sure there's an attachment for every item
|
||||
assert.lengthOf(attachments.filter(x => x), 2);
|
||||
});
|
||||
|
||||
it("should handle a custom resolver in HTML mode", async function () {
|
||||
var doi = doi4;
|
||||
var item = createUnsavedDataObject('item', { itemType: 'journalArticle' });
|
||||
|
@ -697,13 +841,15 @@ describe("Zotero.Attachments", function() {
|
|||
|
||||
var attachment = await Zotero.Attachments.addAvailablePDF(item);
|
||||
|
||||
assert.isTrue(requestStub.calledThrice);
|
||||
var call1 = requestStub.getCall(0);
|
||||
assert.isTrue(call1.calledWith('GET', 'https://doi.org/' + doi));
|
||||
var call2 = requestStub.getCall(1);
|
||||
assert.isTrue(call2.calledWith('POST', ZOTERO_CONFIG.SERVICES_URL + 'oa/search'));
|
||||
var call3 = requestStub.getCall(2);
|
||||
assert.isTrue(call3.calledWith('GET', pageURL5));
|
||||
assert.equal(requestStub.callCount, 4);
|
||||
var call = requestStub.getCall(0);
|
||||
assert.isTrue(call.calledWith('GET', 'https://doi.org/' + doi));
|
||||
var call = requestStub.getCall(1);
|
||||
assert.isTrue(call.calledWith('GET', pageURL4));
|
||||
call = requestStub.getCall(2);
|
||||
assert.isTrue(call.calledWith('POST', ZOTERO_CONFIG.SERVICES_URL + 'oa/search'));
|
||||
call = requestStub.getCall(3);
|
||||
assert.isTrue(call.calledWith('GET', pageURL5));
|
||||
|
||||
assert.ok(attachment);
|
||||
var json = attachment.toJSON();
|
||||
|
@ -731,13 +877,15 @@ describe("Zotero.Attachments", function() {
|
|||
|
||||
var attachment = await Zotero.Attachments.addAvailablePDF(item);
|
||||
|
||||
assert.isTrue(requestStub.calledThrice);
|
||||
var call1 = requestStub.getCall(0);
|
||||
assert.isTrue(call1.calledWith('GET', 'https://doi.org/' + doi));
|
||||
var call2 = requestStub.getCall(1);
|
||||
assert.isTrue(call2.calledWith('POST', ZOTERO_CONFIG.SERVICES_URL + 'oa/search'));
|
||||
var call3 = requestStub.getCall(2);
|
||||
assert.isTrue(call3.calledWith('GET', pageURL6));
|
||||
assert.equal(requestStub.callCount, 4);
|
||||
var call = requestStub.getCall(0);
|
||||
assert.isTrue(call.calledWith('GET', 'https://doi.org/' + doi));
|
||||
call = requestStub.getCall(1);
|
||||
assert.isTrue(call.calledWith('GET', pageURL4));
|
||||
call = requestStub.getCall(2);
|
||||
assert.isTrue(call.calledWith('POST', ZOTERO_CONFIG.SERVICES_URL + 'oa/search'));
|
||||
call = requestStub.getCall(3);
|
||||
assert.isTrue(call.calledWith('GET', pageURL6));
|
||||
|
||||
assert.ok(attachment);
|
||||
var json = attachment.toJSON();
|
||||
|
@ -769,15 +917,17 @@ describe("Zotero.Attachments", function() {
|
|||
|
||||
var attachment = await Zotero.Attachments.addAvailablePDF(item);
|
||||
|
||||
assert.equal(requestStub.callCount, 4);
|
||||
var call1 = requestStub.getCall(0);
|
||||
assert.isTrue(call1.calledWith('GET', 'https://doi.org/' + doi));
|
||||
var call2 = requestStub.getCall(1);
|
||||
assert.isTrue(call2.calledWith('POST', ZOTERO_CONFIG.SERVICES_URL + 'oa/search'));
|
||||
var call3 = requestStub.getCall(2);
|
||||
assert.isTrue(call3.calledWith('GET', pageURL6));
|
||||
var call4 = requestStub.getCall(3);
|
||||
assert.isTrue(call4.calledWith('GET', pageURL1));
|
||||
assert.equal(requestStub.callCount, 5);
|
||||
var call = requestStub.getCall(0);
|
||||
assert.isTrue(call.calledWith('GET', 'https://doi.org/' + doi));
|
||||
call = requestStub.getCall(1);
|
||||
assert.isTrue(call.calledWith('GET', pageURL4));
|
||||
call = requestStub.getCall(2);
|
||||
assert.isTrue(call.calledWith('POST', ZOTERO_CONFIG.SERVICES_URL + 'oa/search'));
|
||||
call = requestStub.getCall(3);
|
||||
assert.isTrue(call.calledWith('GET', pageURL6));
|
||||
call = requestStub.getCall(4);
|
||||
assert.isTrue(call.calledWith('GET', pageURL1));
|
||||
|
||||
assert.ok(attachment);
|
||||
var json = attachment.toJSON();
|
||||
|
|
Loading…
Reference in a new issue