Automatically delay between PDF retrieval requests to the same domain

Delay successive requests to the same domain by 1 second. On a 429 or 503
response, respect the Retry-After header if present; otherwise, on a 429
or any 5xx response, wait 10 seconds before the next request to that domain.
This commit is contained in:
Dan Stillman 2018-09-21 04:20:37 -04:00
parent 536c07e9f4
commit d899134e7c
3 changed files with 590 additions and 100 deletions

View file

@ -909,7 +909,11 @@ Zotero.Attachments = new function(){
* @param {Boolean} [automatic=false] - Only include custom resolvers with `automatic: true`
* @return {Object[]} - An array of urlResolvers (see downloadFirstAvailableFile())
*/
this.getPDFResolvers = function (item, methods = ['doi', 'url', 'oa', 'custom'], automatic) {
this.getPDFResolvers = function (item, methods, automatic) {
if (!methods) {
methods = ['doi', 'url', 'oa', 'custom'];
}
var useDOI = methods.includes('doi');
var useURL = methods.includes('url');
var useOA = methods.includes('oa');
@ -1098,16 +1102,253 @@ Zotero.Attachments = new function(){
};
/**
 * Look for available PDFs for multiple items, spacing out requests to the same domain
 *
 * Items are worked through as a queue. When a request for an item would hit a domain
 * too soon after the previous request to that domain, the item is paused and the next
 * item in the queue is tried instead. Once the end of the queue is reached, processing
 * waits until the earliest time a paused item is allowed to continue and then starts
 * over from the beginning of the queue.
 *
 * @param {Zotero.Item[]} items
 * @param {Object} [options]
 * @param {String[]} [options.methods] - See getPDFResolvers()
 * @param {Function} [options.onProgress] - Called as onProgress(completed, total) each
 *     time an item finishes
 * @param {Number} [options.sameDomainRequestDelay=1000] - Minimum number of milliseconds
 *     between requests to the same domain (used in tests)
 * @return {Promise<Array>} - One entry per item, in the same order as `items`: whatever
 *     addPDFFromURLs() resolved with for that item, or false if the item was skipped or
 *     its lookup failed
 */
this.addAvailablePDFs = async function (items, options = {}) {
	const MAX_CONSECUTIVE_DOMAIN_FAILURES = 5;
	const SAME_DOMAIN_REQUEST_DELAY = options.sameDomainRequestDelay || 1000;
	// Delay after a 429 or 5xx that didn't come with a usable Retry-After header
	const DEFAULT_ERROR_DELAY = 10000;
	// Longest Retry-After we're willing to wait for, in seconds
	const MAX_RETRY_AFTER = 3600;
	
	var domains = new Map();
	var queue = items.map((item) => {
		return {
			item,
			urlResolvers: this.getPDFResolvers(item, options.methods),
			domain: null,
			continuation: null,
			result: null,
			// Tracked separately from `result` so that an item is only treated as
			// finished once its lookup has actually settled
			done: false
		};
	});
	
	function getDomainInfo(domain) {
		var domainInfo = domains.get(domain);
		if (!domainInfo) {
			domainInfo = {
				nextRequestTime: 0,
				consecutiveFailures: 0
			};
			domains.set(domain, domainInfo);
		}
		return domainInfo;
	}
	
	// Single definition of "too many failures" so that new and resumed requests agree
	function hasTooManyFailures(domainInfo) {
		return domainInfo.consecutiveFailures >= MAX_CONSECUTIVE_DOMAIN_FAILURES;
	}
	
	var completed = 0;
	var i = 0;
	
	//
	// Process items in the queue
	//
	await new Promise((resolve) => {
		var processNextItem;
		
		// Mark an item as finished, report progress, and move on to the next item
		var finishItem = (entry) => {
			entry.done = true;
			completed++;
			if (options.onProgress) {
				options.onProgress(completed, queue.length);
			}
			processNextItem();
		};
		
		processNextItem = () => {
			// All items processed
			if (completed == queue.length) {
				resolve();
				return;
			}
			
			var current = queue[i++];
			
			// If we got to the end of the queue, wait until the next time a pending request
			// is ready to process
			if (!current) {
				let nextStart = queue
					.filter(x => !x.done && x.domain)
					.map(x => getDomainInfo(x.domain).nextRequestTime)
					.filter(x => x)
					.reduce((earliest, time) => Math.min(earliest, time), Infinity);
				// Nothing to wait on specifically -- check again after the standard delay
				// rather than throwing or spinning
				if (nextStart === Infinity) {
					nextStart = Date.now() + SAME_DOMAIN_REQUEST_DELAY;
				}
				i = 0;
				setTimeout(processNextItem, Math.max(0, nextStart - Date.now()));
				return;
			}
			
			// If item was already processed, skip
			if (current.done) {
				processNextItem();
				return;
			}
			
			// If processing for a domain was paused and not enough time has passed, skip ahead
			if (current.domain && getDomainInfo(current.domain).nextRequestTime > Date.now()) {
				processNextItem();
				return;
			}
			
			// Resume paused item
			if (current.continuation) {
				current.continuation();
				return;
			}
			
			if (!this.canFindPDFForItem(current.item)) {
				current.result = false;
				finishItem(current);
				return;
			}
			
			// Process item
			this.addPDFFromURLs(
				current.item,
				current.urlResolvers,
				{
					onBeforeRequest: async function (url, noDelay) {
						var domain = urlToDomain(url);
						
						// Don't delay between subsequent requests to the DOI resolver or
						// to localhost in tests
						if (['doi.org', 'localhost'].includes(domain)) {
							return;
						}
						
						var domainInfo = getDomainInfo(domain);
						
						// If too many requests have failed, stop trying
						if (hasTooManyFailures(domainInfo)) {
							throw new Error(`Too many failed requests for ${domain}`);
						}
						
						// If enough time hasn't passed since the last attempt for this domain,
						// skip for now and process more items
						let nextRequestTime = domainInfo.nextRequestTime;
						if (!noDelay && nextRequestTime > Date.now()) {
							return new Promise((resolve, reject) => {
								Zotero.debug(`Delaying request to ${domain} for ${nextRequestTime - Date.now()} ms`);
								current.domain = domain;
								current.continuation = () => {
									if (hasTooManyFailures(domainInfo)) {
										reject(new Error(`Too many failed requests for ${domain}`));
										return;
									}
									// The resumed request counts as a request to this domain,
									// so other paused items have to wait their turn after it
									domainInfo.nextRequestTime = Date.now() + SAME_DOMAIN_REQUEST_DELAY;
									resolve();
								};
								processNextItem();
							});
						}
						
						domainInfo.nextRequestTime = Date.now() + SAME_DOMAIN_REQUEST_DELAY;
					},
					
					// Reset consecutive failures on successful request
					onAfterRequest: function (url) {
						var domain = urlToDomain(url);
						
						// Ignore localhost in tests
						if (domain == 'localhost') {
							return;
						}
						
						var domainInfo = getDomainInfo(domain);
						domainInfo.consecutiveFailures = 0;
					},
					
					// Return true to retry the request (after the delay set here) and false
					// to give up on it
					onRequestError: function (e) {
						if (!(e instanceof Zotero.HTTP.UnexpectedStatusException)) {
							return false;
						}
						
						let domainInfo = getDomainInfo(urlToDomain(e.url));
						domainInfo.consecutiveFailures++;
						
						let status = e.status;
						
						// Retry-After
						if (status == 429 || status == 503) {
							let retryAfter = e.xmlhttp.getResponseHeader('Retry-After');
							if (retryAfter) {
								Zotero.debug("Got Retry-After: " + retryAfter);
								// Number of seconds (digits only, so that values such as
								// "0x10" or "-5" aren't misread as a delay)
								if (/^\s*\d+\s*$/.test(retryAfter)) {
									let seconds = parseInt(retryAfter, 10);
									if (seconds > MAX_RETRY_AFTER) {
										Zotero.debug("Retry-After is too long -- skipping request");
										return false;
									}
									domainInfo.nextRequestTime = Date.now() + seconds * 1000;
									return true;
								}
								// HTTP date
								if (Zotero.Date.isHTTPDate(retryAfter)) {
									let retryTime = new Date(retryAfter).getTime();
									if (retryTime > Date.now() + MAX_RETRY_AFTER * 1000) {
										Zotero.debug("Retry-After is too long -- skipping request");
										return false;
									}
									domainInfo.nextRequestTime = retryTime;
									return true;
								}
								Zotero.debug("Invalid Retry-After value -- skipping request");
								return false;
							}
						}
						
						// If not specified, wait 10 seconds before next request to domain
						if (status == 429 || e.is5xx()) {
							domainInfo.nextRequestTime = Date.now() + DEFAULT_ERROR_DELAY;
							return true;
						}
						
						return false;
					}
				}
			)
			.then((attachment) => {
				current.result = attachment;
			})
			.catch((e) => {
				Zotero.logError(e);
				current.result = false;
			})
			// finally() isn't implemented until Firefox 58, but then() is the same here
			.then(() => {
				finishItem(current);
			});
		};
		
		processNextItem();
	});
	
	return queue.map(x => x.result);
};
/**
 * Get the host of a URL
 *
 * @param {String} url
 * @return {String} - The `host` property of the nsIURI built from `url`
 */
function urlToDomain(url) {
	var uri = Services.io.newURI(url, null, null);
	return uri.host;
}
/**
* Look for an available PDF for an item and add it as an attachment
*
* @param {Zotero.Item} item
* @param {String[]} [methods=['doi', 'url', 'oa', 'custom']]
* @param {Object} [options]
* @param {String[]} [options.methods] - See getPDFResolvers()
* @return {Zotero.Item|false} - New Zotero.Item, or false if unsuccessful
*/
this.addAvailablePDF = async function (item, methods = ['doi', 'url', 'oa', 'custom']) {
this.addAvailablePDF = async function (item, options = {}) {
Zotero.debug("Looking for available PDFs");
return this.addPDFFromURLs(item, this.getPDFResolvers(...arguments));
return this.addPDFFromURLs(item, this.getPDFResolvers(item, options.methods));
};
@ -1134,7 +1375,9 @@ Zotero.Attachments = new function(){
tmpFile,
{
isPDF: true,
onAccessMethodStart: options.onAccessMethodStart
onAccessMethodStart: options.onAccessMethodStart,
onBeforeRequest: options.onBeforeRequest,
onRequestError: options.onRequestError
}
);
if (url) {
@ -1198,7 +1441,11 @@ Zotero.Attachments = new function(){
* 'acceptedVersion', or 'publishedVersion'). Functions that return promises are waited for,
* and functions aren't called unless a file hasn't yet been found from an earlier entry.
* @param {String} path - Path to save file to
* @param {Object} [options] - Options to pass to this.downloadFile()
* @param {Object} [options]
* @param {Function} [options.onBeforeRequest] - Async function that runs before a request
* @param {Function} [options.onAfterRequest] - Function that runs after a request
* @param {Function} [options.onRequestError] - Function that runs when a request fails.
* Return true to retry request and false to skip.
* @return {Object|false} - Object with successful 'url' and 'props' from the associated urlResolver,
* or false if no file could be downloaded
*/
@ -1209,12 +1456,42 @@ Zotero.Attachments = new function(){
// Operate on copy, since we might change things
urlResolvers = [...urlResolvers];
// Don't try the same URL more than once
// Don't try the same normalized URL more than once
var triedURLs = new Set();
var triedPages = new Set();
// Strip the query string (everything from the first '?') so that URLs differing only
// in their query parameters are treated as the same URL
function normalizeURL(url) {
	var withoutQuery = url.replace(/\?.*/, '');
	return withoutQuery;
}
// Whether the normalized form of this URL has already been recorded as tried
function isTriedURL(url) {
	var normalized = normalizeURL(url);
	return triedURLs.has(normalized);
}
// Record the normalized form of this URL as tried
function addTriedURL(url) {
	var normalized = normalizeURL(url);
	triedURLs.add(normalized);
}
// Run options.onBeforeRequest(), if provided, and wait for it before making a request.
// The callback can delay the request (by resolving later) or cancel it (by rejecting).
async function beforeRequest(url, noDelay) {
	if (!options.onBeforeRequest) {
		return;
	}
	await options.onBeforeRequest(url, noDelay);
}
// Notify options.onAfterRequest(), if provided, that a request to this URL went through
function afterRequest(url) {
	if (!options.onAfterRequest) {
		return;
	}
	options.onAfterRequest(url);
}
// Pass a request error to options.onRequestError(), if provided, and hand back its
// answer (truthy to retry the request). Without a callback nothing is returned, so
// the request isn't retried.
function handleRequestError(e) {
	if (!options.onRequestError) {
		return undefined;
	}
	return options.onRequestError(e);
}
for (let i = 0; i < urlResolvers.length; i++) {
let urlResolver = urlResolvers[i];
// If resolver is a function, run it and then replace it in the resolvers list with
// the results
if (typeof urlResolver == 'function') {
try {
urlResolver = await urlResolver();
@ -1254,11 +1531,11 @@ Zotero.Attachments = new function(){
}
// Ignore URLs we've already tried
if (url && triedURLs.has(url)) {
if (url && isTriedURL(url)) {
Zotero.debug(`PDF at ${url} was already tried -- skipping`);
url = null;
}
if (pageURL && triedPages.has(pageURL)) {
if (pageURL && isTriedURL(pageURL)) {
Zotero.debug(`Page at ${pageURL} was already tried -- skipping`);
pageURL = null;
}
@ -1277,33 +1554,94 @@ Zotero.Attachments = new function(){
// Try URL first if available
if (url) {
triedURLs.add(url);
addTriedURL(url);
// Backoff loop
let tries = 3;
while (tries-- >= 0) {
try {
await beforeRequest(url);
await this.downloadFile(url, path, options);
afterRequest(url);
return { url, props: urlResolver };
}
catch (e) {
Zotero.debug(`Error downloading ${url}: ${e}`);
Zotero.debug(`Error downloading ${url}: ${e}\n\n${e.stack}`);
if (handleRequestError(e)) {
continue;
}
}
break;
}
}
// If URL wasn't available or failed, try to get a URL from a page
if (pageURL) {
triedPages.add(pageURL);
addTriedURL(pageURL);
url = null;
let responseURL;
try {
Zotero.debug(`Looking for PDF on ${pageURL}`);
// TODO: Handle redirects manually so we can avoid loading a page we've already
// tried
let req = await Zotero.HTTP.request("GET", pageURL, { responseType: 'blob' });
let redirects = 0;
let nextURL = pageURL;
let req;
let skip = false;
let domains = new Set();
while (true) {
let domain = urlToDomain(nextURL);
let noDelay = domains.has(domain);
domains.add(domain);
// Backoff loop
let tries = 3;
while (tries-- >= 0) {
try {
await beforeRequest(nextURL, noDelay);
req = await Zotero.HTTP.request(
'GET',
nextURL,
{
responseType: 'blob',
followRedirects: false
}
);
}
catch (e) {
if (handleRequestError(e)) {
// Even if this was initially a same-domain redirect, we should
// now obey delays, since we just set one
noDelay = false;
continue;
}
throw e;
}
break;
}
afterRequest(nextURL);
if ([301, 302, 303, 307].includes(req.status)) {
let location = req.getResponseHeader('Location');
if (!location) {
throw new Error("Location header not provided");
}
nextURL = Services.io.newURI(nextURL, null, null).resolve(location);
if (isTriedURL(nextURL)) {
Zotero.debug("Redirect URL has already been tried -- skipping");
skip = true;
break;
}
continue;
}
break;
}
if (skip) {
continue;
}
let blob = req.response;
responseURL = req.responseURL;
if (pageURL != responseURL) {
Zotero.debug("Redirected to " + responseURL);
}
triedPages.add(responseURL);
addTriedURL(responseURL);
let contentType = req.getResponseHeader('Content-Type');
// If DOI resolves directly to a PDF, save it to disk
@ -1320,27 +1658,37 @@ Zotero.Attachments = new function(){
}
}
catch (e) {
Zotero.debug(`Error getting PDF from ${pageURL}: ${e}`);
Zotero.debug(`Error getting PDF from ${pageURL}: ${e}\n\n${e.stack}`);
continue;
}
if (!url) {
Zotero.debug(`No PDF found on ${responseURL}`);
continue;
}
if (triedURLs.has(url)) {
if (isTriedURL(url)) {
Zotero.debug(`PDF at ${url} was already tried -- skipping`);
continue;
}
triedURLs.add(url);
addTriedURL(url);
// Use the page we loaded as the referrer
let downloadOptions = Object.assign({}, options, { referrer: responseURL });
// Backoff loop
let tries = 3;
while (tries-- >= 0) {
try {
await beforeRequest(url);
await this.downloadFile(url, path, downloadOptions);
afterRequest(url);
return { url, props: urlResolver };
}
catch (e) {
Zotero.debug(`Error downloading ${url}: ${e}`);
Zotero.debug(`Error downloading ${url}: ${e}\n\n${e.stack}`);
if (handleRequestError(e)) {
continue;
}
}
break;
}
}
}

View file

@ -3708,27 +3708,19 @@ var ZoteroPane = new function()
);
progressWin.show();
var successful = 0;
for (let i = 0; i < items.length; i++) {
let item = items[i];
if (Zotero.Attachments.canFindPDFForItem(item)) {
try {
let attachment = await Zotero.Attachments.addAvailablePDF(item);
if (attachment) {
successful++;
var results = await Zotero.Attachments.addAvailablePDFs(
items,
{
onProgress: (progress, progressMax) => {
itemProgress.setProgress((progress / progressMax) * 100);
}
}
catch (e) {
Zotero.logError(e);
}
}
itemProgress.setProgress(((i + 1) / items.length) * 100);
}
);
itemProgress.setProgress(100);
itemProgress.setIcon(icon);
var successful = results.filter(x => x).length;
if (successful) {
itemProgress.setText(Zotero.getString('findPDF.pdfsAdded', successful, successful));
}

View file

@ -337,13 +337,14 @@ describe("Zotero.Attachments", function() {
});
});
describe("#addAvailablePDF()", function () {
describe("PDF Retrieval", function () {
var doiPrefix = 'https://doi.org/';
var doi1 = '10.1111/abcd';
var doi2 = '10.2222/bcde';
var doi3 = '10.3333/cdef';
var doi4 = '10.4444/defg';
var doi5 = '10.5555/efgh';
var doi6 = '10.6666/fghi';
var pageURL1 = 'http://website/article1';
var pageURL2 = 'http://website/article2';
var pageURL3 = 'http://website/article3';
@ -351,6 +352,8 @@ describe("Zotero.Attachments", function() {
var pageURL5 = `http://website/${doi4}`;
var pageURL6 = `http://website/${doi4}/json`;
var pageURL7 = doiPrefix + doi5;
var pageURL8 = 'http://website2/article8';
var pageURL9 = 'http://website/article9';
Components.utils.import("resource://zotero-unit/httpd.js");
var httpd;
@ -360,16 +363,30 @@ describe("Zotero.Attachments", function() {
var pdfURL = `${baseURL}article1/pdf`;
var pdfSize;
var requestStub;
var requestStubCallTimes = [];
var return429 = true;
function makeGetResponseHeader(headers) {
return function (header) {
if (headers[header] !== undefined) {
return headers[header];
}
throw new Error("Unimplemented");
throw new Error(`Unimplemented header '${header}'`);
};
}
// Build the HTML for a fake article page. When includePDF is true, the page advertises
// pdfURL in a citation_pdf_url meta tag; otherwise the same tag is emitted under the
// name 'ignore'.
function getHTMLPage(includePDF) {
	var pdfMetaName = includePDF ? 'citation_pdf_url' : 'ignore';
	return `<html>
<head>
<title>Page Title</title>
<link rel="schema.DC" href="http://purl.org/dc/elements/1.1/" />
<meta name="citation_title" content="Title"/>
<meta name="${pdfMetaName}" content="${pdfURL}"/>
</head>
<body>Body</body>
</html>`;
}
function makeHTMLResponseFromType(html, responseType, responseURL) {
var response;
if (responseType == 'document') {
@ -403,38 +420,44 @@ describe("Zotero.Attachments", function() {
requestStub = sinon.stub(Zotero.HTTP, 'request');
requestStub.callsFake(function (method, url, options) {
Zotero.debug("Intercepting " + method + " " + url);
requestStubCallTimes.push(new Date());
// Page responses
var routes = [
// Page 1 contains a PDF
[pageURL1, pageURL1, true],
// DOI 1 redirects to page 1, which contains a PDF
[doiPrefix + doi1, pageURL1, true],
[pageURL1, pageURL1, true],
// DOI 2 redirects to page 2, which doesn't contain a PDF, but DOI 2 has an
// OA entry for the PDF URL
[doiPrefix + doi2, pageURL2, false],
[pageURL2, pageURL2, false],
// DOI 3 redirects to page 2, which doesn't contain a PDF, but DOI 3 contains
// an OA entry for page 3, which contains a PDF
[doiPrefix + doi3, pageURL2, false],
[pageURL3, pageURL3, true],
// DOI 4 redirects to page 4, which doesn't contain a PDF
[doiPrefix + doi4, pageURL4, false],
[pageURL4, pageURL4, false],
// DOI 6 redirects to page 8, which is on a different domain and has a PDF
[doiPrefix + doi6, pageURL8, true],
[pageURL8, pageURL8, true],
];
for (let route of routes) {
let [expectedURL, responseURL, includePDF] = route;
if (url != expectedURL) continue;
let html = `<html>
<head>
<title>Page Title</title>
<link rel="schema.DC" href="http://purl.org/dc/elements/1.1/" />
<meta name="citation_title" content="Title"/>
<meta name="${includePDF ? 'citation_pdf_url' : 'ignore'}" content="${pdfURL}"/>
</head>
<body>Body</body>
</html>`;
// Return explicit 302 if not following redirects
if (expectedURL != responseURL && options.followRedirects === false) {
return {
status: 302,
getResponseHeader: makeGetResponseHeader({
Location: responseURL
})
};
}
let html = getHTMLPage(includePDF);
return makeHTMLResponseFromType(html, options.responseType, responseURL);
}
@ -486,6 +509,31 @@ describe("Zotero.Attachments", function() {
};
}
// Returns a 429 every other call
if (url.startsWith(pageURL9)) {
if (return429) {
return429 = false;
throw new Zotero.HTTP.UnexpectedStatusException(
{
status: 429,
response: '',
responseURL: pageURL9,
getResponseHeader: makeGetResponseHeader({
'Content-Type': 'text/plain',
'Retry-After': '2',
})
},
pageURL9,
'Failing with 429'
);
}
else {
return429 = true;
let html = getHTMLPage(true);
return makeHTMLResponseFromType(html, options.responseType, pageURL9);
}
}
// OA PDF lookup
if (url.startsWith(ZOTERO_CONFIG.SERVICES_URL)) {
let json = JSON.parse(options.body);
@ -525,6 +573,8 @@ describe("Zotero.Attachments", function() {
pdfURL.substr(baseURL.length - 1),
Zotero.File.pathToFile(OS.Path.join(getTestDataDirectory().path, 'test.pdf'))
);
requestStubCallTimes = [];
});
afterEach(async function () {
@ -547,8 +597,8 @@ describe("Zotero.Attachments", function() {
await item.saveTx();
var attachment = await Zotero.Attachments.addAvailablePDF(item);
assert.isTrue(requestStub.calledOnce);
assert.isTrue(requestStub.calledWith('GET', 'https://doi.org/' + doi));
assert.isTrue(requestStub.calledTwice);
assert.isTrue(requestStub.getCall(0).calledWith('GET', 'https://doi.org/' + doi));
assert.ok(attachment);
var json = attachment.toJSON();
assert.equal(json.url, pdfURL);
@ -583,8 +633,8 @@ describe("Zotero.Attachments", function() {
await item.saveTx();
var attachment = await Zotero.Attachments.addAvailablePDF(item);
assert.isTrue(requestStub.calledOnce);
assert.isTrue(requestStub.calledWith('GET', 'https://doi.org/' + doi));
assert.isTrue(requestStub.calledTwice);
assert.isTrue(requestStub.getCall(0).calledWith('GET', 'https://doi.org/' + doi));
assert.ok(attachment);
var json = attachment.toJSON();
assert.equal(json.url, pdfURL);
@ -619,11 +669,13 @@ describe("Zotero.Attachments", function() {
await item.saveTx();
var attachment = await Zotero.Attachments.addAvailablePDF(item);
assert.isTrue(requestStub.calledTwice);
assert.isTrue(requestStub.calledThrice);
var call1 = requestStub.getCall(0);
assert.isTrue(call1.calledWith('GET', 'https://doi.org/' + doi));
var call2 = requestStub.getCall(1);
assert.isTrue(call2.calledWith('POST', ZOTERO_CONFIG.SERVICES_URL + 'oa/search'));
assert.isTrue(call2.calledWith('GET', pageURL2));
var call3 = requestStub.getCall(2);
assert.isTrue(call3.calledWith('POST', ZOTERO_CONFIG.SERVICES_URL + 'oa/search'));
assert.ok(attachment);
var json = attachment.toJSON();
@ -641,16 +693,18 @@ describe("Zotero.Attachments", function() {
await item.saveTx();
var attachment = await Zotero.Attachments.addAvailablePDF(item);
assert.isTrue(requestStub.calledThrice);
assert.equal(requestStub.callCount, 4);
// Check the DOI (and get nothing)
var call1 = requestStub.getCall(0);
assert.isTrue(call1.calledWith('GET', 'https://doi.org/' + doi));
var call = requestStub.getCall(0);
assert.isTrue(call.calledWith('GET', 'https://doi.org/' + doi));
call = requestStub.getCall(1);
assert.isTrue(call.calledWith('GET', pageURL2));
// Check the OA resolver and get page 3
var call2 = requestStub.getCall(1);
assert.isTrue(call2.calledWith('POST', ZOTERO_CONFIG.SERVICES_URL + 'oa/search'));
call = requestStub.getCall(2);
assert.isTrue(call.calledWith('POST', ZOTERO_CONFIG.SERVICES_URL + 'oa/search'));
// Check page 3 and find the download URL
var call3 = requestStub.getCall(2);
assert.isTrue(call3.calledWith('GET', pageURL3));
call = requestStub.getCall(3);
assert.isTrue(call.calledWith('GET', pageURL3));
assert.ok(attachment);
var json = attachment.toJSON();
@ -669,15 +723,105 @@ describe("Zotero.Attachments", function() {
await item.saveTx();
var attachment = await Zotero.Attachments.addAvailablePDF(item);
assert.isTrue(requestStub.calledTwice);
var call1 = requestStub.getCall(0);
assert.isTrue(call1.calledWith('GET', 'https://doi.org/' + doi));
var call2 = requestStub.getCall(1);
assert.isTrue(call2.calledWith('POST', ZOTERO_CONFIG.SERVICES_URL + 'oa/search'));
assert.equal(requestStub.callCount, 3);
var call = requestStub.getCall(0);
assert.isTrue(call.calledWith('GET', 'https://doi.org/' + doi));
call = requestStub.getCall(1);
assert.isTrue(call.calledWith('GET', pageURL4));
call = requestStub.getCall(2);
assert.isTrue(call.calledWith('POST', ZOTERO_CONFIG.SERVICES_URL + 'oa/search'));
assert.isFalse(attachment);
});
it("should wait between requests to the same domain", async function () {
	// Two article pages on the same domain
	var items = [];
	for (let url of [pageURL1, pageURL3]) {
		let item = createUnsavedDataObject('item', { itemType: 'journalArticle' });
		item.setField('title', 'Test');
		item.setField('url', url);
		await item.saveTx();
		items.push(item);
	}
	
	var attachments = await Zotero.Attachments.addAvailablePDFs(items);
	
	// One page request per item, with the second held back by the same-domain delay
	assert.isTrue(requestStub.calledTwice);
	var [firstCallTime, secondCallTime] = requestStubCallTimes;
	assert.isAbove(secondCallTime - firstCallTime, 1000);
	// Make sure there's an attachment for every item
	var found = attachments.filter(x => x);
	assert.lengthOf(found, 2);
});
// Three items whose URLs are DOIs: two resolve to pages on 'website' and one to a page
// on 'website2'. Requests to doi.org itself aren't delayed, so the delay only applies
// once a DOI redirects to a domain that was requested recently.
it("should wait between requests that resolve to the same domain", async function () {
// DOI URL resolves to 'website' domain with PDF
var url1 = doiPrefix + doi1;
var item1 = createUnsavedDataObject('item', { itemType: 'journalArticle' });
item1.setField('title', 'Test');
item1.setField('url', url1);
await item1.saveTx();
// DOI URL resolves to 'website' domain without PDF
var url2 = doiPrefix + doi4;
var item2 = createUnsavedDataObject('item', { itemType: 'journalArticle' });
item2.setField('title', 'Test');
item2.setField('url', url2);
await item2.saveTx();
// DOI URL resolves to 'website2' domain with PDF
var url3 = doiPrefix + doi6;
var item3 = createUnsavedDataObject('item', { itemType: 'journalArticle' });
item3.setField('title', 'Test');
item3.setField('url', url3);
await item3.saveTx();
var attachments = await Zotero.Attachments.addAvailablePDFs([item1, item2, item3]);
// Two requests per item: the DOI lookup and the page it redirects to
assert.equal(requestStub.callCount, 6);
assert.equal(requestStub.getCall(0).args[1], doiPrefix + doi1);
assert.equal(requestStub.getCall(1).args[1], pageURL1);
assert.equal(requestStub.getCall(2).args[1], doiPrefix + doi4);
// Should skip ahead to the next DOI
assert.equal(requestStub.getCall(3).args[1], doiPrefix + doi6);
// which is on a new domain
assert.equal(requestStub.getCall(4).args[1], pageURL8);
// and then return to make 'website' request for DOI 4
assert.equal(requestStub.getCall(5).args[1], pageURL4);
// 'website' requests should be a second apart
assert.isAbove(requestStubCallTimes[5] - requestStubCallTimes[1], 1000);
// Items 1 and 3 got a PDF; item 2's page had none
assert.instanceOf(attachments[0], Zotero.Item);
assert.isFalse(attachments[1]);
assert.instanceOf(attachments[2], Zotero.Item);
});
it("should wait between requests to the same domain after a 429", async function () {
	// The request stub answers pageURL9 with a 429 (Retry-After: 2) on every other call;
	// pageURL3 is a page on the same domain
	var items = [];
	for (let url of [pageURL9, pageURL3]) {
		let item = createUnsavedDataObject('item', { itemType: 'journalArticle' });
		item.setField('title', 'Test');
		item.setField('url', url);
		await item.saveTx();
		items.push(item);
	}
	
	var attachments = await Zotero.Attachments.addAvailablePDFs(items);
	
	// The failed request is retried before the second item's page is requested
	assert.isTrue(requestStub.calledThrice);
	[pageURL9, pageURL9, pageURL3].forEach((expectedURL, n) => {
		assert.equal(requestStub.getCall(n).args[1], expectedURL);
	});
	var [failedCallTime, retryCallTime] = requestStubCallTimes;
	assert.isAbove(retryCallTime - failedCallTime, 2000);
	// Make sure there's an attachment for every item
	var found = attachments.filter(x => x);
	assert.lengthOf(found, 2);
});
it("should handle a custom resolver in HTML mode", async function () {
var doi = doi4;
var item = createUnsavedDataObject('item', { itemType: 'journalArticle' });
@ -697,13 +841,15 @@ describe("Zotero.Attachments", function() {
var attachment = await Zotero.Attachments.addAvailablePDF(item);
assert.isTrue(requestStub.calledThrice);
var call1 = requestStub.getCall(0);
assert.isTrue(call1.calledWith('GET', 'https://doi.org/' + doi));
var call2 = requestStub.getCall(1);
assert.isTrue(call2.calledWith('POST', ZOTERO_CONFIG.SERVICES_URL + 'oa/search'));
var call3 = requestStub.getCall(2);
assert.isTrue(call3.calledWith('GET', pageURL5));
assert.equal(requestStub.callCount, 4);
var call = requestStub.getCall(0);
assert.isTrue(call.calledWith('GET', 'https://doi.org/' + doi));
var call = requestStub.getCall(1);
assert.isTrue(call.calledWith('GET', pageURL4));
call = requestStub.getCall(2);
assert.isTrue(call.calledWith('POST', ZOTERO_CONFIG.SERVICES_URL + 'oa/search'));
call = requestStub.getCall(3);
assert.isTrue(call.calledWith('GET', pageURL5));
assert.ok(attachment);
var json = attachment.toJSON();
@ -731,13 +877,15 @@ describe("Zotero.Attachments", function() {
var attachment = await Zotero.Attachments.addAvailablePDF(item);
assert.isTrue(requestStub.calledThrice);
var call1 = requestStub.getCall(0);
assert.isTrue(call1.calledWith('GET', 'https://doi.org/' + doi));
var call2 = requestStub.getCall(1);
assert.isTrue(call2.calledWith('POST', ZOTERO_CONFIG.SERVICES_URL + 'oa/search'));
var call3 = requestStub.getCall(2);
assert.isTrue(call3.calledWith('GET', pageURL6));
assert.equal(requestStub.callCount, 4);
var call = requestStub.getCall(0);
assert.isTrue(call.calledWith('GET', 'https://doi.org/' + doi));
call = requestStub.getCall(1);
assert.isTrue(call.calledWith('GET', pageURL4));
call = requestStub.getCall(2);
assert.isTrue(call.calledWith('POST', ZOTERO_CONFIG.SERVICES_URL + 'oa/search'));
call = requestStub.getCall(3);
assert.isTrue(call.calledWith('GET', pageURL6));
assert.ok(attachment);
var json = attachment.toJSON();
@ -769,15 +917,17 @@ describe("Zotero.Attachments", function() {
var attachment = await Zotero.Attachments.addAvailablePDF(item);
assert.equal(requestStub.callCount, 4);
var call1 = requestStub.getCall(0);
assert.isTrue(call1.calledWith('GET', 'https://doi.org/' + doi));
var call2 = requestStub.getCall(1);
assert.isTrue(call2.calledWith('POST', ZOTERO_CONFIG.SERVICES_URL + 'oa/search'));
var call3 = requestStub.getCall(2);
assert.isTrue(call3.calledWith('GET', pageURL6));
var call4 = requestStub.getCall(3);
assert.isTrue(call4.calledWith('GET', pageURL1));
assert.equal(requestStub.callCount, 5);
var call = requestStub.getCall(0);
assert.isTrue(call.calledWith('GET', 'https://doi.org/' + doi));
call = requestStub.getCall(1);
assert.isTrue(call.calledWith('GET', pageURL4));
call = requestStub.getCall(2);
assert.isTrue(call.calledWith('POST', ZOTERO_CONFIG.SERVICES_URL + 'oa/search'));
call = requestStub.getCall(3);
assert.isTrue(call.calledWith('GET', pageURL6));
call = requestStub.getCall(4);
assert.isTrue(call.calledWith('GET', pageURL1));
assert.ok(attachment);
var json = attachment.toJSON();