From 86a5c46b1e52753887f7f51b2afd63048e0922c5 Mon Sep 17 00:00:00 2001 From: Dan Stillman Date: Sun, 2 Feb 2020 23:34:57 -0500 Subject: [PATCH] Find Available PDF: Don't mark URLs that redirect as tried https://forums.zotero.org/discussion/81182 --- chrome/content/zotero/xpcom/attachments.js | 20 ++++++++++++++++---- test/tests/attachmentsTest.js | 2 +- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/chrome/content/zotero/xpcom/attachments.js b/chrome/content/zotero/xpcom/attachments.js index 38ad57614b..0988c2acd1 100644 --- a/chrome/content/zotero/xpcom/attachments.js +++ b/chrome/content/zotero/xpcom/attachments.js @@ -1666,7 +1666,6 @@ Zotero.Attachments = new function(){ // If URL wasn't available or failed, try to get a URL from a page if (pageURL) { - addTriedURL(pageURL); url = null; let responseURL; try { @@ -1722,7 +1721,6 @@ Zotero.Attachments = new function(){ skip = true; break; } - addTriedURL(nextURL); continue; } @@ -1732,10 +1730,11 @@ Zotero.Attachments = new function(){ Zotero.debug("Redirected to " + responseURL); } - // If HTML, check for a meta redirect contentType = req.getResponseHeader('Content-Type'); if (contentType.startsWith('text/html')) { doc = await Zotero.Utilities.Internal.blobToHTMLDocument(blob, responseURL); + + // Check for a meta redirect on HTML pages let refreshURL = Zotero.HTTP.getHTMLMetaRefreshURL(doc, responseURL); if (refreshURL) { if (isTriedURL(refreshURL)) { @@ -1745,10 +1744,22 @@ Zotero.Attachments = new function(){ } doc = null; nextURL = refreshURL; - addTriedURL(nextURL); continue; } } + + // Don't try this page URL again + // + // We only do this for URLs that don't redirect, since some sites seem to + // use redirects plus cookies for IP-based authentication [1]. The downside + // is that we might follow the same set of redirects more than once, but we + // won't process the final page multiple times, and if a publisher URL does + // redirect that's hopefully a decent indication that a PDF will be found + // the first time around. + // + // [1] https://forums.zotero.org/discussion/81182 + addTriedURL(responseURL); + break; } if (skip) { @@ -1779,6 +1790,7 @@ Zotero.Attachments = new function(){ Zotero.debug(`PDF at ${url} was already tried -- skipping`); continue; } + // Don't try this PDF URL again addTriedURL(url); // Use the page we loaded as the referrer diff --git a/test/tests/attachmentsTest.js b/test/tests/attachmentsTest.js index 3c41df8891..c223c8d535 100644 --- a/test/tests/attachmentsTest.js +++ b/test/tests/attachmentsTest.js @@ -726,7 +726,7 @@ describe("Zotero.Attachments", function() { assert.equal(await OS.File.stat(attachment.getFilePath()).size, pdfSize); }); - it("shouldn't try the redirected DOI page again if also in the URL field", async function () { + it("shouldn't try the URL-field URL again if it was already checked as the redirected DOI URL", async function () { var doi = doi4; var item = createUnsavedDataObject('item', { itemType: 'journalArticle' }); item.setField('title', 'Test');