Find Available PDF: Don't mark URLs that redirect as tried

https://forums.zotero.org/discussion/81182
This commit is contained in:
Dan Stillman 2020-02-02 23:34:57 -05:00
parent b4b33c07de
commit 86a5c46b1e
2 changed files with 17 additions and 5 deletions

View file

@ -1666,7 +1666,6 @@ Zotero.Attachments = new function(){
// If URL wasn't available or failed, try to get a URL from a page
if (pageURL) {
addTriedURL(pageURL);
url = null;
let responseURL;
try {
@ -1722,7 +1721,6 @@ Zotero.Attachments = new function(){
skip = true;
break;
}
addTriedURL(nextURL);
continue;
}
@ -1732,10 +1730,11 @@ Zotero.Attachments = new function(){
Zotero.debug("Redirected to " + responseURL);
}
// If HTML, check for a meta redirect
contentType = req.getResponseHeader('Content-Type');
if (contentType.startsWith('text/html')) {
doc = await Zotero.Utilities.Internal.blobToHTMLDocument(blob, responseURL);
// Check for a meta redirect on HTML pages
let refreshURL = Zotero.HTTP.getHTMLMetaRefreshURL(doc, responseURL);
if (refreshURL) {
if (isTriedURL(refreshURL)) {
@ -1745,10 +1744,22 @@ Zotero.Attachments = new function(){
}
doc = null;
nextURL = refreshURL;
addTriedURL(nextURL);
continue;
}
}
// Don't try this page URL again
//
// We only do this for URLs that don't redirect, since some sites seem to
// use redirects plus cookies for IP-based authentication [1]. The downside
// is that we might follow the same set of redirects more than once, but we
// won't process the final page multiple times, and if a publisher URL does
// redirect that's hopefully a decent indication that a PDF will be found
// the first time around.
//
// [1] https://forums.zotero.org/discussion/81182
addTriedURL(responseURL);
break;
}
if (skip) {
@ -1779,6 +1790,7 @@ Zotero.Attachments = new function(){
Zotero.debug(`PDF at ${url} was already tried -- skipping`);
continue;
}
// Don't try this PDF URL again
addTriedURL(url);
// Use the page we loaded as the referrer

View file

@ -726,7 +726,7 @@ describe("Zotero.Attachments", function() {
assert.equal(await OS.File.stat(attachment.getFilePath()).size, pdfSize);
});
it("shouldn't try the redirected DOI page again if also in the URL field", async function () {
it("shouldn't try the URL-field URL again if it was already checked as the redirected DOI URL", async function () {
var doi = doi4;
var item = createUnsavedDataObject('item', { itemType: 'journalArticle' });
item.setField('title', 'Test');