Find Available PDF: Don't mark URLs that redirect as tried
https://forums.zotero.org/discussion/81182
This commit is contained in:
parent
b4b33c07de
commit
86a5c46b1e
2 changed files with 17 additions and 5 deletions
|
@ -1666,7 +1666,6 @@ Zotero.Attachments = new function(){
|
|||
|
||||
// If URL wasn't available or failed, try to get a URL from a page
|
||||
if (pageURL) {
|
||||
addTriedURL(pageURL);
|
||||
url = null;
|
||||
let responseURL;
|
||||
try {
|
||||
|
@ -1722,7 +1721,6 @@ Zotero.Attachments = new function(){
|
|||
skip = true;
|
||||
break;
|
||||
}
|
||||
addTriedURL(nextURL);
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -1732,10 +1730,11 @@ Zotero.Attachments = new function(){
|
|||
Zotero.debug("Redirected to " + responseURL);
|
||||
}
|
||||
|
||||
// If HTML, check for a meta redirect
|
||||
contentType = req.getResponseHeader('Content-Type');
|
||||
if (contentType.startsWith('text/html')) {
|
||||
doc = await Zotero.Utilities.Internal.blobToHTMLDocument(blob, responseURL);
|
||||
|
||||
// Check for a meta redirect on HTML pages
|
||||
let refreshURL = Zotero.HTTP.getHTMLMetaRefreshURL(doc, responseURL);
|
||||
if (refreshURL) {
|
||||
if (isTriedURL(refreshURL)) {
|
||||
|
@ -1745,10 +1744,22 @@ Zotero.Attachments = new function(){
|
|||
}
|
||||
doc = null;
|
||||
nextURL = refreshURL;
|
||||
addTriedURL(nextURL);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
// Don't try this page URL again
|
||||
//
|
||||
// We only do this for URLs that don't redirect, since some sites seem to
|
||||
// use redirects plus cookies for IP-based authentication [1]. The downside
|
||||
// is that we might follow the same set of redirects more than once, but we
|
||||
// won't process the final page multiple times, and if a publisher URL does
|
||||
// redirect that's hopefully a decent indication that a PDF will be found
|
||||
// the first time around.
|
||||
//
|
||||
// [1] https://forums.zotero.org/discussion/81182
|
||||
addTriedURL(responseURL);
|
||||
|
||||
break;
|
||||
}
|
||||
if (skip) {
|
||||
|
@ -1779,6 +1790,7 @@ Zotero.Attachments = new function(){
|
|||
Zotero.debug(`PDF at ${url} was already tried -- skipping`);
|
||||
continue;
|
||||
}
|
||||
// Don't try this PDF URL again
|
||||
addTriedURL(url);
|
||||
|
||||
// Use the page we loaded as the referrer
|
||||
|
|
|
@ -726,7 +726,7 @@ describe("Zotero.Attachments", function() {
|
|||
assert.equal(await OS.File.stat(attachment.getFilePath()).size, pdfSize);
|
||||
});
|
||||
|
||||
it("shouldn't try the redirected DOI page again if also in the URL field", async function () {
|
||||
it("shouldn't try the URL-field URL again if it was already checked as the redirected DOI URL", async function () {
|
||||
var doi = doi4;
|
||||
var item = createUnsavedDataObject('item', { itemType: 'journalArticle' });
|
||||
item.setField('title', 'Test');
|
||||
|
|
Loading…
Reference in a new issue