PDF retrieval improvements

- Add the ability to extract a PDF URL from a given webpage using the
  translation framework
- Add the ability to get open-access PDFs from landing pages listed in
  Unpaywall data, in addition to direct PDF URLs
- Use the above functionality to improve PDF retrieval for "Add Item by
  Identifier"
- Add "Find Available PDFs" option to the item context menu to retrieve
  PDFs for existing items from the DOI or URL page or using Unpaywall
  data. The option appears for single items with a DOI or URL and no PDF,
  and it always appears when selecting multiple top-level items (but
  skips ineligible items).

PDF extraction from DOI/URL pages will currently only work with
unauthenticated access (i.e., on-campus or VPN, but not via a web-based
proxy).

Supersedes and closes #948
This commit is contained in:
Dan Stillman 2018-08-07 04:08:47 -04:00
parent 868a21b7e7
commit 679a6d5cc7
8 changed files with 519 additions and 65 deletions

View file

@ -68,12 +68,13 @@ var Zotero_Lookup = new function () {
libraryID,
collections: collection ? [collection.id] : false
});
// If there's a DOI and we don't yet have a file, check for open-access PDFs
// TEMP: Limit to dev builds
if ((Zotero.version.includes('beta') || Zotero.version.includes('SOURCE'))
&& identifier.DOI && !newItems.find(x => x.isImportedAttachment())) {
// If we don't yet have a file, check for available PDFs
if (Zotero.Prefs.get('downloadAssociatedFiles')
&& !newItems.find(x => x.isImportedAttachment())
// TEMP: Limit to dev builds
&& (Zotero.version.includes('beta') || Zotero.version.includes('SOURCE'))) {
try {
yield Zotero.Attachments.addOpenAccessPDF(newItems[0]);
yield Zotero.Attachments.addAvailablePDF(newItems[0]);
}
catch (e) {
Zotero.logError(e);

View file

@ -835,7 +835,7 @@ Zotero.Attachments = new function(){
* @param {Boolean} [options.isPDF] - Delete file if not PDF
*/
this.downloadFile = async function (url, path, options = {}) {
Zotero.debug(`Downloading ${url}`);
Zotero.debug(`Downloading file from ${url}`);
try {
await new Zotero.Promise(function (resolve) {
@ -879,53 +879,56 @@ Zotero.Attachments = new function(){
/**
 * Try to download a file from a list of URLs, keeping the first one that succeeds
 *
 * Empty or missing entries are skipped rather than ending the search, so a gap in the
 * list doesn't hide later candidates.
 *
 * @param {String[]} urls
 * @param {String} path
 * @param {Object} [options] - Options to pass to this.downloadFile()
 * @return {String|false} - URL that succeeded, or false if none
 */
this.downloadFirstAvailableFile = async function (urls, path, options) {
	// Iterate over a copy so changes to the caller's array during a download don't affect us
	for (let url of [...urls]) {
		// A falsy entry can't be downloaded, but later entries still might be
		if (!url) {
			continue;
		}
		try {
			await this.downloadFile(url, path, options);
			return url;
		}
		catch (e) {
			// A failed download isn't fatal -- move on to the next candidate
			Zotero.debug(`Error downloading ${url}: ${e}`);
		}
	}
	return false;
};
/**
* Look for an open-access PDF for an item and add it as an attachment
* Look for an available PDF for an item and add it as an attachment
*
* @param {Zotero.Item} item
* @return {Zotero.Item|false} - New attachment item, or false if unsuccessful
*/
this.addOpenAccessPDF = async function (item) {
if (!Zotero.Prefs.get('downloadAssociatedFiles')) {
return false;
this.addAvailablePDF = async function (item, modes = ['doi', 'url', 'oa']) {
Zotero.debug("Looking for available PDFs");
var useDOI = modes.includes('doi');
var useURL = modes.includes('url');
var useOA = modes.includes('oa');
var urlObjects = [];
if (useDOI) {
let doi = item.getField('DOI');
if (doi) {
doi = Zotero.Utilities.cleanDOI(doi);
if (doi) {
urlObjects.push({ pageURL: 'https://doi.org/' + doi });
}
}
}
var doi = item.getField('DOI');
if (!doi) {
return false;
if (useURL) {
let url = item.getField('url');
if (url) {
url = Zotero.Utilities.cleanURL(url);
if (url) {
urlObjects.push({ pageURL: url });
}
}
}
try {
var urlObjects = await Zotero.Utilities.Internal.getOpenAccessPDFURLs(doi);
}
catch (e) {
Zotero.logError(e);
return false;
if (useOA) {
urlObjects.push(async function () {
var doi = item.getField('DOI');
if (!doi) {
return [];
}
try {
return await Zotero.Utilities.Internal.getOpenAccessPDFURLs(doi);
}
catch (e) {
Zotero.logError(e);
return [];
}
});
}
if (!urlObjects.length) {
return false;
}
@ -938,7 +941,7 @@ Zotero.Attachments = new function(){
* Try to add a PDF to an item from a set of possible URLs
*
* @param {Zotero.Item} item
* @param {Object[]} urlObjects - Array of objects with 'url' and 'version'
* @param {(String|Object|Function)[]} urlObjects - See downloadFirstAvailableFile()
* @return {Zotero.Item|false} - New attachment item, or false if unsuccessful
*/
this.addPDFFromURLs = async function (item, urlObjects) {
@ -946,15 +949,13 @@ Zotero.Attachments = new function(){
var tmpDir;
var tmpFile;
var attachmentItem = false;
var urls = urlObjects.map(o => o.url);
try {
tmpDir = (await this.createTemporaryStorageDirectory()).path;
tmpFile = OS.Path.join(tmpDir, fileBaseName + '.pdf');
let url = await this.downloadFirstAvailableFile(
urls, tmpFile, { isPDF: true }
let { url, index } = await this.downloadFirstAvailableFile(
urlObjects, tmpFile, { isPDF: true }
);
if (url) {
let version = urlObjects[urls.indexOf(url)].version;
attachmentItem = await this.createURLAttachmentFromTemporaryStorageDirectory({
directory: tmpDir,
libraryID: item.libraryID,
@ -962,7 +963,7 @@ Zotero.Attachments = new function(){
url,
contentType: 'application/pdf',
parentItemID: item.id,
articleVersion: version
articleVersion: urlObjects[index].version
});
}
else {
@ -980,6 +981,121 @@ Zotero.Attachments = new function(){
};
/**
 * Try to download a file from a list of URLs, keeping the first one that succeeds
 *
 * URLs are only tried once.
 *
 * @param {(String|Object|Function)[]} urlObjects - An array of URLs, objects, or functions
 *    that return arrays of objects. Objects can contain 'url' and/or 'pageURL', which is a
 *    webpage that might contain a translatable PDF link. Functions that return promises are
 *    waited for, and functions aren't called unless a file hasn't yet been found from an
 *    earlier entry.
 * @param {String} path - Path to save file to
 * @param {Object} [options] - Options to pass to this.downloadFile()
 * @return {Object|false} - Object with the successful 'url', the 'index' in the passed array
 *    of the entry that produced it (for an entry returned by a function, the index of that
 *    function), and 'props', the URL object the file was found from; or false if no file
 *    could be downloaded
 */
this.downloadFirstAvailableFile = async function (urlObjects, path, options) {
	// Operate on copy, since functions get replaced by the entries they return
	urlObjects = [...urlObjects];
	// Position in the caller's array that each working entry came from, kept in step with
	// the copy so that the returned index stays valid for the caller
	var originalIndexes = urlObjects.map((_, i) => i);
	
	// Don't try the same URL more than once
	var triedURLs = new Set();
	var triedPages = new Set();
	
	for (let i = 0; i < urlObjects.length; i++) {
		let urlObject = urlObjects[i];
		let index = originalIndexes[i];
		
		if (typeof urlObject == 'function') {
			let resolved = [...(await urlObject())];
			urlObjects.splice(i, 1, ...resolved);
			originalIndexes.splice(i, 1, ...resolved.map(() => index));
			// Look at this position again: it now holds the first returned entry or, if
			// nothing was returned, whatever followed the function (possibly another function)
			i--;
			continue;
		}
		
		// Accept URL strings in addition to objects
		if (typeof urlObject == 'string') {
			urlObject = { url: urlObject };
		}
		
		let url = urlObject.url;
		let pageURL = urlObject.pageURL;
		
		// Ignore URLs we've already tried
		if (url && triedURLs.has(url)) {
			Zotero.debug(`PDF at ${url} was already tried -- skipping`);
			url = null;
		}
		if (pageURL && triedPages.has(pageURL)) {
			Zotero.debug(`Page at ${pageURL} was already tried -- skipping`);
			pageURL = null;
		}
		
		// Try URL first if available
		if (url) {
			triedURLs.add(url);
			try {
				await this.downloadFile(url, path, options);
				return { url, index, props: urlObject };
			}
			catch (e) {
				Zotero.debug(`Error downloading ${url}: ${e}`);
			}
		}
		
		// If URL wasn't available or failed, try to get a URL from a page
		if (pageURL) {
			triedPages.add(pageURL);
			url = null;
			let responseURL;
			try {
				Zotero.debug(`Looking for PDF on ${pageURL}`);
				// TODO: Handle redirects manually so we can avoid loading a page we've already
				// tried
				let xmlhttp = await Zotero.HTTP.request("GET", pageURL, { responseType: 'document' });
				responseURL = xmlhttp.responseURL;
				if (pageURL != responseURL) {
					Zotero.debug("Redirected to " + responseURL);
				}
				// Remember the redirect target too, so a later entry pointing straight at it
				// is skipped
				triedPages.add(responseURL);
				let doc = Zotero.HTTP.wrapDocument(xmlhttp.response, responseURL);
				url = await Zotero.Utilities.Internal.getPDFFromDocument(doc);
			}
			catch (e) {
				Zotero.debug(`Error getting PDF from ${pageURL}: ${e}`);
				continue;
			}
			if (!url) {
				Zotero.debug(`No PDF found on ${responseURL}`);
				continue;
			}
			if (triedURLs.has(url)) {
				Zotero.debug(`PDF at ${url} was already tried -- skipping`);
				continue;
			}
			triedURLs.add(url);
			// Use the page we loaded as the referrer
			let downloadOptions = Object.assign({}, options, { referrer: responseURL });
			try {
				await this.downloadFile(url, path, downloadOptions);
				return { url, index, props: urlObject };
			}
			catch (e) {
				Zotero.debug(`Error downloading ${url}: ${e}`);
			}
		}
	}
	return false;
};
/**
* @deprecated Use Zotero.Utilities.cleanURL instead
*/

View file

@ -2130,6 +2130,15 @@ Zotero.Item.prototype.numNonHTMLFileAttachments = function () {
};
/**
 * Count this item's child attachments for which isFileAttachment() is true and whose
 * content type is 'application/pdf'
 *
 * Requires the item's 'childItems' data to be loaded.
 *
 * @return {Number}
 */
Zotero.Item.prototype.numPDFAttachments = function () {
	this._requireData('childItems');
	var count = 0;
	for (let itemID of this.getAttachments()) {
		let attachment = Zotero.Items.get(itemID);
		if (attachment.isFileAttachment() && attachment.attachmentContentType == 'application/pdf') {
			count++;
		}
	}
	return count;
};
Zotero.Item.prototype.getFile = function () {
Zotero.debug("Zotero.Item.prototype.getFile() is deprecated -- use getFilePath[Async]()", 2);

View file

@ -930,6 +930,13 @@ Zotero.Utilities.Internal = {
},
canFindPDFForItem: function (item) {
return item.isRegularItem()
&& (!!item.getField('DOI') || !!item.getField('url'))
&& item.numPDFAttachments() == 0;
},
/**
* Look for open-access PDFs for a given DOI using Zotero's Unpaywall mirror
*
@ -954,13 +961,45 @@ Zotero.Utilities.Internal = {
responseType: 'json'
});
var urls = req.response;
Zotero.debug(`Found ${urls.length} ${Zotero.Utilities.pluralize(urls.length, ['URL', 'URLs'])}`);
Zotero.debug(`Found ${urls.length} open-access PDF ${Zotero.Utilities.pluralize(urls.length, ['URL', 'URLs'])}`);
// Handle older URL-only format
urls = urls.map(o => typeof o == 'string' ? { url: o } : o);
return urls;
},
/**
* Run translation on a Document to try to find a PDF URL
*
* @param {doc} Document
* @return {String|false} - PDF URL, or false if none found
*/
getPDFFromDocument: async function (doc) {
let translate = new Zotero.Translate.Web();
translate.setDocument(doc);
var translators = await translate.getTranslators();
// TEMP: Until there's a generic webpage translator
if (!translators.length) {
return false;
}
translate.setTranslator(translators[0]);
var options = {
libraryID: false,
saveAttachments: true
};
let newItems = await translate.translate(options);
if (!newItems.length) {
return false;
}
for (let attachment of newItems[0].attachments) {
if (attachment.mimeType == 'application/pdf') {
return attachment.url;
}
}
return false;
},
/**
* Hyphenate an ISBN based on the registrant table available from
* https://www.isbn-international.org/range_file_generation

View file

@ -2753,6 +2753,8 @@ var ZoteroPane = new function()
'addNote',
'addAttachments',
'sep2',
'findPDF',
'sep3',
'toggleRead',
'duplicateItem',
'removeItems',
@ -2760,11 +2762,11 @@ var ZoteroPane = new function()
'moveToTrash',
'deleteFromLibrary',
'mergeItems',
'sep3',
'sep4',
'exportItems',
'createBib',
'loadReport',
'sep4',
'sep5',
'recognizePDF',
'unrecognize',
'reportMetadata',
@ -2803,7 +2805,7 @@ var ZoteroPane = new function()
}
if(!collectionTreeRow.isFeed()) {
show.push(m.sep3, m.exportItems, m.createBib, m.loadReport);
show.push(m.sep4, m.exportItems, m.createBib, m.loadReport);
}
var items = this.getSelectedItems();
@ -2813,13 +2815,18 @@ var ZoteroPane = new function()
if (items.length > 1) {
var multiple = '.multiple';
var canMerge = true, canIndex = true, canRecognize = true, canUnrecognize = true, canRename = true;
var canMerge = true,
canIndex = true,
canRecognize = true,
canUnrecognize = true,
canRename = true,
canFindPDF = true;
var canMarkRead = collectionTreeRow.isFeed();
var markUnread = true;
for (let i = 0; i < items.length; i++) {
let item = items[i];
if (canMerge && !item.isRegularItem() || item.isFeedItem || collectionTreeRow.isDuplicates()) {
if (canMerge && (!item.isRegularItem() || item.isFeedItem || collectionTreeRow.isDuplicates())) {
canMerge = false;
}
@ -2843,6 +2850,10 @@ var ZoteroPane = new function()
if(canMarkRead && markUnread && !item.isRead) {
markUnread = false;
}
if (canFindPDF && (!item.isRegularItem() || item.isFeedItem || collectionTreeRow.isDuplicates())) {
canFindPDF = false;
}
}
if (canMerge) {
@ -2870,6 +2881,10 @@ var ZoteroPane = new function()
}
}
if (canFindPDF) {
show.push(m.findPDF, m.sep3);
}
var canCreateParent = true;
for (let i = 0; i < items.length; i++) {
let item = items[i];
@ -2888,7 +2903,7 @@ var ZoteroPane = new function()
// Add in attachment separator
if (canCreateParent || canRecognize || canUnrecognize || canRename || canIndex) {
show.push(m.sep4);
show.push(m.sep5);
}
// Block certain actions on files if no access and at least one item
@ -2925,38 +2940,42 @@ var ZoteroPane = new function()
show.push(m.addNote, m.addAttachments, m.sep2);
}
if (Zotero.Utilities.Internal.canFindPDFForItem(item)) {
show.push(m.findPDF, m.sep3);
}
if (Zotero.RecognizePDF.canUnrecognize(item)) {
show.push(m.sep4, m.unrecognize, m.reportMetadata);
show.push(m.sep5, m.unrecognize, m.reportMetadata);
}
if (item.isAttachment()) {
var showSep4 = false;
var showSep5 = false;
if (Zotero.RecognizePDF.canRecognize(item)) {
show.push(m.recognizePDF);
showSep4 = true;
showSep5 = true;
}
// Allow parent item creation for standalone attachments
if (item.isTopLevelItem()) {
show.push(m.createParent);
showSep4 = true;
showSep5 = true;
}
// Attachment rename option
if (!item.isTopLevelItem() && item.attachmentLinkMode != Zotero.Attachments.LINK_MODE_LINKED_URL) {
show.push(m.renameAttachments);
showSep4 = true;
showSep5 = true;
}
// If not linked URL, show reindex line
if (yield Zotero.Fulltext.canReindex(item)) {
show.push(m.reindexItem);
showSep4 = true;
showSep5 = true;
}
if (showSep4) {
show.push(m.sep4);
if (showSep5) {
show.push(m.sep5);
}
}
else if (item.isFeedItem) {
@ -3037,6 +3056,7 @@ var ZoteroPane = new function()
}
// Set labels, plural if necessary
menu.childNodes[m.findPDF].setAttribute('label', Zotero.getString('pane.items.menu.findAvailablePDF' + multiple));
menu.childNodes[m.moveToTrash].setAttribute('label', Zotero.getString('pane.items.menu.moveToTrash' + multiple));
menu.childNodes[m.deleteFromLibrary].setAttribute('label', Zotero.getString('pane.items.menu.delete' + multiple));
menu.childNodes[m.exportItems].setAttribute('label', Zotero.getString('pane.items.menu.export' + multiple));
@ -3816,6 +3836,55 @@ var ZoteroPane = new function()
});
/**
 * Look for available PDFs for the selected items and attach any that are found,
 * reporting progress in a popup window
 *
 * Items that don't pass Zotero.Utilities.Internal.canFindPDFForItem() are skipped. An
 * error while handling one item is logged and doesn't prevent the remaining items from
 * being checked or the progress window from being closed.
 *
 * @return {Promise}
 */
this.addPDFForSelectedItems = async function () {
	if (!this.canEdit()) {
		this.displayCannotEditLibraryMessage();
		return;
	}
	
	var items = this.getSelectedItems();
	
	var icon = 'chrome://zotero/skin/treeitem-attachment-pdf.png';
	var progressWin = new Zotero.ProgressWindow();
	// TODO: Localize
	var title = items.length > 1 ? 'Searching for available PDFs…' : 'Searching for available PDF…';
	progressWin.changeHeadline(title);
	var itemProgress = new progressWin.ItemProgress(
		icon,
		"Checking " + items.length + " " + Zotero.Utilities.pluralize(items.length, ['item', 'items'])
	);
	progressWin.show();
	
	var successful = 0;
	
	for (let i = 0; i < items.length; i++) {
		let item = items[i];
		try {
			if (Zotero.Utilities.Internal.canFindPDFForItem(item)) {
				let attachment = await Zotero.Attachments.addAvailablePDF(item);
				if (attachment) {
					successful++;
				}
			}
		}
		catch (e) {
			// Don't let a failure for one item abort the rest or strand the progress window
			Zotero.logError(e);
		}
		itemProgress.setProgress(((i + 1) / items.length) * 100);
	}
	
	itemProgress.setProgress(100);
	itemProgress.setIcon(icon);
	
	// TODO: Localize
	if (successful) {
		itemProgress.setText(
			successful + " " + Zotero.Utilities.pluralize(successful, ['PDF', 'PDFs']) + " added"
		);
	}
	else {
		itemProgress.setText("No PDFs found");
	}
	
	progressWin.startCloseTimer(4000);
};
/**
* @return {Promise<Zotero.Item>|false}
*/

View file

@ -313,6 +313,8 @@
</menupopup>
</menu>
<menuseparator/>
<menuitem class="menuitem-iconic zotero-menuitem-find-pdf" oncommand="ZoteroPane.addPDFForSelectedItems()"/>
<menuseparator/>
<menuitem class="menuitem-iconic zotero-menuitem-toggle-read-item" oncommand="ZoteroPane_Local.toggleSelectedItemsRead();"/>
<menuitem class="menuitem-iconic zotero-menuitem-duplicate-item" label="&zotero.items.menu.duplicateItem;" oncommand="ZoteroPane_Local.duplicateSelectedItem().done();"/>
<menuitem class="menuitem-iconic zotero-menuitem-remove-items" oncommand="ZoteroPane_Local.deleteSelectedItems();"/>

View file

@ -283,6 +283,8 @@ pane.items.remove.multiple = Are you sure you want to remove the selected items
pane.items.removeFromPublications.title = Remove from My Publications
pane.items.removeFromPublications = Are you sure you want to remove the selected item from My Publications?
pane.items.removeFromPublications.multiple = Are you sure you want to remove the selected items from My Publications?
pane.items.menu.findAvailablePDF = Find Available PDF
pane.items.menu.findAvailablePDF.multiple = Find Available PDFs
pane.items.menu.remove = Remove Item from Collection…
pane.items.menu.remove.multiple = Remove Items from Collection…
pane.items.menu.removeFromPublications = Remove Item from My Publications…

View file

@ -337,6 +337,222 @@ describe("Zotero.Attachments", function() {
});
});
// Tests for Zotero.Attachments.addAvailablePDF(). Zotero.HTTP.request() is stubbed to serve
// canned pages and OA lookup responses; the PDF itself is served by a local HTTP server.
describe("#addAvailablePDF()", function () {
	var doiPrefix = 'https://doi.org/';
	var doi1 = '10.1111/abcd';
	var doi2 = '10.2222/bcde';
	var doi3 = '10.3333/cdef';
	var doi4 = '10.4444/defg';
	var pageURL1 = 'http://website/article1';
	var pageURL2 = 'http://website/article2';
	var pageURL3 = 'http://website/article3';
	var pageURL4 = 'http://website/article4';
	
	Components.utils.import("resource://zotero-unit/httpd.js");
	var httpd;
	var port = 16213;
	var baseURL = `http://localhost:${port}/`;
	var pdfURL = `${baseURL}article1/pdf`;
	var pdfSize;
	var stub;
	
	/**
	 * Get the size of a file on disk
	 *
	 * OS.File.stat() returns a promise, so it has to be awaited before 'size' is read --
	 * `await OS.File.stat(path).size` would await the promise's nonexistent 'size' property
	 * and always produce undefined.
	 */
	async function getFileSize(path) {
		return (await OS.File.stat(path)).size;
	}
	
	/**
	 * Create and save a journal article titled 'Test' with the given additional fields
	 */
	async function createItem(fields) {
		var item = createUnsavedDataObject('item', { itemType: 'journalArticle' });
		item.setField('title', 'Test');
		for (let [field, value] of Object.entries(fields)) {
			item.setField(field, value);
		}
		await item.saveTx();
		return item;
	}
	
	/**
	 * Assert that an attachment was created from the served test PDF
	 */
	async function assertPDFAttachment(attachment) {
		assert.ok(attachment);
		var json = attachment.toJSON();
		assert.equal(json.url, pdfURL);
		assert.equal(json.contentType, 'application/pdf');
		assert.equal(json.filename, 'Test.pdf');
		assert.equal(await getFileSize(attachment.getFilePath()), pdfSize);
	}
	
	before(async function () {
		var origFunc = Zotero.HTTP.request.bind(Zotero.HTTP);
		stub = sinon.stub(Zotero.HTTP, 'request');
		stub.callsFake(function (method, url, options) {
			// Page responses
			var routes = [
				// Page 1 contains a PDF
				[pageURL1, pageURL1, true],
				// DOI 1 redirects to page 1, which contains a PDF
				[doiPrefix + doi1, pageURL1, true],
				// DOI 2 redirects to page 2, which doesn't contain a PDF, but DOI 2 has an
				// OA entry for the PDF URL
				[doiPrefix + doi2, pageURL2, false],
				// DOI 3 redirects to page 2, which doesn't contain a PDF, but DOI 3 contains
				// an OA entry for page 3, which contains a PDF
				[doiPrefix + doi3, pageURL2, false],
				[pageURL3, pageURL3, true],
				// DOI 4 redirects to page 4, which doesn't contain a PDF
				[doiPrefix + doi4, pageURL4, false]
			];
			for (let [expectedURL, responseURL, includePDF] of routes) {
				if (url != expectedURL) continue;
				
				let html = `<html>
<head>
<title>Page Title</title>
<link rel="schema.DC" href="http://purl.org/dc/elements/1.1/" />
<meta name="citation_title" content="Title"/>
<meta name="${includePDF ? 'citation_pdf_url' : 'ignore'}" content="${pdfURL}"/>
</head>
<body>Body</body>
</html>`;
				let parser = new DOMParser();
				let doc = parser.parseFromString(html, 'text/html');
				doc = Zotero.HTTP.wrapDocument(doc, responseURL);
				return {
					status: 200,
					response: doc,
					responseURL
				};
			}
			
			// OA PDF lookup
			if (url.startsWith(ZOTERO_CONFIG.SERVICES_URL)) {
				let json = JSON.parse(options.body);
				let response = [];
				if (json.doi == doi2) {
					response.push({
						url: pdfURL,
						version: 'submittedVersion'
					});
				}
				else if (json.doi == doi3) {
					response.push({
						pageURL: pageURL3,
						version: 'submittedVersion'
					});
				}
				return {
					status: 200,
					response
				};
			}
			
			// Anything else goes through the real implementation
			return origFunc(...arguments);
		});
		
		pdfSize = await getFileSize(
			OS.Path.join(getTestDataDirectory().path, 'test.pdf')
		);
		// Make sure the size comparisons below are checking against a real value
		assert.isAbove(pdfSize, 0);
	});
	
	beforeEach(async function () {
		httpd = new HttpServer();
		httpd.start(port);
		httpd.registerFile(
			pdfURL.substr(baseURL.length - 1),
			Zotero.File.pathToFile(OS.Path.join(getTestDataDirectory().path, 'test.pdf'))
		);
	});
	
	afterEach(async function () {
		stub.resetHistory();
		await new Promise((resolve) => {
			httpd.stop(() => resolve());
		});
	});
	
	after(() => {
		Zotero.HTTP.request.restore();
	});
	
	it("should add a PDF from a resolved DOI", async function () {
		var doi = doi1;
		var item = await createItem({ DOI: doi });
		var attachment = await Zotero.Attachments.addAvailablePDF(item);
		
		assert.isTrue(stub.calledOnce);
		assert.isTrue(stub.calledWith('GET', 'https://doi.org/' + doi));
		await assertPDFAttachment(attachment);
	});
	
	it("should add a PDF from a URL", async function () {
		var url = pageURL1;
		var item = await createItem({ url });
		var attachment = await Zotero.Attachments.addAvailablePDF(item);
		
		assert.isTrue(stub.calledOnce);
		assert.isTrue(stub.calledWith('GET', url));
		await assertPDFAttachment(attachment);
	});
	
	it("should add an OA PDF from a direct URL", async function () {
		var doi = doi2;
		var item = await createItem({ DOI: doi });
		var attachment = await Zotero.Attachments.addAvailablePDF(item);
		
		assert.isTrue(stub.calledTwice);
		var call1 = stub.getCall(0);
		assert.isTrue(call1.calledWith('GET', 'https://doi.org/' + doi));
		var call2 = stub.getCall(1);
		assert.isTrue(call2.calledWith('POST', ZOTERO_CONFIG.SERVICES_URL + 'oa/search'));
		await assertPDFAttachment(attachment);
	});
	
	it("should add an OA PDF from a page URL", async function () {
		var doi = doi3;
		var item = await createItem({ DOI: doi });
		var attachment = await Zotero.Attachments.addAvailablePDF(item);
		
		assert.isTrue(stub.calledThrice);
		// Check the DOI (and get nothing)
		var call1 = stub.getCall(0);
		assert.isTrue(call1.calledWith('GET', 'https://doi.org/' + doi));
		// Check the OA resolver and get page 3
		var call2 = stub.getCall(1);
		assert.isTrue(call2.calledWith('POST', ZOTERO_CONFIG.SERVICES_URL + 'oa/search'));
		// Check page 3 and find the download URL
		var call3 = stub.getCall(2);
		assert.isTrue(call3.calledWith('GET', pageURL3));
		await assertPDFAttachment(attachment);
	});
	
	it("shouldn't try the redirected DOI page again if also in the URL field", async function () {
		var doi = doi4;
		var item = await createItem({ DOI: doi, url: pageURL4 });
		var attachment = await Zotero.Attachments.addAvailablePDF(item);
		
		assert.isTrue(stub.calledTwice);
		var call1 = stub.getCall(0);
		assert.isTrue(call1.calledWith('GET', 'https://doi.org/' + doi));
		var call2 = stub.getCall(1);
		assert.isTrue(call2.calledWith('POST', ZOTERO_CONFIG.SERVICES_URL + 'oa/search'));
		assert.isFalse(attachment);
	});
});
describe("#getBaseDirectoryRelativePath()", function () {
it("should handle base directory at Windows drive root", function () {
Zotero.Prefs.set('baseAttachmentPath', "C:\\");