Automatically download open-access PDFs via Add Item by Identifier
When the associated-files pref is enabled, Add Item by Identifier uses a Zotero Unpaywall mirror to find available open-access PDFs. No details about the contents of searches are logged.
This commit is contained in:
parent
a9dcea7b13
commit
f3a62f5a63
5 changed files with 289 additions and 80 deletions
|
@ -64,10 +64,19 @@ var Zotero_Lookup = new function () {
|
|||
translate.setTranslator(translators);
|
||||
|
||||
try {
|
||||
yield translate.translate({
|
||||
let newItems = yield translate.translate({
|
||||
libraryID,
|
||||
collections: collection ? [collection.id] : false
|
||||
})
|
||||
});
|
||||
// If there's a DOI and we don't yet have a file, check for open-access PDFs
|
||||
if (identifier.DOI && !newItems.find(x => x.isImportedAttachment())) {
|
||||
try {
|
||||
yield Zotero.Attachments.addOpenAccessPDF(newItems[0]);
|
||||
}
|
||||
catch (e) {
|
||||
Zotero.logError(e);
|
||||
}
|
||||
}
|
||||
successful++;
|
||||
}
|
||||
// Continue with other ids on failure
|
||||
|
|
|
@ -343,7 +343,7 @@ Zotero.Attachments = new function(){
|
|||
};
|
||||
|
||||
// Save using remote web browser persist
|
||||
var externalHandlerImport = Zotero.Promise.coroutine(function* (contentType) {
|
||||
var externalHandlerImport = async function (contentType) {
|
||||
// Rename attachment
|
||||
if (renameIfAllowedType && !fileBaseName && this.getRenamedFileTypes().includes(contentType)) {
|
||||
let parentItem = Zotero.Items.get(parentItemID);
|
||||
|
@ -351,91 +351,47 @@ Zotero.Attachments = new function(){
|
|||
}
|
||||
if (fileBaseName) {
|
||||
let ext = _getExtensionFromURL(url, contentType);
|
||||
var fileName = fileBaseName + (ext != '' ? '.' + ext : '');
|
||||
var filename = fileBaseName + (ext != '' ? '.' + ext : '');
|
||||
}
|
||||
else {
|
||||
var fileName = _getFileNameFromURL(url, contentType);
|
||||
var filename = _getFileNameFromURL(url, contentType);
|
||||
}
|
||||
|
||||
const nsIWBP = Components.interfaces.nsIWebBrowserPersist;
|
||||
var wbp = Components.classes["@mozilla.org/embedding/browser/nsWebBrowserPersist;1"]
|
||||
.createInstance(nsIWBP);
|
||||
if(cookieSandbox) cookieSandbox.attachToInterfaceRequestor(wbp);
|
||||
var encodingFlags = false;
|
||||
|
||||
// Create a temporary directory to save to within the storage directory.
|
||||
// We don't use the normal temp directory because people might have 'storage'
|
||||
// symlinked to another volume, which makes moving complicated.
|
||||
var tmpDir = (yield this.createTemporaryStorageDirectory()).path;
|
||||
var tmpFile = OS.Path.join(tmpDir, fileName);
|
||||
// symlinked to another volume, which would make the save slower.
|
||||
var tmpDir = (await this.createTemporaryStorageDirectory()).path;
|
||||
var tmpFile = OS.Path.join(tmpDir, filename);
|
||||
|
||||
var attachmentItem;
|
||||
|
||||
// Save to temp dir
|
||||
var deferred = Zotero.Promise.defer();
|
||||
wbp.progressListener = new Zotero.WebProgressFinishListener(function() {
|
||||
deferred.resolve();
|
||||
});
|
||||
|
||||
var nsIURL = Components.classes["@mozilla.org/network/standard-url;1"]
|
||||
.createInstance(Components.interfaces.nsIURL);
|
||||
nsIURL.spec = url;
|
||||
var headers = {};
|
||||
if (referrer) {
|
||||
headers.Referer = referrer;
|
||||
}
|
||||
Zotero.Utilities.Internal.saveURI(wbp, nsIURL, tmpFile, headers);
|
||||
|
||||
|
||||
yield deferred.promise;
|
||||
let sample = yield Zotero.File.getContentsAsync(tmpFile, null, 1000);
|
||||
try {
|
||||
if (contentType == 'application/pdf' &&
|
||||
Zotero.MIME.sniffForMIMEType(sample) != 'application/pdf') {
|
||||
let errString = "Downloaded PDF did not have MIME type "
|
||||
+ "'application/pdf' in Attachments.importFromURL()";
|
||||
Zotero.debug(errString, 2);
|
||||
Zotero.debug(sample, 3);
|
||||
throw(new Error(errString));
|
||||
}
|
||||
|
||||
// Create DB item
|
||||
var attachmentItem;
|
||||
var destDir;
|
||||
yield Zotero.DB.executeTransaction(function*() {
|
||||
// Create a new attachment
|
||||
attachmentItem = new Zotero.Item('attachment');
|
||||
if (libraryID) {
|
||||
attachmentItem.libraryID = libraryID;
|
||||
await this.downloadFile(
|
||||
url,
|
||||
tmpFile,
|
||||
{
|
||||
cookieSandbox,
|
||||
referrer,
|
||||
isPDF: contentType == 'application/pdf'
|
||||
}
|
||||
else if (parentItemID) {
|
||||
let {libraryID: parentLibraryID, key: parentKey} =
|
||||
Zotero.Items.getLibraryAndKeyFromID(parentItemID);
|
||||
attachmentItem.libraryID = parentLibraryID;
|
||||
}
|
||||
attachmentItem.setField('title', title ? title : fileName);
|
||||
attachmentItem.setField('url', url);
|
||||
attachmentItem.setField('accessDate', "CURRENT_TIMESTAMP");
|
||||
attachmentItem.parentID = parentItemID;
|
||||
attachmentItem.attachmentLinkMode = Zotero.Attachments.LINK_MODE_IMPORTED_URL;
|
||||
attachmentItem.attachmentContentType = contentType;
|
||||
if (collections) {
|
||||
attachmentItem.setCollections(collections);
|
||||
}
|
||||
attachmentItem.attachmentPath = 'storage:' + fileName;
|
||||
var itemID = yield attachmentItem.save(saveOptions);
|
||||
|
||||
Zotero.Fulltext.queueItem(attachmentItem);
|
||||
|
||||
// DEBUG: Does this fail if 'storage' is symlinked to another drive?
|
||||
destDir = this.getStorageDirectory(attachmentItem).path;
|
||||
yield OS.File.move(tmpDir, destDir);
|
||||
}.bind(this));
|
||||
} catch (e) {
|
||||
);
|
||||
|
||||
attachmentItem = await this.createURLAttachmentFromTemporaryStorageDirectory({
|
||||
directory: tmpDir,
|
||||
libraryID,
|
||||
parentItemID,
|
||||
title,
|
||||
filename,
|
||||
url,
|
||||
contentType,
|
||||
collections,
|
||||
saveOptions
|
||||
});
|
||||
}
|
||||
catch (e) {
|
||||
try {
|
||||
if (tmpDir) {
|
||||
yield OS.File.removeDir(tmpDir, { ignoreAbsent: true });
|
||||
}
|
||||
if (destDir) {
|
||||
yield OS.File.removeDir(destDir, { ignoreAbsent: true });
|
||||
await OS.File.removeDir(tmpDir, { ignoreAbsent: true });
|
||||
}
|
||||
}
|
||||
catch (e) {
|
||||
|
@ -445,7 +401,7 @@ Zotero.Attachments = new function(){
|
|||
}
|
||||
|
||||
return attachmentItem;
|
||||
}.bind(this));
|
||||
}.bind(this);
|
||||
|
||||
var process = function (contentType, hasNativeHandler) {
|
||||
// If we can load this natively, use a hidden browser
|
||||
|
@ -466,6 +422,83 @@ Zotero.Attachments = new function(){
|
|||
});
|
||||
|
||||
|
||||
/**
 * Create an imported-URL attachment using a file downloaded to a temporary directory
 * in 'storage', moving the directory into place
 *
 * We download files to temporary 'storage' directories rather than the normal temporary
 * directory because people might have their storage directory on another device, which
 * would make the move a copy.
 *
 * The attachment item is saved first and the temporary directory is then moved to the item's
 * storage directory. If the move fails, the newly saved item is erased and the error is
 * rethrown.
 *
 * @param {Object} options
 * @param {String} options.directory - Temporary directory containing the downloaded file
 * @param {Number} options.libraryID
 * @param {String} options.filename - Name of the file within the directory
 * @param {String} options.url
 * @param {Number} [options.parentItemID]
 * @param {String} [options.title] - Defaults to the filename
 * @param {String} options.contentType
 * @param {String[]} [options.collections]
 * @param {Object} [options.saveOptions] - Options passed to saveTx(). This object is not
 *     modified. If it includes a 'notifierQueue', that queue is used for the save but is
 *     left for the caller to commit.
 * @return {Promise<Zotero.Item>}
 */
this.createURLAttachmentFromTemporaryStorageDirectory = async function (options) {
	if (!options.directory) throw new Error("'directory' not provided");
	if (!options.libraryID) throw new Error("'libraryID' not provided");
	if (!options.filename) throw new Error("'filename' not provided");
	if (!options.url) throw new Error("'url' not provided");
	if (!options.contentType) throw new Error("'contentType' not provided");
	
	// Only commit the queue if we created it. A queue passed in by the caller is the
	// caller's to commit, so that it can batch notifications across multiple operations.
	var callerQueue = options.saveOptions && options.saveOptions.notifierQueue;
	var notifierQueue = callerQueue || new Zotero.Notifier.Queue;
	var attachmentItem = new Zotero.Item('attachment');
	try {
		// Create DB item
		//
		// libraryID is required above, so there's no need to fall back to the parent
		// item's library
		attachmentItem.libraryID = options.libraryID;
		attachmentItem.setField('title', options.title != undefined ? options.title : options.filename);
		attachmentItem.setField('url', options.url);
		attachmentItem.setField('accessDate', "CURRENT_TIMESTAMP");
		attachmentItem.parentID = options.parentItemID;
		attachmentItem.attachmentLinkMode = Zotero.Attachments.LINK_MODE_IMPORTED_URL;
		attachmentItem.attachmentContentType = options.contentType;
		if (options.collections) {
			attachmentItem.setCollections(options.collections);
		}
		attachmentItem.attachmentPath = 'storage:' + options.filename;
		// Copy the save options into a new object rather than modifying the caller's
		await attachmentItem.saveTx(
			Object.assign({}, options.saveOptions, { notifierQueue })
		);
		
		// Move file to final location
		let destDir = this.getStorageDirectory(attachmentItem).path;
		try {
			await OS.File.move(options.directory, destDir);
		}
		catch (e) {
			// Don't leave behind an attachment item without a file
			await attachmentItem.eraseTx({ notifierQueue });
			throw e;
		}
	}
	finally {
		if (!callerQueue) {
			await Zotero.Notifier.commit(notifierQueue);
		}
	}
	
	Zotero.Fulltext.queueItem(attachmentItem);
	
	return attachmentItem;
};
|
||||
|
||||
|
||||
/**
|
||||
* Create a link attachment from a URL
|
||||
*
|
||||
|
@ -709,8 +742,140 @@ Zotero.Attachments = new function(){
|
|||
|
||||
return attachmentItem;
|
||||
});
|
||||
|
||||
|
||||
|
||||
|
||||
/**
 * Download a URL to a local file, optionally checking that the result is a PDF
 *
 * If the download or the PDF check fails, the file at the given path is removed
 * (best effort) and the error is rethrown.
 *
 * @param {String} url - URL to download
 * @param {String} path - Path to save the file to
 * @param {Object} [options]
 * @param {Object} [options.cookieSandbox] - Attached to the web browser persist object
 * @param {String} [options.referrer] - Sent as the 'Referer' header
 * @param {Boolean} [options.isPDF] - Delete file and throw if the content isn't a PDF
 */
this.downloadFile = async function (url, path, options = {}) {
	Zotero.debug(`Downloading ${url}`);
	
	// Wrap the listener-based nsIWebBrowserPersist save in a promise that resolves
	// once the WebProgressFinishListener callback is invoked
	const saveToDisk = () => new Zotero.Promise((resolve) => {
		const persist = Components.classes["@mozilla.org/embedding/browser/nsWebBrowserPersist;1"]
			.createInstance(Components.interfaces.nsIWebBrowserPersist);
		if (options.cookieSandbox) {
			options.cookieSandbox.attachToInterfaceRequestor(persist);
		}
		
		persist.progressListener = new Zotero.WebProgressFinishListener(() => resolve());
		
		const uri = Components.classes["@mozilla.org/network/standard-url;1"]
			.createInstance(Components.interfaces.nsIURL);
		uri.spec = url;
		const requestHeaders = options.referrer ? { Referer: options.referrer } : {};
		Zotero.Utilities.Internal.saveURI(persist, uri, path, requestHeaders);
	});
	
	try {
		await saveToDisk();
		
		// If the file is supposed to be a PDF, fail if it's not
		const sample = await Zotero.File.getContentsAsync(path, null, 1000);
		if (options.isPDF) {
			const sniffedType = Zotero.MIME.sniffForMIMEType(sample);
			if (sniffedType != 'application/pdf') {
				const errString = "Downloaded PDF was not a PDF";
				Zotero.debug(errString, 2);
				Zotero.debug(sample, 3);
				throw new Error(errString);
			}
		}
	}
	catch (downloadError) {
		// Clean up whatever was written, but always surface the original failure
		try {
			await OS.File.remove(path, { ignoreAbsent: true });
		}
		catch (removeError) {
			Zotero.debug(removeError, 1);
		}
		throw downloadError;
	}
};
|
||||
|
||||
|
||||
/**
 * Try to download a file from a list of URLs, keeping the first one that succeeds
 *
 * URLs are tried in order. A failed download is logged and the next URL is tried.
 * The passed array is not modified, and empty entries are skipped.
 *
 * @param {String[]} urls
 * @param {String} path
 * @param {Object} [options] - Options to pass to this.downloadFile()
 * @return {Promise<String|false>} - URL that succeeded, or false if none
 */
this.downloadFirstAvailableFile = async function (urls, path, options) {
	// Iterate rather than shift() so that the caller's array is left intact
	for (let url of urls) {
		if (!url) {
			continue;
		}
		try {
			await this.downloadFile(url, path, options);
			return url;
		}
		catch (e) {
			Zotero.debug(`Error downloading ${url}: ${e}`);
		}
	}
	return false;
};
|
||||
|
||||
|
||||
/**
 * Look for an open-access PDF for an item and add it as an attachment
 *
 * Does nothing unless the 'downloadAssociatedFiles' pref is enabled and the item has a DOI.
 * The PDF is downloaded to a temporary storage directory, which is removed if no URL
 * could be downloaded or if an error occurs.
 *
 * @param {Zotero.Item} item
 * @return {Promise<Zotero.Item|false>} - New attachment item, or false if unsuccessful
 */
this.addOpenAccessPDF = async function (item) {
	if (!Zotero.Prefs.get('downloadAssociatedFiles')) {
		return false;
	}
	
	var doi = item.getField('DOI');
	if (!doi) {
		return false;
	}
	
	var urls = await Zotero.Utilities.Internal.getOpenAccessPDFURLs(doi);
	if (!urls.length) {
		return false;
	}
	
	var fileBaseName = this.getFileBaseNameFromItem(item);
	var tmpDir;
	var tmpFile;
	var attachmentItem = false;
	try {
		tmpDir = (await this.createTemporaryStorageDirectory()).path;
		tmpFile = OS.Path.join(tmpDir, fileBaseName + '.pdf');
		let url = await this.downloadFirstAvailableFile(
			urls, tmpFile, { isPDF: true }
		);
		if (url) {
			attachmentItem = await this.createURLAttachmentFromTemporaryStorageDirectory({
				directory: tmpDir,
				libraryID: item.libraryID,
				filename: OS.Path.basename(tmpFile),
				url,
				contentType: 'application/pdf',
				parentItemID: item.id
			});
		}
		else {
			// Nothing could be downloaded, so the temporary directory is no longer needed
			await OS.File.removeDir(tmpDir, { ignoreAbsent: true });
		}
	}
	catch (e) {
		if (tmpDir) {
			// Clean up on a best-effort basis so that a cleanup failure doesn't replace
			// the error that actually caused the problem
			try {
				await OS.File.removeDir(tmpDir, { ignoreAbsent: true });
			}
			catch (cleanupError) {
				Zotero.debug(cleanupError, 1);
			}
		}
		throw e;
	}
	
	return attachmentItem;
};
|
||||
|
||||
|
||||
/**
|
||||
* @deprecated Use Zotero.Utilities.cleanURL instead
|
||||
*/
|
||||
|
|
|
@ -930,6 +930,35 @@ Zotero.Utilities.Internal = {
|
|||
},
|
||||
|
||||
|
||||
/**
|
||||
* Look for open-access PDFs for a given DOI using Zotero's Unpaywall mirror
|
||||
*
|
||||
* Note: This uses a private API. Please use Unpaywall directly for non-Zotero projects.
|
||||
*
|
||||
* @param {String} doi
|
||||
* @return {String[]} - An array of PDF URLs
|
||||
*/
|
||||
getOpenAccessPDFURLs: async function (doi) {
|
||||
doi = Zotero.Utilities.cleanDOI(doi);
|
||||
if (!doi) {
|
||||
throw new Error(`Invalid DOI '${doi}'`);
|
||||
}
|
||||
Zotero.debug(`Looking for open-access PDFs for ${doi}`);
|
||||
|
||||
var url = ZOTERO_CONFIG.SERVICES_URL + 'oa/search';
|
||||
var req = await Zotero.HTTP.request('POST', url, {
|
||||
headers: {
|
||||
'Content-Type': 'application/json'
|
||||
},
|
||||
body: JSON.stringify({ doi }),
|
||||
responseType: 'json'
|
||||
});
|
||||
var urls = req.response;
|
||||
Zotero.debug(`Found ${urls.length} ${Zotero.Utilities.pluralize(urls.length, ['URL', 'URLs'])}`);
|
||||
return urls;
|
||||
},
|
||||
|
||||
|
||||
/**
|
||||
* Hyphenate an ISBN based on the registrant table available from
|
||||
* https://www.isbn-international.org/range_file_generation
|
||||
|
|
|
@ -12,6 +12,7 @@ var ZOTERO_CONFIG = {
|
|||
API_URL: 'https://api.zotero.org/',
|
||||
STREAMING_URL: 'wss://stream.zotero.org/',
|
||||
RECOGNIZE_URL: 'https://recognize.zotero.org/',
|
||||
SERVICES_URL: 'https://services.zotero.org/',
|
||||
API_VERSION: 3,
|
||||
CONNECTOR_MIN_VERSION: '5.0.39', // show upgrade prompt for requests from below this version
|
||||
PREF_BRANCH: 'extensions.zotero.',
|
||||
|
|
|
@ -49,6 +49,11 @@ describe("Add Item by Identifier", function() {
|
|||
});
|
||||
});
|
||||
|
||||
it.skip("should add a DOI with an open-access PDF");
|
||||
|
||||
// e.g., arXiv
|
||||
it.skip("should not add a PDF if a DOI already retrieves one");
|
||||
|
||||
it("should add a PMID", function() {
|
||||
this.timeout(10000);
|
||||
return lookupIdentifier(win, "24297125").then(function(ids) {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue