Automatically download open-access PDFs via Add Item by Identifier
When the associated-files pref is enabled, Add Item by Identifier uses a Zotero Unpaywall mirror to find available open-access PDFs. No details about the contents of searches are logged.
This commit is contained in:
parent
a9dcea7b13
commit
f3a62f5a63
5 changed files with 289 additions and 80 deletions
|
@ -64,10 +64,19 @@ var Zotero_Lookup = new function () {
|
||||||
translate.setTranslator(translators);
|
translate.setTranslator(translators);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
yield translate.translate({
|
let newItems = yield translate.translate({
|
||||||
libraryID,
|
libraryID,
|
||||||
collections: collection ? [collection.id] : false
|
collections: collection ? [collection.id] : false
|
||||||
})
|
});
|
||||||
|
// If there's a DOI and we don't yet have a file, check for open-access PDFs
|
||||||
|
if (identifier.DOI && !newItems.find(x => x.isImportedAttachment())) {
|
||||||
|
try {
|
||||||
|
yield Zotero.Attachments.addOpenAccessPDF(newItems[0]);
|
||||||
|
}
|
||||||
|
catch (e) {
|
||||||
|
Zotero.logError(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
successful++;
|
successful++;
|
||||||
}
|
}
|
||||||
// Continue with other ids on failure
|
// Continue with other ids on failure
|
||||||
|
|
|
@ -343,7 +343,7 @@ Zotero.Attachments = new function(){
|
||||||
};
|
};
|
||||||
|
|
||||||
// Save using remote web browser persist
|
// Save using remote web browser persist
|
||||||
var externalHandlerImport = Zotero.Promise.coroutine(function* (contentType) {
|
var externalHandlerImport = async function (contentType) {
|
||||||
// Rename attachment
|
// Rename attachment
|
||||||
if (renameIfAllowedType && !fileBaseName && this.getRenamedFileTypes().includes(contentType)) {
|
if (renameIfAllowedType && !fileBaseName && this.getRenamedFileTypes().includes(contentType)) {
|
||||||
let parentItem = Zotero.Items.get(parentItemID);
|
let parentItem = Zotero.Items.get(parentItemID);
|
||||||
|
@ -351,91 +351,47 @@ Zotero.Attachments = new function(){
|
||||||
}
|
}
|
||||||
if (fileBaseName) {
|
if (fileBaseName) {
|
||||||
let ext = _getExtensionFromURL(url, contentType);
|
let ext = _getExtensionFromURL(url, contentType);
|
||||||
var fileName = fileBaseName + (ext != '' ? '.' + ext : '');
|
var filename = fileBaseName + (ext != '' ? '.' + ext : '');
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
var fileName = _getFileNameFromURL(url, contentType);
|
var filename = _getFileNameFromURL(url, contentType);
|
||||||
}
|
}
|
||||||
|
|
||||||
const nsIWBP = Components.interfaces.nsIWebBrowserPersist;
|
|
||||||
var wbp = Components.classes["@mozilla.org/embedding/browser/nsWebBrowserPersist;1"]
|
|
||||||
.createInstance(nsIWBP);
|
|
||||||
if(cookieSandbox) cookieSandbox.attachToInterfaceRequestor(wbp);
|
|
||||||
var encodingFlags = false;
|
|
||||||
|
|
||||||
// Create a temporary directory to save to within the storage directory.
|
// Create a temporary directory to save to within the storage directory.
|
||||||
// We don't use the normal temp directory because people might have 'storage'
|
// We don't use the normal temp directory because people might have 'storage'
|
||||||
// symlinked to another volume, which makes moving complicated.
|
// symlinked to another volume, which would make the save slower.
|
||||||
var tmpDir = (yield this.createTemporaryStorageDirectory()).path;
|
var tmpDir = (await this.createTemporaryStorageDirectory()).path;
|
||||||
var tmpFile = OS.Path.join(tmpDir, fileName);
|
var tmpFile = OS.Path.join(tmpDir, filename);
|
||||||
|
|
||||||
// Save to temp dir
|
var attachmentItem;
|
||||||
var deferred = Zotero.Promise.defer();
|
|
||||||
wbp.progressListener = new Zotero.WebProgressFinishListener(function() {
|
|
||||||
deferred.resolve();
|
|
||||||
});
|
|
||||||
|
|
||||||
var nsIURL = Components.classes["@mozilla.org/network/standard-url;1"]
|
|
||||||
.createInstance(Components.interfaces.nsIURL);
|
|
||||||
nsIURL.spec = url;
|
|
||||||
var headers = {};
|
|
||||||
if (referrer) {
|
|
||||||
headers.Referer = referrer;
|
|
||||||
}
|
|
||||||
Zotero.Utilities.Internal.saveURI(wbp, nsIURL, tmpFile, headers);
|
|
||||||
|
|
||||||
|
|
||||||
yield deferred.promise;
|
|
||||||
let sample = yield Zotero.File.getContentsAsync(tmpFile, null, 1000);
|
|
||||||
try {
|
try {
|
||||||
if (contentType == 'application/pdf' &&
|
await this.downloadFile(
|
||||||
Zotero.MIME.sniffForMIMEType(sample) != 'application/pdf') {
|
url,
|
||||||
let errString = "Downloaded PDF did not have MIME type "
|
tmpFile,
|
||||||
+ "'application/pdf' in Attachments.importFromURL()";
|
{
|
||||||
Zotero.debug(errString, 2);
|
cookieSandbox,
|
||||||
Zotero.debug(sample, 3);
|
referrer,
|
||||||
throw(new Error(errString));
|
isPDF: contentType == 'application/pdf'
|
||||||
}
|
|
||||||
|
|
||||||
// Create DB item
|
|
||||||
var attachmentItem;
|
|
||||||
var destDir;
|
|
||||||
yield Zotero.DB.executeTransaction(function*() {
|
|
||||||
// Create a new attachment
|
|
||||||
attachmentItem = new Zotero.Item('attachment');
|
|
||||||
if (libraryID) {
|
|
||||||
attachmentItem.libraryID = libraryID;
|
|
||||||
}
|
}
|
||||||
else if (parentItemID) {
|
);
|
||||||
let {libraryID: parentLibraryID, key: parentKey} =
|
|
||||||
Zotero.Items.getLibraryAndKeyFromID(parentItemID);
|
|
||||||
attachmentItem.libraryID = parentLibraryID;
|
|
||||||
}
|
|
||||||
attachmentItem.setField('title', title ? title : fileName);
|
|
||||||
attachmentItem.setField('url', url);
|
|
||||||
attachmentItem.setField('accessDate', "CURRENT_TIMESTAMP");
|
|
||||||
attachmentItem.parentID = parentItemID;
|
|
||||||
attachmentItem.attachmentLinkMode = Zotero.Attachments.LINK_MODE_IMPORTED_URL;
|
|
||||||
attachmentItem.attachmentContentType = contentType;
|
|
||||||
if (collections) {
|
|
||||||
attachmentItem.setCollections(collections);
|
|
||||||
}
|
|
||||||
attachmentItem.attachmentPath = 'storage:' + fileName;
|
|
||||||
var itemID = yield attachmentItem.save(saveOptions);
|
|
||||||
|
|
||||||
Zotero.Fulltext.queueItem(attachmentItem);
|
attachmentItem = await this.createURLAttachmentFromTemporaryStorageDirectory({
|
||||||
|
directory: tmpDir,
|
||||||
// DEBUG: Does this fail if 'storage' is symlinked to another drive?
|
libraryID,
|
||||||
destDir = this.getStorageDirectory(attachmentItem).path;
|
parentItemID,
|
||||||
yield OS.File.move(tmpDir, destDir);
|
title,
|
||||||
}.bind(this));
|
filename,
|
||||||
} catch (e) {
|
url,
|
||||||
|
contentType,
|
||||||
|
collections,
|
||||||
|
saveOptions
|
||||||
|
});
|
||||||
|
}
|
||||||
|
catch (e) {
|
||||||
try {
|
try {
|
||||||
if (tmpDir) {
|
if (tmpDir) {
|
||||||
yield OS.File.removeDir(tmpDir, { ignoreAbsent: true });
|
await OS.File.removeDir(tmpDir, { ignoreAbsent: true });
|
||||||
}
|
|
||||||
if (destDir) {
|
|
||||||
yield OS.File.removeDir(destDir, { ignoreAbsent: true });
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
catch (e) {
|
catch (e) {
|
||||||
|
@ -445,7 +401,7 @@ Zotero.Attachments = new function(){
|
||||||
}
|
}
|
||||||
|
|
||||||
return attachmentItem;
|
return attachmentItem;
|
||||||
}.bind(this));
|
}.bind(this);
|
||||||
|
|
||||||
var process = function (contentType, hasNativeHandler) {
|
var process = function (contentType, hasNativeHandler) {
|
||||||
// If we can load this natively, use a hidden browser
|
// If we can load this natively, use a hidden browser
|
||||||
|
@ -466,6 +422,83 @@ Zotero.Attachments = new function(){
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Create an imported-URL attachment using a file downloaded to a temporary directory
 * in 'storage', moving the directory into place
 *
 * We download files to temporary 'storage' directories rather than the normal temporary
 * directory because people might have their storage directory on another device, which
 * would make the move a copy.
 *
 * The attachment item is saved first. If moving the directory into place then fails, the
 * item is erased again and the error is rethrown. The notifier queue (the caller's, if one
 * was passed in saveOptions, otherwise a new one) is committed whether or not this succeeds.
 *
 * @param {Object} options
 * @param {String} options.directory - Temporary directory containing the downloaded file
 * @param {Number} [options.libraryID] - Required unless parentItemID is given, in which
 *     case the parent item's library is used
 * @param {String} options.filename
 * @param {String} options.url
 * @param {Number} [options.parentItemID]
 * @param {String} [options.title] - Defaults to the filename
 * @param {String} options.contentType
 * @param {String[]} [options.collections]
 * @param {Object} [options.saveOptions] - Passed to saveTx(); not modified
 * @return {Promise<Zotero.Item>}
 */
this.createURLAttachmentFromTemporaryStorageDirectory = async function (options) {
	if (!options.directory) throw new Error("'directory' not provided");
	// A parent item is enough to determine the library, so only fail if neither is given
	if (!options.libraryID && !options.parentItemID) {
		throw new Error("'libraryID' not provided");
	}
	if (!options.filename) throw new Error("'filename' not provided");
	if (!options.url) throw new Error("'url' not provided");
	if (!options.contentType) throw new Error("'contentType' not provided");
	
	var notifierQueue = (options.saveOptions && options.saveOptions.notifierQueue)
		|| new Zotero.Notifier.Queue;
	var attachmentItem = new Zotero.Item('attachment');
	try {
		// Create DB item
		if (options.libraryID) {
			attachmentItem.libraryID = options.libraryID;
		}
		else {
			let { libraryID: parentLibraryID } =
				Zotero.Items.getLibraryAndKeyFromID(options.parentItemID);
			attachmentItem.libraryID = parentLibraryID;
		}
		attachmentItem.setField('title', options.title != undefined ? options.title : options.filename);
		attachmentItem.setField('url', options.url);
		attachmentItem.setField('accessDate', "CURRENT_TIMESTAMP");
		attachmentItem.parentID = options.parentItemID;
		attachmentItem.attachmentLinkMode = Zotero.Attachments.LINK_MODE_IMPORTED_URL;
		attachmentItem.attachmentContentType = options.contentType;
		if (options.collections) {
			attachmentItem.setCollections(options.collections);
		}
		attachmentItem.attachmentPath = 'storage:' + options.filename;
		// Merge into a fresh object so the caller's saveOptions isn't modified
		await attachmentItem.saveTx(
			Object.assign({}, options.saveOptions, { notifierQueue })
		);
		
		// Move file to final location
		let destDir = this.getStorageDirectory(attachmentItem).path;
		try {
			await OS.File.move(options.directory, destDir);
		}
		catch (e) {
			// Don't leave behind an attachment item without a file
			await attachmentItem.eraseTx({ notifierQueue });
			throw e;
		}
	}
	finally {
		await Zotero.Notifier.commit(notifierQueue);
	}
	
	Zotero.Fulltext.queueItem(attachmentItem);
	
	return attachmentItem;
};
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a link attachment from a URL
|
* Create a link attachment from a URL
|
||||||
*
|
*
|
||||||
|
@ -711,6 +744,138 @@ Zotero.Attachments = new function(){
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Download a URL to a local path, optionally checking that the result is a PDF
 *
 * If any step fails, including the PDF check, an attempt is made to remove the file
 * at `path` and the original error is rethrown.
 *
 * @param {String} url
 * @param {String} path
 * @param {Object} [options]
 * @param {Object} [options.cookieSandbox]
 * @param {String} [options.referrer]
 * @param {Boolean} [options.isPDF] - Delete file if not PDF
 */
this.downloadFile = async function (url, path, options = {}) {
	Zotero.debug(`Downloading ${url}`);
	
	// Starts the save and calls `onFinish` once the progress listener reports completion
	var startSave = function (onFinish) {
		let persist = Components.classes["@mozilla.org/embedding/browser/nsWebBrowserPersist;1"]
			.createInstance(Components.interfaces.nsIWebBrowserPersist);
		if (options.cookieSandbox) {
			options.cookieSandbox.attachToInterfaceRequestor(persist);
		}
		
		persist.progressListener = new Zotero.WebProgressFinishListener(() => onFinish());
		
		let uri = Components.classes["@mozilla.org/network/standard-url;1"]
			.createInstance(Components.interfaces.nsIURL);
		uri.spec = url;
		let requestHeaders = options.referrer ? { Referer: options.referrer } : {};
		Zotero.Utilities.Internal.saveURI(persist, uri, path, requestHeaders);
	};
	
	try {
		await new Zotero.Promise(startSave);
		
		// Read the start of the file; if it's supposed to be a PDF, fail if it isn't one
		let head = await Zotero.File.getContentsAsync(path, null, 1000);
		if (options.isPDF) {
			if (Zotero.MIME.sniffForMIMEType(head) != 'application/pdf') {
				let message = "Downloaded PDF was not a PDF";
				Zotero.debug(message, 2);
				Zotero.debug(head, 3);
				throw new Error(message);
			}
		}
	}
	catch (downloadError) {
		// Clean up the file, but never let a cleanup failure hide the real error
		try {
			await OS.File.remove(path, { ignoreAbsent: true });
		}
		catch (cleanupError) {
			Zotero.debug(cleanupError, 1);
		}
		throw downloadError;
	}
};
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Try to download a file from a list of URLs, keeping the first one that succeeds
 *
 * URLs are tried in order. A failed download is logged and the next URL is tried.
 * Empty entries are skipped, and the passed array is not modified.
 *
 * @param {String[]} urls
 * @param {String} path
 * @param {Object} [options] - Options to pass to this.downloadFile()
 * @return {Promise<String|false>} - URL that succeeded, or false if none
 */
this.downloadFirstAvailableFile = async function (urls, path, options) {
	// Iterate rather than shift() so the caller's array isn't consumed
	for (let url of urls) {
		if (!url) {
			continue;
		}
		try {
			await this.downloadFile(url, path, options);
			return url;
		}
		catch (e) {
			Zotero.debug(`Error downloading ${url}: ${e}`);
		}
	}
	return false;
};
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Look for an open-access PDF for an item and add it as an attachment
 *
 * Does nothing unless the 'downloadAssociatedFiles' pref is set and the item has a DOI.
 * The PDF is downloaded into a temporary storage directory, which is removed again if
 * no URL could be downloaded or if an error occurs.
 *
 * @param {Zotero.Item} item
 * @return {Zotero.Item|false} - New attachment item, or false if unsuccessful
 */
this.addOpenAccessPDF = async function (item) {
	if (!Zotero.Prefs.get('downloadAssociatedFiles')) return false;
	
	let doi = item.getField('DOI');
	if (!doi) return false;
	
	let candidateURLs = await Zotero.Utilities.Internal.getOpenAccessPDFURLs(doi);
	if (!candidateURLs.length) return false;
	
	let baseName = this.getFileBaseNameFromItem(item);
	let tmpDir;
	try {
		tmpDir = (await this.createTemporaryStorageDirectory()).path;
		let tmpFile = OS.Path.join(tmpDir, baseName + '.pdf');
		let url = await this.downloadFirstAvailableFile(
			candidateURLs, tmpFile, { isPDF: true }
		);
		// Nothing could be downloaded, so discard the empty temporary directory
		if (!url) {
			await OS.File.removeDir(tmpDir);
			return false;
		}
		// Await here so that a failure is still handled by the cleanup below
		return await this.createURLAttachmentFromTemporaryStorageDirectory({
			directory: tmpDir,
			libraryID: item.libraryID,
			filename: OS.Path.basename(tmpFile),
			url,
			contentType: 'application/pdf',
			parentItemID: item.id
		});
	}
	catch (e) {
		if (tmpDir) {
			await OS.File.removeDir(tmpDir, { ignoreAbsent: true });
		}
		throw e;
	}
};
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @deprecated Use Zotero.Utilities.cleanURL instead
|
* @deprecated Use Zotero.Utilities.cleanURL instead
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -930,6 +930,35 @@ Zotero.Utilities.Internal = {
|
||||||
},
|
},
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Look for open-access PDFs for a given DOI using Zotero's Unpaywall mirror
|
||||||
|
*
|
||||||
|
* Note: This uses a private API. Please use Unpaywall directly for non-Zotero projects.
|
||||||
|
*
|
||||||
|
* @param {String} doi
|
||||||
|
* @return {String[]} - An array of PDF URLs
|
||||||
|
*/
|
||||||
|
getOpenAccessPDFURLs: async function (doi) {
|
||||||
|
doi = Zotero.Utilities.cleanDOI(doi);
|
||||||
|
if (!doi) {
|
||||||
|
throw new Error(`Invalid DOI '${doi}'`);
|
||||||
|
}
|
||||||
|
Zotero.debug(`Looking for open-access PDFs for ${doi}`);
|
||||||
|
|
||||||
|
var url = ZOTERO_CONFIG.SERVICES_URL + 'oa/search';
|
||||||
|
var req = await Zotero.HTTP.request('POST', url, {
|
||||||
|
headers: {
|
||||||
|
'Content-Type': 'application/json'
|
||||||
|
},
|
||||||
|
body: JSON.stringify({ doi }),
|
||||||
|
responseType: 'json'
|
||||||
|
});
|
||||||
|
var urls = req.response;
|
||||||
|
Zotero.debug(`Found ${urls.length} ${Zotero.Utilities.pluralize(urls.length, ['URL', 'URLs'])}`);
|
||||||
|
return urls;
|
||||||
|
},
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Hyphenate an ISBN based on the registrant table available from
|
* Hyphenate an ISBN based on the registrant table available from
|
||||||
* https://www.isbn-international.org/range_file_generation
|
* https://www.isbn-international.org/range_file_generation
|
||||||
|
|
|
@ -12,6 +12,7 @@ var ZOTERO_CONFIG = {
|
||||||
API_URL: 'https://api.zotero.org/',
|
API_URL: 'https://api.zotero.org/',
|
||||||
STREAMING_URL: 'wss://stream.zotero.org/',
|
STREAMING_URL: 'wss://stream.zotero.org/',
|
||||||
RECOGNIZE_URL: 'https://recognize.zotero.org/',
|
RECOGNIZE_URL: 'https://recognize.zotero.org/',
|
||||||
|
SERVICES_URL: 'https://services.zotero.org/',
|
||||||
API_VERSION: 3,
|
API_VERSION: 3,
|
||||||
CONNECTOR_MIN_VERSION: '5.0.39', // show upgrade prompt for requests from below this version
|
CONNECTOR_MIN_VERSION: '5.0.39', // show upgrade prompt for requests from below this version
|
||||||
PREF_BRANCH: 'extensions.zotero.',
|
PREF_BRANCH: 'extensions.zotero.',
|
||||||
|
|
|
@ -49,6 +49,11 @@ describe("Add Item by Identifier", function() {
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it.skip("should add a DOI with an open-access PDF");
|
||||||
|
|
||||||
|
// e.g., arXiv
|
||||||
|
it.skip("should not add a PDF if a DOI already retrieves one");
|
||||||
|
|
||||||
it("should add a PMID", function() {
|
it("should add a PMID", function() {
|
||||||
this.timeout(10000);
|
this.timeout(10000);
|
||||||
return lookupIdentifier(win, "24297125").then(function(ids) {
|
return lookupIdentifier(win, "24297125").then(function(ids) {
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue