diff --git a/chrome/content/zotero/components/progressQueueTable.jsx b/chrome/content/zotero/components/progressQueueTable.jsx index 4b08ee4c44..c010c7639e 100644 --- a/chrome/content/zotero/components/progressQueueTable.jsx +++ b/chrome/content/zotero/components/progressQueueTable.jsx @@ -27,7 +27,7 @@ import PropTypes from 'prop-types'; import { getDOMElement } from 'components/icons'; import VirtualizedTable, { renderCell } from 'components/virtualized-table'; -import { noop } from './utils'; +import { nextHTMLID, noop } from './utils'; function getImageByStatus(status) { @@ -45,8 +45,9 @@ function getImageByStatus(status) { const ProgressQueueTable = ({ onActivate = noop, progressQueue }) => { const treeRef = useRef(null); + const htmlID = useRef(nextHTMLID()); - const getRowCount = useCallback(() => progressQueue.getRows().length, [progressQueue]); + const getRowCount = useCallback(() => progressQueue.getTotal(), [progressQueue]); const rowToTreeItem = useCallback((index, selection, oldDiv = null, columns) => { let rows = progressQueue.getRows(); @@ -92,6 +93,7 @@ const ProgressQueueTable = ({ onActivate = noop, progressQueue }) => { progressQueue.addListener('rowadded', refreshTree); progressQueue.addListener('rowupdated', refreshTree); progressQueue.addListener('rowdeleted', refreshTree); + return () => { progressQueue.removeListener('rowadded', refreshTree); progressQueue.removeListener('rowupdated', refreshTree); @@ -103,7 +105,7 @@ const ProgressQueueTable = ({ onActivate = noop, progressQueue }) => { . 
***** END LICENSE BLOCK ***** */ -'use strict'; - const noop = () => {}; + function getDragTargetOrient(event, target) { const elem = target || event.target; const {y, height} = elem.getBoundingClientRect(); @@ -72,9 +71,30 @@ function createDragHandler({ handleDrag, handleDragStop }) { return { start: onDragStart, stop: onDragStop - } + }; } -export { - noop, getDragTargetOrient, createDragHandler +var _htmlID = 1; /* module-wide counter backing nextHTMLID(); bumped on every call */ + +const nextHTMLID = (prefix = 'id-') => prefix + _htmlID++; /* returns prefix followed by the current counter value, then increments the counter */ + +const scrollIntoViewIfNeeded = (element, container, opts = {}) => { /* calls element.scrollIntoView(opts) only when the element's offsetTop/clientHeight extent lies outside the container's visible scroll range; returns the resulting change in container.scrollTop, or 0 when no scroll was attempted */ + const containerTop = container.scrollTop; + const containerBottom = containerTop + container.clientHeight; + const elementTop = element.offsetTop; + const elementBottom = elementTop + element.clientHeight; + + if (elementTop < containerTop || elementBottom > containerBottom) { + const before = container.scrollTop; + element.scrollIntoView(opts); + const after = container.scrollTop; + return after - before; + } + return 0; +}; + +const stopPropagation = ev => ev.stopPropagation(); /* shorthand event handler that only stops propagation */ + +export { + nextHTMLID, noop, getDragTargetOrient, createDragHandler, scrollIntoViewIfNeeded, stopPropagation }; diff --git a/chrome/content/zotero/fileInterface.js b/chrome/content/zotero/fileInterface.js index 86b9ca2a9b..f78306aa86 100644 --- a/chrome/content/zotero/fileInterface.js +++ b/chrome/content/zotero/fileInterface.js @@ -448,6 +448,15 @@ var Zotero_File_Interface = new function() { translation.createNewCollection = createNewCollection; translation.mendeleyCode = options.mendeleyCode; } + else if (options.folder) { + Components.utils.import("chrome://zotero/content/import/folderImport.js"); + translation = new Zotero_Import_Folder({ + folder: options.folder, + recreateStructure: options.recreateStructure, + fileTypes: options.fileTypes, + mimeTypes: options.mimeTypes, + }); + } else { // Check if the file is an SQLite database var sample = yield Zotero.File.getSample(file.path); diff --git 
a/chrome/content/zotero/import/folderImport.js b/chrome/content/zotero/import/folderImport.js
new file mode 100644
index 0000000000..e3bfc0f693
--- /dev/null
+++ b/chrome/content/zotero/import/folderImport.js
@@ -0,0 +1,298 @@
+var EXPORTED_SYMBOLS = ["Zotero_Import_Folder"]; // eslint-disable-line no-unused-vars
+
+Components.utils.import("resource://gre/modules/Services.jsm");
+Services.scriptloader.loadSubScript("chrome://zotero/content/include.js");
+const multimatch = require('multimatch');
+
+/**
+ * Walk dirPath depth-first and collect every file whose name does not start with a dot.
+ *
+ * @param {String} dirPath - directory to scan
+ * @param {String[]} [parents] - names of the directories between the scan root and dirPath
+ * @param {Object[]} [files] - accumulator shared by the recursive calls
+ * @returns {Promise<Object[]>} the accumulator, one { parents, path, name } entry per file
+ */
+const collectFilesRecursive = async (dirPath, parents = [], files = []) => {
+	await Zotero.File.iterateDirectory(dirPath, async ({ isDir, _isSymlink, name, path }) => {
+		if (isDir) {
+			await collectFilesRecursive(path, [...parents, name], files);
+		}
+		// TODO: Also check for hidden file attribute on windows?
+		else if (!name.startsWith('.')) {
+			files.push({ parents, path, name });
+		}
+	});
+	return files;
+};
+
+/**
+ * Find a collection by exact name among the children of parentCollectionID or, when no
+ * parent is given, among the collections Zotero.Collections.getByLibrary() returns.
+ *
+ * @returns {Object|undefined} the matching collection, or undefined when there is none
+ */
+const findCollection = (libraryID, parentCollectionID, collectionName) => {
+	const collections = parentCollectionID
+		? Zotero.Collections.getByParent(parentCollectionID)
+		: Zotero.Collections.getByLibrary(libraryID);
+
+	return collections.find(c => c.name === collectionName);
+};
+
+/**
+ * Look up an existing top-level item by file hash. Items related through
+ * 'zotero:attachmentHash' are tried first, 'zotero:fileHash' is the fallback.
+ *
+ * @returns {Promise<Object|null>} first non-deleted top-level match in libraryID, or null
+ */
+const findItemByHash = async (libraryID, hash) => {
+	let items = (await Zotero.Relations.getByPredicateAndObject('item', 'zotero:attachmentHash', hash))
+		.filter(item => item.libraryID == libraryID && !item.deleted && item.isTopLevelItem());
+
+	if (!items.length) {
+		items = (await Zotero.Relations.getByPredicateAndObject('item', 'zotero:fileHash', hash))
+			.filter(item => item.libraryID == libraryID && !item.deleted && item.isTopLevelItem());
+	}
+
+	if (!items.length) {
+		return null;
+	}
+
+	return items[0];
+};
+
+/**
+ * Imports the files found under a folder as attachments and hands them to
+ * Zotero.RecognizePDF. Attachments created by a run are collected in newItems.
+ * NOTE(review): the method set looks modelled on a translation object - confirm with callers.
+ */
+class Zotero_Import_Folder { // eslint-disable-line camelcase,no-unused-vars
+	/**
+	 * @param {Object} options
+	 * @param {String[]} [options.mimeTypes] - MIME types to import (defaults to PDF only)
+	 * @param {String} [options.fileTypes] - comma-separated name patterns, matched case-insensitively with multimatch
+	 * @param {String} options.folder - root folder to import
+	 * @param {Number} [options.libraryID] - target library
+	 * @param {Boolean} [options.recreateStructure] - mirror sub-folders as collections
+	 */
+	constructor({ mimeTypes = ['application/pdf'], fileTypes, folder, libraryID, recreateStructure }) {
+		this.folder = folder;
+		this.libraryID = libraryID;
+		this.newItems = [];
+		this.recreateStructure = recreateStructure;
+		this.fileTypes = fileTypes && fileTypes.length ? fileTypes.split(',').map(ft => ft.trim()) : [];
+		this._progress = 0;
+		this._progressMax = 0;
+		this._itemDone = () => {};
+		this.types = mimeTypes; // whitelist of mime types to process
+	}
+
+	setLocation(folder) {
+		this.folder = folder;
+	}
+
+	setHandler(name, handler) {
+		switch (name) {
+			case 'itemDone':
+				this._itemDone = handler;
+				break;
+		}
+	}
+
+	setTranslator() {}
+
+	/**
+	 * @returns {Number} percentage of progress ticks completed; 0 while no work is scheduled
+	 */
+	getProgress() {
+		// _progressMax is 0 until translate() has listed the files, and stays 0 for an
+		// empty folder; dividing by it would report NaN
+		if (!this._progressMax) {
+			return 0;
+		}
+		return this._progress / this._progressMax * 100;
+	}
+
+	async getTranslators() {
+		return [{ label: 'Folder import' }];
+	}
+
+	/**
+	 * Collect, de-duplicate, attach and recognize the files under this.folder.
+	 *
+	 * @param {Object} [options]
+	 * @param {Number[]} [options.collections] - collection IDs for imported items; with recreateStructure the first one parents the recreated folder collections
+	 * @param {Number} [options.libraryID] - target library, used when none was passed to the constructor
+	 * @param {Boolean} [options.linkFiles] - attach via linkFromFile() instead of importFromFile()
+	 * @returns {Promise<void>}
+	 */
+	async translate({ collections = [], libraryID: requestedLibraryID, linkFiles = false } = {}) {
+		// the constructor value wins; a libraryID handed to translate() is the fallback instead of being ignored
+		const libraryID = this.libraryID || requestedLibraryID || Zotero.Libraries.userLibraryID;
+		const files = await collectFilesRecursive(this.folder);
+
+		// import is done in four phases: sniff for mime type, calculate md5, import as attachment, recognize.
+		// hence number of files is multiplied by 4 to determine max progress
+		this._progressMax = files.length * 4;
+
+		const mimeTypes = await Promise.all(files.map(
+			async ({ path }) => {
+				const mimeType = Zotero.MIME.sniffForMIMEType(await Zotero.File.getSample(path));
+				this._progress++;
+				this._itemDone();
+				return mimeType;
+			}
+		));
+
+		const fileHashes = await Promise.all(files.map(
+			async ({ name, path }, index) => {
+				const contentType = mimeTypes[index];
+				this._progress++;
+				if (!(this.types.includes(contentType) || multimatch(name, this.fileTypes, { nocase: true }).length > 0)) {
+					// don't bother calculating a hash for file that will be ignored
+					return null;
+				}
+				const md5Hash = await Zotero.Utilities.Internal.md5Async(path);
+				this._itemDone();
+				return md5Hash;
+			}
+		));
+
+		files.forEach((fileData, index) => {
+			fileData.parentCollectionIDs = (collections && collections.length) ? [...collections] : [];
+			fileData.mimeType = mimeTypes[index];
+		});
+
+		if (this.recreateStructure) {
+			for (const fileData of files) {
+				const { parents } = fileData;
+				let prevParentCollectionID = null;
+				if (parents.length) {
+					prevParentCollectionID = (collections && collections.length) ? collections[0] : null;
+					for (const parentName of parents) {
+						const parentCollection = findCollection(libraryID, prevParentCollectionID, parentName) || new Zotero.Collection;
+						parentCollection.libraryID = libraryID;
+						parentCollection.name = parentName;
+						if (prevParentCollectionID) {
+							parentCollection.parentID = prevParentCollectionID;
+						}
+						await parentCollection.saveTx({ skipSelect: true }); //eslint-disable-line no-await-in-loop
+						prevParentCollectionID = parentCollection.id;
+					}
+				}
+				if (prevParentCollectionID) {
+					fileData.parentCollectionIDs = [prevParentCollectionID];
+				}
+			}
+		}
+
+		// index files by hash to avoid importing duplicate files. Keep track of where duplicates were found so that
+		// duplicate item is still added to one collection per folder
+		const fileDataByHash = {};
+		files.forEach((fileData, index) => {
+			const hash = fileHashes[index];
+			if (hash in fileDataByHash) {
+				fileDataByHash[hash].parentCollectionIDs.push(...fileData.parentCollectionIDs);
+			}
+			else {
+				fileDataByHash[hash] = fileData;
+			}
+		});
+
+		// advance progress to account for duplicates found within file structure
+		// these files won't be imported nor recognized so advance 2 ticks per file
+		this._progress += 2 * (files.length - Object.keys(fileDataByHash).length);
+		this._itemDone();
+
+		const attachmentItemHashLookup = {};
+		const attachmentItems = await Promise.all(Object.entries(fileDataByHash).map(
+			async ([hash, { name, path, parentCollectionIDs, mimeType }]) => {
+				const options = {
+					collections: parentCollectionIDs,
+					contentType: mimeType,
+					file: path,
+					libraryID,
+				};
+
+				let attachmentItem = null;
+
+				if ((this.types.includes(mimeType) || multimatch(name, this.fileTypes, { nocase: true }).length > 0)) {
+					const existingItem = await findItemByHash(libraryID, hash);
+
+					if (existingItem) {
+						existingItem.setCollections([...existingItem.getCollections(), ...parentCollectionIDs]);
+						// wait for the save so that a failure rejects translate() instead of being lost
+						await existingItem.saveTx({ skipSelect: true });
+					}
+					else {
+						if (linkFiles) {
+							attachmentItem = await Zotero.Attachments.linkFromFile(options);
+						}
+						else {
+							attachmentItem = await Zotero.Attachments.importFromFile(options);
+						}
+
+						this.newItems.push(attachmentItem);
+						attachmentItemHashLookup[attachmentItem.id] = hash;
+					}
+				}
+
+				if (attachmentItem && !Zotero.RecognizePDF.canRecognize(attachmentItem)) {
+					attachmentItem.setRelations({ 'zotero:fileHash': hash });
+					await attachmentItem.saveTx({ skipSelect: true });
+					attachmentItem = null;
+				}
+				this._progress++;
+				this._itemDone();
+				return attachmentItem;
+			}
+		));
+
+
+		// discard unrecognizable items, increase progress for discarded items
+		const recognizableItems = attachmentItems.filter(item => item !== null);
+		this._progress += attachmentItems.length - recognizableItems.length;
+		this._itemDone();
+
+		const recognizeQueue = Zotero.ProgressQueues.get('recognize');
+		const itemsToSavePostRecognize = [];
+
+		const processRecognizedItem = ({ status, id }) => {
+			const updatedItem = recognizableItems.find(i => i.id === id);
+			// find() yields undefined when the updated row is not one of the items queued below;
+			// skip such rows instead of dereferencing undefined or counting them as progress
+			if (!updatedItem) {
+				return;
+			}
+			if (status === Zotero.ProgressQueue.ROW_SUCCEEDED) {
+				const recognizedItem = updatedItem.parentItem;
+				if (recognizedItem && id in attachmentItemHashLookup) {
+					recognizedItem.setRelations({ 'zotero:attachmentHash': attachmentItemHashLookup[id] });
+					itemsToSavePostRecognize.push(recognizedItem);
+				}
+			}
+			if (status === Zotero.ProgressQueue.ROW_FAILED) {
+				if (id in attachmentItemHashLookup) {
+					updatedItem.setRelations({ 'zotero:fileHash': attachmentItemHashLookup[id] });
+					itemsToSavePostRecognize.push(updatedItem);
+				}
+			}
+			if ([Zotero.ProgressQueue.ROW_FAILED, Zotero.ProgressQueue.ROW_SUCCEEDED].includes(status)) {
+				this._progress++;
+				this._itemDone();
+			}
+		};
+
+		recognizeQueue.addListener('rowupdated', processRecognizedItem);
+		try {
+			await Zotero.RecognizePDF.recognizeItems(recognizableItems);
+		}
+		finally {
+			recognizeQueue.removeListener('rowupdated', processRecognizedItem);
+		}
+
+		await Zotero.Promise.all(
+			itemsToSavePostRecognize.map(async item => item.saveTx({ skipSelect: true }))
+		);
+	}
+}
diff --git a/chrome/content/zotero/xpcom/data/relations.js b/chrome/content/zotero/xpcom/data/relations.js index c830091fb1..ab14cd38fe 100644 --- a/chrome/content/zotero/xpcom/data/relations.js +++ b/chrome/content/zotero/xpcom/data/relations.js @@ -33,7 +33,8 @@ Zotero.Relations = new function () { this._namespaces = { dc: 'http://purl.org/dc/elements/1.1/', owl: 'http://www.w3.org/2002/07/owl#', - mendeleyDB: 'http://zotero.org/namespaces/mendeleyDB#' + mendeleyDB: 'http://zotero.org/namespaces/mendeleyDB#', + zotero: 'http://zotero.org/namespaces/zotero' }; var _types = 
['collection', 'item']; diff --git a/package.json b/package.json index 23e96cae40..61b3f32b5c 100644 --- a/package.json +++ b/package.json @@ -44,6 +44,7 @@ "colors": "^1.4.0", "eslint": "^8.5.0", "eslint-plugin-react": "^7.28.0", + "eslint-plugin-react-hooks": "^4.0.4", "fs-extra": "^3.0.1", "globby": "^6.1.0", "jspath": "^0.4.0", diff --git a/resource/config.js b/resource/config.js index 452fe1870b..67c3a5760f 100644 --- a/resource/config.js +++ b/resource/config.js @@ -33,8 +33,9 @@ var ZOTERO_CONFIG = { PLUGINS_URL: 'https://www.zotero.org/support/plugins', }; -if (typeof process === 'object' && process + '' === '[object process]'){ +if (typeof exports === 'object' && typeof module !== 'undefined') { module.exports = ZOTERO_CONFIG; -} else { +} +else { var EXPORTED_SYMBOLS = ["ZOTERO_CONFIG"]; -} \ No newline at end of file +} diff --git a/scripts/config.js b/scripts/config.js index d09363f581..dab51d0a0d 100644 --- a/scripts/config.js +++ b/scripts/config.js @@ -97,7 +97,14 @@ const browserifyConfigs = [ config: { standalone: 'chaiAsPromised' } - } + }, + { + src: 'node_modules/multimatch/index.js', + dest: 'resource/multimatch.js', + config: { + standalone: 'multimatch' + } + }, ]; // exclude mask used for js, copy, symlink and sass tasks diff --git a/test/tests/folderImportTest.js b/test/tests/folderImportTest.js new file mode 100644 index 0000000000..d6609876ec --- /dev/null +++ b/test/tests/folderImportTest.js @@ -0,0 +1,136 @@ +/* global Zotero_Import_Folder: false */ + +describe('Zotero_Import_Folder', function () { + var tmpDir; + const uc = (name) => 'Zotero_Import_Folder_' + name; /* prefix for fixture directory names; the assertions below filter collections by it */ + + before(async () => { /* fixture tree: the title PDF at the root, in dir1 and in dir2; the arXiv PDF in dir1/subdir1; test.png, test.html and test.txt in dir2 */ + tmpDir = await getTempDirectory(); + + await OS.File.makeDir(OS.Path.join(tmpDir, uc('dir1'))); + await OS.File.makeDir(OS.Path.join(tmpDir, uc('dir1'), uc('subdir1'))); + await OS.File.makeDir(OS.Path.join(tmpDir, uc('dir2'))); + + await OS.File.copy( + OS.Path.join(getTestDataDirectory().path, 'recognizePDF_test_title.pdf'), + 
OS.Path.join(tmpDir, 'recognizePDF_test_title.pdf') + ); + await OS.File.copy( + OS.Path.join(getTestDataDirectory().path, 'recognizePDF_test_title.pdf'), + OS.Path.join(tmpDir, uc('dir1'), 'recognizePDF_test_title.pdf') + ); + await OS.File.copy( + OS.Path.join(getTestDataDirectory().path, 'recognizePDF_test_arXiv.pdf'), + OS.Path.join(tmpDir, uc('dir1'), uc('subdir1'), 'recognizePDF_test_arXiv.pdf') + ); + await OS.File.copy( + OS.Path.join(getTestDataDirectory().path, 'recognizePDF_test_title.pdf'), + OS.Path.join(tmpDir, uc('dir2'), 'recognizePDF_test_title.pdf') + ); + await OS.File.copy( + OS.Path.join(getTestDataDirectory().path, 'test.png'), + OS.Path.join(tmpDir, uc('dir2'), 'test.png') + ); + await OS.File.copy( + OS.Path.join(getTestDataDirectory().path, 'test.html'), + OS.Path.join(tmpDir, uc('dir2'), 'test.html') + ); + await OS.File.copy( + OS.Path.join(getTestDataDirectory().path, 'test.txt'), + OS.Path.join(tmpDir, uc('dir2'), 'test.txt') + ); + + Components.utils.import('chrome://zotero/content/import/folderImport.js'); + }); + + describe('#import', () => { + it('should import PDFs from a folder and recreate structure without creating duplicates', async function () { + this.timeout(30000); + if (Zotero.automatedTest) { + this.skip(); + } + + const importer = new Zotero_Import_Folder({ + folder: tmpDir, + recreateStructure: true, + }); + + await importer.translate({ + libraryID: Zotero.Libraries.userLibraryID, + linkFiles: true, + }); + + assert.equal(importer.newItems.length, 2); /* the title PDF exists in three places but is expected to be imported once, plus the arXiv PDF */ + + const firstPDFAttachment = importer.newItems.find(ni => ni.getField('title') === 'recognizePDF_test_arXiv.pdf'); + const firstPDFItem = await Zotero.Items.getAsync(firstPDFAttachment.parentID); + const firstPDFCollections = await Zotero.Collections.getAsync(firstPDFItem.getCollections()); + assert.equal(firstPDFItem.getField('title'), 'Scaling study of an improved fermion action on quenched lattices'); + assert.equal(firstPDFCollections.length, 1); + 
assert.equal(firstPDFCollections[0].name, uc('subdir1')); + assert.equal((await Zotero.Collections.getAsync(firstPDFCollections[0].parentID)).name, uc('dir1')); + + const secondPDFAttachment = importer.newItems.find(ni => ni.getField('title') === 'recognizePDF_test_title.pdf'); + const secondPDFItem = await Zotero.Items.getAsync(secondPDFAttachment.parentID); + const secondPDFCollections = await Zotero.Collections.getAsync(secondPDFItem.getCollections()); + assert.equal(secondPDFItem.getField('title'), 'Bitcoin: A Peer-to-Peer Electronic Cash System'); + assert.equal(secondPDFCollections.length, 2); + assert.sameMembers(secondPDFCollections.map(c => c.name), [uc('dir1'), uc('dir2')]); + + assert.sameMembers( + Zotero.Collections.getByLibrary(Zotero.Libraries.userLibraryID, true) + .map(c => c.name) + .filter(c => c.startsWith('Zotero_Import_Folder')), + [uc('dir1'), uc('dir2'), uc('subdir1')] + ); + + const importer2 = new Zotero_Import_Folder({ + folder: tmpDir, + recreateStructure: true, + }); + + await importer2.translate({ + libraryID: Zotero.Libraries.userLibraryID, + linkFiles: true, + }); + + assert.lengthOf(importer2.newItems, 0); /* re-importing the same folder is expected to create nothing new */ + assert.sameMembers( + Zotero.Collections.getByLibrary(Zotero.Libraries.userLibraryID, true) + .map(c => c.name) + .filter(c => c.startsWith('Zotero_Import_Folder')), + [uc('dir1'), uc('dir2'), uc('subdir1')] + ); + }); + + it('should only import specified file types from a folder', async function () { + this.timeout(30000); + if (Zotero.automatedTest) { + this.skip(); + } + const importer = new Zotero_Import_Folder({ + folder: tmpDir, + recreateStructure: false, + fileTypes: '*.png,*.txt', + mimeTypes: [] + }); + + await importer.translate({ + libraryID: Zotero.Libraries.userLibraryID, + linkFiles: true, + }); + + assert.equal(importer.newItems.length, 2); + const pngItem = importer.newItems.find(ni => ni.getField('title') === 'test.png'); + assert.isDefined(pngItem); + assert.isFalse(pngItem.parentID); + + const txtItem = 
importer.newItems.find(ni => ni.getField('title') === 'test.txt'); + assert.isDefined(txtItem); + assert.isFalse(txtItem.parentID); + + const htmlItem = importer.newItems.find(ni => ni.getField('title') === 'test.html'); + assert.isUndefined(htmlItem); /* test.html matches neither '*.png' nor '*.txt' and mimeTypes is empty */ + }); + }); +});