From 567953e57c4ba033c818d06d56f7c6a9b6941a0c Mon Sep 17 00:00:00 2001 From: Dan Stillman Date: Mon, 3 Aug 2009 04:58:08 +0000 Subject: [PATCH] - Adds Zotero.Item.attachmentContent property -- works for HTML, PDF, and plaintext attachments - For PDFs, stop deleting full-text cache file after indexing, and regenerate when getting content if necessary - MozMill tests Other MozMill changes: - Added Cmd-Shift-M shortcut key to open window - this.controller now available in Zotero.MozMill if passed in from setupModule() with Zotero.MozMill.setController(module.controller); - Various new Zotero.MozMill methods, including for creating file attachments --- chrome/content/zotero/xpcom/data/item.js | 97 +++++++++++++++++++++++- chrome/content/zotero/xpcom/fulltext.js | 37 +++++---- 2 files changed, 119 insertions(+), 15 deletions(-) diff --git a/chrome/content/zotero/xpcom/data/item.js b/chrome/content/zotero/xpcom/data/item.js index 9a6900723b..98c51833fd 100644 --- a/chrome/content/zotero/xpcom/data/item.js +++ b/chrome/content/zotero/xpcom/data/item.js @@ -2288,7 +2288,7 @@ Zotero.Item.prototype.setNote = function(text) { } if (typeof text != 'string') { - throw ("text must be a string in Zotero.Item.setNote()"); + throw ("text must be a string in Zotero.Item.setNote() (was " + typeof text + ")"); } text = Zotero.Utilities.prototype.trim(text); @@ -2902,6 +2902,101 @@ Zotero.Item.prototype.__defineGetter__('attachmentModificationTime', function () }); +/** + * Return plain text of attachment content, trimmed of surrounding whitespace (falls back to the full-text cache file, if one exists, when getFile() returns no file) + * + * - Currently works on HTML, PDF and plaintext attachments + * - Paragraph breaks will be lost in PDF content + * - For PDFs, will return empty string if the full-text cache file has to be (re)generated and Zotero.Fulltext.pdfConverterIsRegistered() is false + * + * @return {String|undefined|null} Attachment text, or empty string if unavailable; undefined if the item is not an attachment, null if the item has no id + */ +Zotero.Item.prototype.__defineGetter__('attachmentText', function () { + if (!this.isAttachment()) { + return undefined; + } + + if (!this.id) { + return null; + } + + var file = 
this.getFile(); + var cacheFile = Zotero.Fulltext.getItemCacheFile(this.id); + if (!file) { + if (cacheFile.exists()) { + var str = Zotero.File.getContents(cacheFile); + + // TODO: remove post-Fx3.0 (fallback for when native str.trim is unavailable) + if (!str.trim) { + return Zotero.Utilities.prototype.trim(str); + } + + return str.trim(); + } + return ''; + } + + var mimeType = this.attachmentMIMEType; + if (!mimeType) { + mimeType = Zotero.MIME.getMIMETypeFromFile(file); + if (mimeType) { + this.attachmentMIMEType = mimeType; + this.save(); + } + } + + var str; + if (Zotero.Fulltext.isCachedMIMEType(mimeType)) { + var reindex = false; + + if (!cacheFile.exists()) { + Zotero.debug("Regenerating item " + this.id + " full-text cache file"); + reindex = true; + } + // Fully index item if it isn't already + else if (!Zotero.Fulltext.isFullyIndexed(this.id)) { + Zotero.debug("Item " + this.id + " is not fully indexed -- caching now"); + reindex = true; + } + + if (reindex) { + if (!Zotero.Fulltext.pdfConverterIsRegistered()) { + Zotero.debug("PDF converter is unavailable -- returning empty .attachmentText", 3); + return ''; + } + Zotero.Fulltext.indexItems(this.id, false); + } + + if (!cacheFile.exists()) { + Zotero.debug("Cache file doesn't exist after indexing -- returning empty .attachmentText"); + return ''; + } + str = Zotero.File.getContents(cacheFile); + } + + else if (mimeType == 'text/html') { + str = Zotero.File.getContents(file); + str = Zotero.Utilities.prototype.unescapeHTML(str); + } + + else if (mimeType == 'text/plain') { + str = Zotero.File.getContents(file); + } + + else { + return ''; + } + + // TODO: remove post-Fx3.0 (fallback for when native str.trim is unavailable) + if (!str.trim) { + return Zotero.Utilities.prototype.trim(str); + } + + return str.trim(); +}); + + + /** * Returns child attachments of this item * diff --git a/chrome/content/zotero/xpcom/fulltext.js b/chrome/content/zotero/xpcom/fulltext.js index 99d7a48f95..d3fdab5369 100644 --- a/chrome/content/zotero/xpcom/fulltext.js +++ b/chrome/content/zotero/xpcom/fulltext.js @@ -265,7 +265,7 
@@ Zotero.Fulltext = new function(){ Zotero.DB.beginTransaction(); - this.clearItemWords(itemID); + this.clearItemWords(itemID, true); this.indexWords(itemID, words); /* @@ -475,7 +475,6 @@ Zotero.Fulltext = new function(){ if (items.constructor.name != 'Array') { items = [items]; } - var items = Zotero.Items.get(items); var found = []; @@ -601,7 +600,7 @@ Zotero.Fulltext = new function(){ var mimeType = i.attachmentMIMEType; if (isCachedMIMEType(mimeType)) { - var file = _getItemCacheFile(i.id); + var file = this.getItemCacheFile(i.id); if (!file.exists()) { continue; } @@ -632,7 +631,7 @@ Zotero.Fulltext = new function(){ } - function clearItemWords(itemID){ + function clearItemWords(itemID, skipCacheClear) { // skipCacheClear: when truthy, the full-text cache file is left in place Zotero.DB.beginTransaction(); var sql = "SELECT rowid FROM fulltextItems WHERE itemID=? LIMIT 1"; var indexed = Zotero.DB.valueQuery(sql, itemID); @@ -646,8 +645,10 @@ Zotero.Fulltext = new function(){ Zotero.Prefs.set('purge.fulltext', true); } - // Delete fulltext cache file if there is one - this.clearCacheFile(itemID); + if (!skipCacheClear) { + // Delete fulltext cache file if there is one + this.clearCacheFile(itemID); + } } @@ -793,6 +794,14 @@ Zotero.Fulltext = new function(){ } + this.isFullyIndexed = function (itemID) { // true when getIndexedState(itemID) equals INDEX_STATE_INDEXED; throws if itemID is not provided + if (!itemID) { + throw ("itemID not provided in Zotero.Fulltext.isFullyIndexed()"); + } + return this.getIndexedState(itemID) == this.INDEX_STATE_INDEXED; + } + + function getIndexStats() { var sql = "SELECT COUNT(*) FROM fulltextItems WHERE " + "(indexedPages IS NOT NULL AND indexedPages=totalPages) OR " @@ -817,6 +826,13 @@ Zotero.Fulltext = new function(){ } + this.getItemCacheFile = function (itemID) { // returns the pdfConverterCacheFile entry in the item storage directory; callers check exists() themselves + var cacheFile = Zotero.Attachments.getStorageDirectory(itemID); + cacheFile.append(self.pdfConverterCacheFile); + return cacheFile; + } + + /* * Returns true if an item can be reindexed * @@ -912,7 +928,7 @@ Zotero.Fulltext = new function(){ Zotero.debug('Clearing full-text cache file for item ' + itemID); switch 
(item.attachmentMIMEType) { case 'application/pdf': - var cacheFile = _getItemCacheFile(itemID); + var cacheFile = this.getItemCacheFile(itemID); if (cacheFile.exists()) { cacheFile.remove(false); } @@ -1052,11 +1068,4 @@ Zotero.Fulltext = new function(){ text = text.replace("zoteroapostrophe", "'"); return text; } - - - function _getItemCacheFile(itemID) { - var cacheFile = Zotero.Attachments.getStorageDirectory(itemID); - cacheFile.append(self.pdfConverterCacheFile); - return cacheFile; - } }