Don't truncate HTML file content in full-text sync
Also write a cache file for HTML files at index time
This commit is contained in:
parent
62840f3b93
commit
ccf2a05c49
1 changed files with 33 additions and 6 deletions
|
@ -370,9 +370,22 @@ Zotero.Fulltext = new function(){
|
|||
text = text.replace(/(>)/g, '$1 ');
|
||||
text = this.HTMLToText(text);
|
||||
this.indexString(text, document.characterSet, itemID);
|
||||
|
||||
var charsIndexed = Math.min(maxLength, text.length);
|
||||
this.setChars(itemID, { indexed: charsIndexed, total: text.length });
|
||||
|
||||
// Write the converted text to a cache file
|
||||
Q.fcall(function () {
|
||||
let cacheFile = self.getItemCacheFile(itemID);
|
||||
Zotero.debug("Writing converted full-text HTML content to " + cacheFile.path);
|
||||
if (!cacheFile.parent.exists()) {
|
||||
Zotero.Attachments.createDirectoryForItem(itemID);
|
||||
}
|
||||
return Zotero.File.putContentsAsync(cacheFile, text);
|
||||
})
|
||||
.catch(function (e) {
|
||||
Zotero.debug(e, 1);
|
||||
Components.utils.reportError(e);
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
|
@ -597,6 +610,7 @@ Zotero.Fulltext = new function(){
|
|||
* @return {Array<Object>}
|
||||
*/
|
||||
this.getUnsyncedContent = function (maxChars) {
|
||||
var maxLength = Zotero.Prefs.get('fulltext.textMaxLength');
|
||||
var first = true;
|
||||
var chars = 0;
|
||||
var contentItems = [];
|
||||
|
@ -632,15 +646,18 @@ Zotero.Fulltext = new function(){
|
|||
}
|
||||
|
||||
Zotero.debug("Adding full-text content from file for item " + libraryKey);
|
||||
text = Zotero.File.getContents(
|
||||
file, item.attachmentCharset, row.indexedChars
|
||||
);
|
||||
text = Zotero.File.getContents(file, item.attachmentCharset, maxLength);
|
||||
|
||||
// Split elements to avoid word concatentation
|
||||
// If HTML, convert to plain text first, and cache the result
|
||||
if (item.attachmentMIMEType == 'text/html') {
|
||||
// Split elements to avoid word concatentation
|
||||
text = text.replace(/(>)/g, '$1 ');
|
||||
|
||||
text = this.HTMLToText(text);
|
||||
|
||||
// Include in the cache file only as many characters as we've indexed
|
||||
text = text.substr(0, row.indexedChars);
|
||||
|
||||
// Write the converted text to a cache file
|
||||
Zotero.debug("Writing converted full-text HTML content to "
|
||||
+ cacheFile.path);
|
||||
|
@ -651,7 +668,11 @@ Zotero.Fulltext = new function(){
|
|||
.catch(function (e) {
|
||||
Zotero.debug(e, 1);
|
||||
Components.utils.reportError(e);
|
||||
})
|
||||
});
|
||||
}
|
||||
else {
|
||||
// Include only as many characters as we've indexed
|
||||
text = text.substr(0, row.indexedChars);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1067,6 +1088,12 @@ Zotero.Fulltext = new function(){
|
|||
|
||||
content = this.HTMLToText(content);
|
||||
|
||||
// Include in the cache file only as many characters as we've indexed
|
||||
let chars = this.getChars(itemID);
|
||||
if (chars && chars.indexedChars) {
|
||||
content = content.substr(0, chars.indexedChars);
|
||||
}
|
||||
|
||||
// Write the converted text to a cache file for future searches
|
||||
Zotero.debug("Writing converted full-text content to " + cacheFile.path);
|
||||
if (!cacheFile.parent.exists()) {
|
||||
|
|
Loading…
Add table
Reference in a new issue