Use pdf-worker to extract text for item.attachmentText if no cache file exists

This commit is contained in:
Dan Stillman 2023-04-29 17:37:50 -04:00
parent adf2d92471
commit 4ec357bb26

View file

@@ -3565,24 +3565,10 @@ Zotero.defineProperty(Zotero.Item.prototype, 'attachmentText', {
let data = JSON.parse(json); let data = JSON.parse(json);
str = data.text; str = data.text;
} }
// Otherwise extract text to temporary file and read that // Otherwise extract text
else if (contentType == 'application/pdf') { else if (contentType == 'application/pdf') {
let tmpCacheFile = OS.Path.join( let { text } = await Zotero.PDFWorker.getFullText(this.id);
Zotero.getTempDirectory().path, Zotero.Utilities.randomString() str = text;
);
let { exec, args } = Zotero.FullText.getPDFConverterExecAndArgs();
args.push(
'-nopgbrk',
path,
tmpCacheFile
);
await Zotero.Utilities.Internal.exec(exec, args);
if (!await OS.File.exists(tmpCacheFile)) {
Zotero.logError("Cache file not found after running PDF converter");
return '';
}
str = await Zotero.File.getContentsAsync(tmpCacheFile);
await OS.File.remove(tmpCacheFile);
} }
else { else {
Zotero.logError("Unsupported cached file type in .attachmentText"); Zotero.logError("Unsupported cached file type in .attachmentText");