Replace non-breaking spaces in tested lines in recognizePDF
Fixes "PDF does not contain OCRed text" message for http://pdfserver.amlaw.com/nlj/NSA_ca2_20150507.pdf
This commit is contained in:
parent
3587bb0f6b
commit
45b3cd8a53
1 changed file with 5 additions and 1 deletion
|
@@ -593,7 +593,11 @@ var Zotero_RecognizePDF = new function() {
|
|||
const lineRe = /^[\s_]*([^\s]+(?: [^\s_]+)+)/;
|
||||
var cleanedLines = [], cleanedLineLengths = [];
|
||||
for(var i=0; i<lines.length && cleanedLines.length<100; i++) {
|
||||
var m = lineRe.exec(lines[i]);
|
||||
var m = lineRe.exec(
|
||||
lines[i]
|
||||
// Replace non-breaking spaces
|
||||
.replace(/\xA0/g, ' ')
|
||||
);
|
||||
if(m && m[1].split(' ').length > 3) {
|
||||
cleanedLines.push(m[1]);
|
||||
cleanedLineLengths.push(m[1].length);
|
||||
|
|
Loading…
Reference in a new issue