Replace non-breaking spaces in lines before testing them in recognizePDF

Fixes "PDF does not contain OCRed text" message for
http://pdfserver.amlaw.com/nlj/NSA_ca2_20150507.pdf
This commit is contained in:
Dan Stillman 2015-05-07 13:41:13 -04:00
parent 3587bb0f6b
commit 45b3cd8a53

View file

@@ -593,7 +593,11 @@ var Zotero_RecognizePDF = new function() {
const lineRe = /^[\s_]*([^\s]+(?: [^\s_]+)+)/;
var cleanedLines = [], cleanedLineLengths = [];
for(var i=0; i<lines.length && cleanedLines.length<100; i++) {
var m = lineRe.exec(lines[i]);
var m = lineRe.exec(
lines[i]
// Replace non-breaking spaces
.replace(/\xA0/g, ' ')
);
if(m && m[1].split(' ').length > 3) {
cleanedLines.push(m[1]);
cleanedLineLengths.push(m[1].length);