Fix PMID and arXiv identifier extraction

This commit is contained in:
Martynas Bagdonas 2018-05-07 13:04:11 +03:00 committed by Dan Stillman
parent e6dbd1ed92
commit 9165a0247f
2 changed files with 13 additions and 8 deletions
chrome/content/zotero/xpcom
test/tests

View file

@ -901,7 +901,7 @@ Zotero.Utilities.Internal = {
// arXiv identifiers are extracted without the version number,
// i.e. 0706.0044v1 is extracted as 0706.0044,
// because the arXiv OAI API doesn't allow access to individual versions
let arXiv_RE = /((?:[^A-Za-z]|^)([\-A-Za-z\.]+\/\d{7})(?:(v[0-9]+)|)(?!\d))|((?:\D|^)(\d{4}.\d{4,5})(?:(v[0-9]+)|)(?!\d))/g;
let arXiv_RE = /((?:[^A-Za-z]|^)([\-A-Za-z\.]+\/\d{7})(?:(v[0-9]+)|)(?!\d))|((?:\D|^)(\d{4}\.\d{4,5})(?:(v[0-9]+)|)(?!\d))/g;
let m;
while ((m = arXiv_RE.exec(text))) {
let arXiv = m[2] || m[5];
@ -916,11 +916,11 @@ Zotero.Utilities.Internal = {
if (!identifiers.length) {
// PMID; right now, the longest PMIDs are 8 digits, so it doesn't seem like we'll
// need to discriminate for a fairly long time
let PMID_RE = /(?:\D|^)(\d{1,9})(?!\d)/g;
let PMID_RE = /(^|\s|,|:)(\d{1,9})(?=\s|,|$)/g;
let pmid;
while ((pmid = PMID_RE.exec(text)) && !foundIDs.has(pmid)) {
identifiers.push({
PMID: pmid[1]
PMID: pmid[2]
});
foundIDs.add(pmid);
}

View file

@ -147,15 +147,20 @@ describe("Zotero.Utilities.Internal", function () {
});
it("should extract PMID", async function () {
var id = "24297125";
var identifiers = ZUI.extractIdentifiers(id);
assert.lengthOf(identifiers, 1);
var identifiers = ZUI.extractIdentifiers("1 PMID:24297125,222 3-4 1234567890, 123456789");
assert.lengthOf(identifiers, 4);
assert.lengthOf(Object.keys(identifiers[0]), 1);
assert.propertyVal(identifiers[0], "PMID", id);
assert.lengthOf(Object.keys(identifiers[1]), 1);
assert.lengthOf(Object.keys(identifiers[2]), 1);
assert.lengthOf(Object.keys(identifiers[3]), 1);
assert.propertyVal(identifiers[0], "PMID", "1");
assert.propertyVal(identifiers[1], "PMID", "24297125");
assert.propertyVal(identifiers[2], "PMID", "222");
assert.propertyVal(identifiers[3], "PMID", "123456789");
});
it("should extract multiple old and new style arXivs", async function () {
var identifiers = ZUI.extractIdentifiers("0706.0044 arXiv:0706.00441v1,hep-ex/9809001v1, math.GT/0309135.");
var identifiers = ZUI.extractIdentifiers("0706.0044 arXiv:0706.00441v1,12345678,hep-ex/9809001v1, math.GT/0309135.");
assert.lengthOf(identifiers, 4);
assert.lengthOf(Object.keys(identifiers[0]), 1);
assert.lengthOf(Object.keys(identifiers[1]), 1);