Trans: Changes to NYT: Use standard date when available, grab single page snapshot
This commit is contained in:
parent
9d78bd7024
commit
68c2a0039a
1 changed files with 25 additions and 5 deletions
|
@ -8,7 +8,7 @@
|
|||
"maxVersion":"",
|
||||
"priority":100,
|
||||
"inRepository":true,
|
||||
"lastUpdated":"2011-01-11 04:31:00"
|
||||
"lastUpdated":"2011-03-21 04:31:00"
|
||||
}
|
||||
|
||||
function detectWeb(doc, url) {
|
||||
|
@ -38,6 +38,11 @@ function associateMeta(newItem, metaTags, field, zoteroField) {
|
|||
}
|
||||
|
||||
function scrape(doc, url) {
|
||||
var namespace = null;
|
||||
var nsResolver = namespace ? function(prefix) {
|
||||
if (prefix == 'x') return namespace; else return null;
|
||||
} : null;
|
||||
|
||||
var newItem = new Zotero.Item("newspaperArticle");
|
||||
newItem.publicationTitle = "The New York Times";
|
||||
newItem.ISSN = "0362-4331";
|
||||
|
@ -65,8 +70,8 @@ function scrape(doc, url) {
|
|||
if(!metaTags["hdl"]) {
|
||||
return;
|
||||
}
|
||||
|
||||
newItem.attachments.push({url:url, title:"New York Times Snapshot",
|
||||
// We want to get everything on one page
|
||||
newItem.attachments.push({url:url.replace(/\.html\??([^/]*)(pagewanted=[^&]*)?([^/]*)$/,".html?pagewanted=all&$1$2"), title:"New York Times Snapshot",
|
||||
mimeType:"text/html"});
|
||||
} else {
|
||||
newItem.url = doc.location.href;
|
||||
|
@ -78,8 +83,16 @@ function scrape(doc, url) {
|
|||
metaTags[key] = value;
|
||||
}
|
||||
}
|
||||
|
||||
newItem.attachments.push({document:doc, title:"New York Times Snapshot"});
|
||||
// Get everything on one page is possible
|
||||
var singlePage = false;
|
||||
if (!newItem.url.match(/\?pagewanted=all/)
|
||||
&& (singlePage = doc.evaluate('//ul[@id="toolsList"]/li[@class="singlePage"]/a', doc, nsResolver,
|
||||
XPathResult.ANY_TYPE, null).iterateNext())) {
|
||||
newItem.attachments.push({url:singlePage.href, title:"New York Times Snapshot",
|
||||
mimeType:"text/html"});
|
||||
} else {
|
||||
newItem.attachments.push({document:doc, title:"New York Times Snapshot"});
|
||||
}
|
||||
}
|
||||
|
||||
associateMeta(newItem, metaTags, "dat", "date");
|
||||
|
@ -87,6 +100,10 @@ function scrape(doc, url) {
|
|||
associateMeta(newItem, metaTags, "dsk", "section");
|
||||
associateMeta(newItem, metaTags, "articleid", "accessionNumber");
|
||||
|
||||
if (metaTags["pdate"]) {
|
||||
newItem.date = metaTags["pdate"].replace(/(\d{4})(\d{2})(\d{2})/,"$1-$2-$3");
|
||||
}
|
||||
|
||||
if(metaTags["byl"]) {
|
||||
var author = Zotero.Utilities.trimInternal(metaTags["byl"]);
|
||||
if(author.substr(0, 3).toLowerCase() == "by ") {
|
||||
|
@ -118,6 +135,9 @@ function scrape(doc, url) {
|
|||
}
|
||||
}
|
||||
|
||||
// Remove pagewanted from URL in item (keeping other pieces, in case they might matter)
|
||||
newItem.url = newItem.url.replace(/\?([^/]*)pagewanted=[^&]*/,'');
|
||||
|
||||
newItem.complete();
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue