-- 2006-10-03 21:08:02 +00:00
-- 96
-- 2006-10-02 01:07:56 +00:00
-- Translator storage: one row per import/export/web translator.
-- translatorType is a bitmask: 1 = import, 2 = export, 4 = web
-- (a translator may be any combination; the type is the sum).
DROP TABLE IF EXISTS translators;
CREATE TABLE translators (
    translatorID TEXT PRIMARY KEY,  -- UUID identifying the translator
    lastUpdated DATETIME,           -- repository timestamp of the last update
    inRepository INT,
    priority INT,
    translatorType INT,             -- bitmask; see note above
    label TEXT,                     -- human-readable name (e.g. 'Amazon.com')
    creator TEXT,                   -- author credit
    target TEXT,                    -- URL-matching regex for web translators
    detectCode TEXT,                -- JavaScript source of detectWeb()
    code TEXT                       -- JavaScript source of the translator body
);

-- Support lookups filtered by translator type (import/export/web dispatch).
DROP INDEX IF EXISTS translators_type;
CREATE INDEX translators_type ON translators (translatorType);
-- 2006-06-15 06:13:02 +00:00
-- 2006-10-02 21:25:47 +00:00
-- CSL (citation style) storage: one row per style.
DROP TABLE IF EXISTS csl;
CREATE TABLE csl (
    cslID TEXT PRIMARY KEY,  -- style identifier
    updated DATETIME,        -- last-modified timestamp
    title TEXT,              -- human-readable style title
    csl TEXT                 -- style source; presumably CSL XML — TODO confirm
);
-- 2006-06-15 06:13:02 +00:00
-- Set the following timestamp to the most recent scraper update date
-- 2006-10-02 23:15:27 +00:00
REPLACE INTO " version " VALUES ( ' repository ' , STRFTIME ( ' %s ' , ' 2006-10-02 17:00:00 ' ) ) ;
-- 2006-06-15 06:13:02 +00:00
-- 2006-10-02 23:15:27 +00:00
-- Amazon.com web translator (type 4). Columns: translatorID, lastUpdated,
-- inRepository, priority, translatorType, label, creator, target,
-- detectCode, code.
-- NOTE(review): this statement was reconstructed from a source corrupted by
-- interleaved commit-log text and token-level whitespace mangling; the
-- removed lines were VCS residue, not SQL or JavaScript. Doubled single
-- quotes ('') inside the string literals are SQL escapes for JS quotes.
REPLACE INTO "translators" VALUES ('96b9f483-c44d-5784-cdad-ce21b984fe01', '2006-10-02 17:00:00', 1, 100, 4, 'Amazon.com', 'Simon Kornblith', '^http://www\.amazon\.com/',
'function detectWeb(doc, url) {
	var searchRe = new RegExp(''^http://(?:www\.)?amazon\.com/(gp/search/|exec/obidos/search-handle-url/|s/)'');
	if(searchRe.test(doc.location.href)) {
		return "multiple";
	} else {
		var namespace = doc.documentElement.namespaceURI;
		var nsResolver = namespace ? function(prefix) {
			if(prefix == ''x'') return namespace; else return null;
		} : null;
		var xpath = ''/html/body/table/tbody/tr/td[2]/table/tbody/tr/td[@class="bucket"]/div[@class="content"]/ul/li'';
		if(doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) {
			return "book";
		}
	}
}',
'function scrape(doc) {
	var namespace = doc.documentElement.namespaceURI;
	var nsResolver = namespace ? function(prefix) {
		if(prefix == ''x'') return namespace; else return null;
	} : null;
	
	var newItem = new Zotero.Item("book");
	
	// Retrieve authors
	try {
		var xpath = ''/html/body/table/tbody/tr/td[2]/form/div[@class="buying"]/a/text()[1]'';
		var elmts = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null);
		var elmt;
		while(elmt = elmts.iterateNext()) {
			newItem.creators.push(Zotero.Utilities.cleanAuthor(elmt.nodeValue, "author"));
		}
	} catch(ex) { Zotero.Utilities.debug(ex); }
	
	// Retrieve data from "Product Details" box
	var xpath = ''/html/body/table/tbody/tr/td[2]/table/tbody/tr/td[@class="bucket"]/div[@class="content"]/ul/li'';
	var elmts = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null);
	var elmt;
	newItem.extra = "";
	while(elmt = elmts.iterateNext()) {
		try {
			var attribute = Zotero.Utilities.cleanString(doc.evaluate(''./B[1]/text()[1]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().nodeValue);
			var value = Zotero.Utilities.getNodeString(doc, elmt, ''./descendant-or-self::*[name() != "B"]/text()'', nsResolver);
			if(value) {
				value = Zotero.Utilities.cleanString(value);
				if(attribute == "Publisher:") {
					// Trailing "(date)" becomes the item date; "; edition" the edition.
					if(value.lastIndexOf("(") != -1) {
						newItem.date = value.substring(value.lastIndexOf("(")+1, value.length-1);
						value = value.substring(0, value.lastIndexOf("(")-1);
					}
					if(value.lastIndexOf(";") != -1) {
						newItem.edition = value.substring(value.lastIndexOf(";")+2, value.length);
						value = value.substring(0, value.lastIndexOf(";"));
					}
					newItem.publisher = value;
				} else if(attribute == "ISBN:") {
					newItem.ISBN = value;
				} else if(value.substring(value.indexOf(" ")+1, value.length) == "pages") {
					newItem.pages = value.substring(0, value.indexOf(" "));
				} else if(attribute != "Average Customer Review:") {
					if(attribute == "In-Print Editions:") {
						value = value.replace(" | All Editions", "");
					} else {
						value = value.replace(/\([^)]*\)/g, "");
					}
					newItem.extra += attribute+" "+value+"\n";
				}
			}
		} catch(ex) {}
	}
	
	// Drop the trailing newline accumulated above.
	if(newItem.extra) {
		newItem.extra = newItem.extra.substr(0, newItem.extra.length-1);
	}
	
	newItem.attachments.push({title:"Amazon.com Product Page", document:doc});
	
	var xpath = ''/html/body/table/tbody/tr/td[2]/form/div[@class="buying"]/b[@class="sans"]/text()[1]'';
	var title = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().nodeValue;
	title = Zotero.Utilities.cleanString(title);
	// Strip a parenthesized suffix from the title, e.g. "(Hardcover)".
	if(title.lastIndexOf("(") != -1 && title.lastIndexOf(")") == title.length-1) {
		title = title.substring(0, title.lastIndexOf("(")-1);
	}
	newItem.title = title;
	
	newItem.complete();
}

function doWeb(doc, url) {
	var searchRe = new RegExp(''^http://www\.amazon\.com/(gp/search/|exec/obidos/search-handle-url/|s/)'');
	var m = searchRe.exec(doc.location.href);
	if(m) {
		var namespace = doc.documentElement.namespaceURI;
		var nsResolver = namespace ? function(prefix) {
			if(prefix == ''x'') return namespace; else return null;
		} : null;
		// Why can''t amazon use the same stylesheets
		var xpath;
		if(m == "exec/obidos/search-handle-url/") {
			xpath = ''//table[@cellpadding="3"]'';
		} else {
			xpath = ''//table[@class="searchresults"]'';
		}
		var searchresults = Zotero.Utilities.gatherElementsOnXPath(doc, doc, xpath, nsResolver);
		var items = Zotero.Utilities.getItemArray(doc, searchresults, ''^http://www\.amazon\.com/(gp/product/|exec/obidos/tg/detail/|[^/]+/dp/)'', ''^(Buy new|Hardcover|Paperback|Digital)$'');
		items = Zotero.selectItems(items);
		
		if(!items) {
			return true;
		}
		
		var uris = new Array();
		for(var i in items) {
			uris.push(i);
		}
		
		Zotero.Utilities.processDocuments(uris, function(doc) { scrape(doc) },
			function() { Zotero.done(); }, null);
		
		Zotero.wait();
	} else {
		scrape(doc);
	}
}');
2006-06-06 18:25:45 +00:00
2006-10-02 23:15:27 +00:00
REPLACE INTO " translators " VALUES ( ' 838d8849-4ffb-9f44-3d0d-aa8a0a079afe ' , ' 2006-10-02 17:00:00 ' , 1 , 100 , 4 , ' WorldCat ' , ' Simon Kornblith ' , ' ^http://(?:new)?firstsearch\.oclc\.org/WebZ/ ' ,
2006-08-08 01:06:33 +00:00
' function detectWeb(doc, url) {
2006-08-26 04:59:30 +00:00
var detailRe = / FirstSearch : [ \ w ] + Detailed Record / ;
var searchRe = / FirstSearch : [ \ w ] + List of Records / ;
if ( detailRe . test ( doc . title ) ) {
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
return " book " ;
2006-08-26 04:59:30 +00:00
} else if ( searchRe . test ( doc . title ) ) {
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
return " multiple " ;
2006-06-25 16:13:47 +00:00
}
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
} ' ,
2006-08-17 07:56:01 +00:00
' function processURLs(urls) {
if ( ! urls . length ) { / / last url
2006-10-02 23:15:27 +00:00
Zotero . done ( ) ;
2006-08-17 07:56:01 +00:00
return ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
}
2006-08-17 07:56:01 +00:00
var newUrl = urls . shift ( ) ;
2006-10-02 23:15:27 +00:00
Zotero . Utilities . HTTP . doPost ( newUrl ,
2006-08-17 07:56:01 +00:00
' ' exportselect = record & exporttype = plaintext ' ' , function ( text ) {
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
var lineRegexp = new RegExp ( ) ;
lineRegexp . compile ( " ^([\\w() ]+): *(.*)$ " ) ;
2006-10-02 23:15:27 +00:00
var newItem = new Zotero . Item ( " book " ) ;
2006-08-17 07:56:01 +00:00
newItem . extra = " " ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
var lines = text . split ( ' ' \ n ' ' ) ;
for ( var i = 0 ; i < lines . length ; i + + ) {
2006-08-17 07:56:01 +00:00
var testMatch = lineRegexp . exec ( lines [ i ] ) ;
if ( testMatch ) {
var match = newMatch ;
var newMatch = testMatch
} else {
var match = false ;
}
if ( match ) {
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
/ / is a useful match
if ( match [ 1 ] = = ' ' Title ' ' ) {
var title = match [ 2 ] ;
if ( ! lineRegexp . test ( lines [ i + 1 ] ) ) {
i + + ;
title + = ' ' ' ' + lines [ i ] ;
}
if ( title . substring ( title . length - 2 ) = = " / " ) {
title = title . substring ( 0 , title . length - 2 ) ;
}
2006-10-02 23:15:27 +00:00
newItem . title = Zotero . Utilities . capitalizeTitle ( title ) ;
2006-09-08 05:47:47 +00:00
} else if ( match [ 1 ] = = " Series " ) {
newItem . series = match [ 2 ] ;
} else if ( match [ 1 ] = = " Description " ) {
var pageMatch = / ( [ 0 - 9 ] + ) p \ . ? /
var m = pageMatch . exec ( match [ 2 ] ) ;
if ( m ) {
newItem . pages = m [ 1 ] ;
}
} else if ( match [ 1 ] = = ' ' Author ( s ) ' ' | | match [ 1 ] = = " Corp Author(s) " ) {
2006-08-17 07:56:01 +00:00
var yearRegexp = / [ 0 - 9 ] { 4 } - ( [ 0 - 9 ] { 4 } ) ? / ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
var authors = match [ 2 ] . split ( ' ' ; ' ' ) ;
if ( authors ) {
2006-10-02 23:15:27 +00:00
newItem . creators . push ( Zotero . Utilities . cleanAuthor ( authors [ 0 ] , " author " , true ) ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
for ( var j = 1 ; j < authors . length ; j + = 2 ) {
2006-08-17 07:56:01 +00:00
if ( authors [ j - 1 ] . substring ( 0 , 1 ) ! = ' ' ( ' ' & & ! yearRegexp . test ( authors [ j ] ) ) {
/ / ignore places where there are parentheses
2006-09-08 05:47:47 +00:00
newItem . creators . push ( { lastName : authors [ j ] , creatorType : " author " , isInstitution : true } ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
}
2006-06-07 17:44:55 +00:00
}
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
} else {
2006-10-02 23:15:27 +00:00
newItem . creators . push ( Zotero . Utilities . cleanString ( match [ 2 ] ) ) ;
2006-06-07 17:44:55 +00:00
}
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
} else if ( match [ 1 ] = = ' ' Publication ' ' ) {
2006-10-02 23:15:27 +00:00
match [ 2 ] = Zotero . Utilities . cleanString ( match [ 2 ] ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
if ( match [ 2 ] . substring ( match [ 2 ] . length - 1 ) = = ' ' , ' ' ) {
2006-09-08 05:47:47 +00:00
match [ 2 ] = match [ 2 ] . substring ( 0 , match [ 2 ] . length - 1 ) ;
}
/ / most , but not all , WorldCat publisher / places are
/ / colon delimited
var parts = match [ 2 ] . split ( / ? : ? / ) ;
if ( parts . length = = 2 ) {
newItem . place = parts [ 0 ] ;
newItem . publisher = parts [ 1 ] ;
} else {
newItem . publisher = match [ 2 ] ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
}
2006-08-26 04:59:30 +00:00
} else if ( match [ 1 ] = = ' ' Institution ' ' ) {
newItem . publisher = match [ 2 ] ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
} else if ( match [ 1 ] = = ' ' Standard No ' ' ) {
2006-09-08 05:47:47 +00:00
var ISBNRe = / ISBN : \ s * ( [ 0 - 9 X ] + ) /
var m = ISBNRe . exec ( match [ 2 ] ) ;
if ( m ) newItem . ISBN = m [ 1 ] ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
} else if ( match [ 1 ] = = ' ' Year ' ' ) {
2006-08-14 05:12:28 +00:00
newItem . date = match [ 2 ] ;
2006-08-17 07:56:01 +00:00
} else if ( match [ 1 ] = = " Descriptor " ) {
if ( match [ 2 ] [ match [ 2 ] . length - 1 ] = = " . " ) {
match [ 2 ] = match [ 2 ] . substr ( 0 , match [ 2 ] . length - 1 ) ;
}
var tags = match [ 2 ] . split ( " -- " ) ;
for ( var j in tags ) {
2006-10-02 23:15:27 +00:00
newItem . tags . push ( Zotero . Utilities . cleanString ( tags [ j ] ) ) ;
2006-08-17 07:56:01 +00:00
}
} else if ( match [ 1 ] = = " Accession No " ) {
2006-10-02 23:15:27 +00:00
newItem . accessionNumber = Zotero . Utilities . superCleanString ( match [ 2 ] ) ;
2006-08-26 04:59:30 +00:00
} else if ( match [ 1 ] = = " Degree " ) {
newItem . itemType = " thesis " ;
newItem . thesisType = match [ 2 ] ;
} else if ( match [ 1 ] = = " DOI " ) {
newItem . DOI = match [ 2 ] ;
} else if ( match [ 1 ] = = " Database " ) {
if ( match [ 2 ] . substr ( 0 , 8 ) ! = " WorldCat " ) {
newItem . itemType = " journalArticle " ;
}
2006-09-08 05:47:47 +00:00
} else if ( match [ 1 ] ! = " Availability " & &
match [ 1 ] ! = " Find Items About " & &
match [ 1 ] ! = " Document Type " ) {
2006-08-17 07:56:01 +00:00
newItem . extra + = match [ 1 ] + " : " + match [ 2 ] + " \n " ;
}
} else {
if ( lines [ i ] ! = " " & & lines [ i ] ! = " SUBJECT(S) " ) {
newMatch [ 2 ] + = " " + lines [ i ] ;
2006-06-06 18:25:45 +00:00
}
2006-06-07 17:44:55 +00:00
}
}
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
2006-08-17 07:56:01 +00:00
if ( newItem . extra ) {
newItem . extra = newItem . extra . substr ( 0 , newItem . extra . length - 1 ) ;
}
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
newItem . complete ( ) ;
2006-08-17 07:56:01 +00:00
processURLs ( urls ) ;
} ) ;
}
function doWeb ( doc , url ) {
var sessionRegexp = / ( ? : \ ? | \ : ) sessionid = ( [ ^ ? : ] + ) ( ? : \ ? | \ : | $ ) / ;
var numberRegexp = / ( ? : \ ? | \ : ) recno = ( [ ^ ? : ] + ) ( ? : \ ? | \ : | $ ) / ;
var resultsetRegexp = / ( ? : \ ? | \ : ) resultset = ( [ ^ ? : ] + ) ( ? : \ ? | \ : | $ ) / ;
var hostRegexp = new RegExp ( " http://([^/]+)/ " ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
2006-08-17 07:56:01 +00:00
var sMatch = sessionRegexp . exec ( url ) ;
var sessionid = sMatch [ 1 ] ;
var hMatch = hostRegexp . exec ( url ) ;
var host = hMatch [ 1 ] ;
var newUri , exportselect ;
2006-08-26 04:59:30 +00:00
var detailRe = / FirstSearch : [ \ w ] + Detailed Record / ;
if ( detailRe . test ( doc . title ) ) {
2006-08-17 07:56:01 +00:00
var publisherRegexp = / ^ ( . * ) , ( . * ? ) , ? $ / ;
var nMatch = numberRegexp . exec ( url ) ;
if ( nMatch ) {
var number = nMatch [ 1 ] ;
} else {
number = 1 ;
}
var rMatch = resultsetRegexp . exec ( url ) ;
if ( rMatch ) {
var resultset = rMatch [ 1 ] ;
} else {
/ / It ' ' s in an XPCNativeWrapper , so we have to do this black magic
resultset = doc . forms . namedItem ( ' ' main ' ' ) . elements . namedItem ( ' ' resultset ' ' ) . value ;
}
urls = [ ' ' http : / / ' ' + host + ' ' / WebZ / DirectExport ? numrecs = 10 : smartpage = directexport : entityexportnumrecs = 10 : entityexportresultset = ' ' + resultset + ' ' : entityexportrecno = ' ' + number + ' ' : sessionid = ' ' + sessionid + ' ' : entitypagenum = 35 : 0 ' ' ] ;
} else {
2006-10-02 23:15:27 +00:00
var items = Zotero . Utilities . getItemArray ( doc , doc , ' ' / WebZ / FSFETCH \ \ ? fetchtype = fullrecord ' ' , ' ' ^ ( See more details for locating this item | Detailed Record ) $ ' ' ) ;
items = Zotero . selectItems ( items ) ;
2006-08-17 07:56:01 +00:00
if ( ! items ) {
return true ;
}
var urls = new Array ( ) ;
for ( var i in items ) {
var nMatch = numberRegexp . exec ( i ) ;
var rMatch = resultsetRegexp . exec ( i ) ;
if ( rMatch & & nMatch ) {
var number = nMatch [ 1 ] ;
var resultset = rMatch [ 1 ] ;
urls . push ( ' ' http : / / ' ' + host + ' ' / WebZ / DirectExport ? numrecs = 10 : smartpage = directexport : entityexportnumrecs = 10 : entityexportresultset = ' ' + resultset + ' ' : entityexportrecno = ' ' + number + ' ' : sessionid = ' ' + sessionid + ' ' : entitypagenum = 35 : 0 ' ' ) ;
}
}
}
processURLs ( urls ) ;
2006-10-02 23:15:27 +00:00
Zotero . wait ( ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
} ' );
2006-06-06 18:25:45 +00:00
2006-10-02 23:15:27 +00:00
REPLACE INTO " translators " VALUES ( ' 88915634-1af6-c134-0171-56fd198235ed ' , ' 2006-10-02 17:00:00 ' , 1 , 100 , 4 , ' LOC/Voyager WebVoyage ' , ' Simon Kornblith ' , ' Pwebrecon\.cgi ' ,
2006-08-08 01:06:33 +00:00
' function detectWeb(doc, url) {
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
var export_options = doc . forms . namedItem ( ' ' frm ' ' ) . elements . namedItem ( ' ' RD ' ' ) . options ;
for ( var i in export_options ) {
if ( export_options [ i ] . text = = ' ' Latin1 MARC ' '
| | export_options [ i ] . text = = ' ' Raw MARC ' '
| | export_options [ i ] . text = = ' ' UTF - 8 ' '
| | export_options [ i ] . text = = ' ' MARC ( Unicode / UTF - 8 ) ' '
| | export_options [ i ] . text = = ' ' MARC ( non - Unicode / MARC - 8 ) ' ' ) {
/ / We have an exportable single record
if ( doc . forms . namedItem ( ' ' frm ' ' ) . elements . namedItem ( ' ' RC ' ' ) ) {
return " multiple " ;
} else {
return " book " ;
}
2006-06-26 18:05:23 +00:00
}
2006-06-06 18:25:45 +00:00
}
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
} ' ,
' function doWeb(doc, url) {
var postString = ' '' ' ;
var form = doc . forms . namedItem ( ' ' frm ' ' ) ;
var newUri = form . action ;
var multiple = false ;
2006-06-22 20:50:57 +00:00
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
if ( doc . forms . namedItem ( ' ' frm ' ' ) . elements . namedItem ( ' ' RC ' ' ) ) {
multiple = true ;
2006-06-22 20:50:57 +00:00
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
var availableItems = new Object ( ) ; / / Technically , associative arrays are objects
var namespace = doc . documentElement . namespaceURI ;
var nsResolver = namespace ? function ( prefix ) {
if ( prefix = = ' ' x ' ' ) return namespace ; else return null ;
} : null ;
/ / Require link to match this
var tagRegexp = new RegExp ( ) ;
tagRegexp . compile ( ' ' Pwebrecon \ \ . cgi \ \ ? . * v1 = [ 0 - 9 ] + \ \ & . * ti = ' ' ) ;
/ / Do not allow text to match this
var rejectRegexp = new RegExp ( ) ;
rejectRegexp . compile ( ' ' \ [ [ 0 - 9 ] + \ ] ' ' ) ;
var checkboxes = new Array ( ) ;
var urls = new Array ( ) ;
2006-08-17 07:56:01 +00:00
var tableRows = doc . evaluate ( ' ' / html / body / form / table / tbody / tr [ td / input [ @ type = " checkbox " ] ] ' ' , doc , nsResolver , XPathResult . ANY_TYPE , null ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
/ / Go through table rows
2006-08-17 07:56:01 +00:00
var tableRow ;
var i = 0 ;
while ( tableRow = tableRows . iterateNext ( ) ) {
i + + ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
/ / CHK is what we need to get it all as one file
2006-08-17 07:56:01 +00:00
var input = doc . evaluate ( ' ' . / td / input [ @ name = " CHK " ] ' ' , tableRow , nsResolver , XPathResult . ANY_TYPE , null ) . iterateNext ( ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
checkboxes [ i ] = input . value ;
2006-08-17 07:56:01 +00:00
var links = tableRow . getElementsByTagName ( " a " ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
/ / Go through links
for ( var j = 0 ; j < links . length ; j + + ) {
if ( tagRegexp . test ( links [ j ] . href ) ) {
2006-10-02 23:15:27 +00:00
var text = Zotero . Utilities . getNodeString ( doc , links [ j ] , " .//text() " , null ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
if ( text ) {
2006-10-02 23:15:27 +00:00
text = Zotero . Utilities . cleanString ( text ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
if ( ! rejectRegexp . test ( text ) ) {
if ( availableItems [ i ] ) {
availableItems [ i ] + = " " + text ;
} else {
availableItems [ i ] = text ;
}
2006-06-22 20:50:57 +00:00
}
}
}
}
}
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
2006-10-02 23:15:27 +00:00
var items = Zotero . selectItems ( availableItems ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
if ( ! items ) {
return true ;
}
/ / add arguments for items we need to grab
for ( var i in items ) {
postString + = " CHK= " + checkboxes [ i ] + " & " ;
}
2006-06-22 20:50:57 +00:00
}
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
var raw , unicode , latin1 ;
2006-06-22 20:50:57 +00:00
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
for ( var i = 0 ; i < form . elements . length ; i + + ) {
if ( form . elements [ i ] . type & & form . elements [ i ] . type . toLowerCase ( ) = = ' ' hidden ' ' ) {
postString + = escape ( form . elements [ i ] . name ) + ' ' = ' ' + escape ( form . elements [ i ] . value ) + ' ' & ' ' ;
}
2006-06-06 18:25:45 +00:00
}
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
var export_options = form . elements . namedItem ( ' ' RD ' ' ) . options ;
for ( var i = 0 ; i < export_options . length ; i + + ) {
if ( export_options [ i ] . text = = ' ' Raw MARC ' '
| | export_options [ i ] . text = = ' ' MARC ( non - Unicode / MARC - 8 ) ' ' ) {
raw = i ;
} if ( export_options [ i ] . text = = ' ' Latin1 MARC ' ' ) {
latin1 = i ;
} else if ( export_options [ i ] . text = = ' ' UTF - 8 ' '
| | export_options [ i ] . text = = ' ' MARC ( Unicode / UTF - 8 ) ' ' ) {
unicode = i ;
2006-06-25 18:17:00 +00:00
}
}
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
if ( unicode ) {
var rd = unicode ;
} else if ( latin1 ) {
var rd = latin1 ;
} else if ( raw ) {
var rd = raw ;
} else {
return false ;
2006-06-25 18:17:00 +00:00
}
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
postString + = ' ' RD = ' ' + rd + ' ' & MAILADDY = & SAVE = Press + to + SAVE + or + PRINT ' ' ;
/ / No idea why this doesn ' ' t work as post
2006-10-02 23:15:27 +00:00
Zotero . Utilities . HTTP . doGet ( newUri + ' ' ? ' ' + postString , function ( text ) {
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
/ / load translator for MARC
2006-10-02 23:15:27 +00:00
var marc = Zotero . loadTranslator ( " import " ) ;
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
marc . setTranslator ( " a6ee60df-1ddc-4aae-bb25-45e0537be973 " ) ;
marc . setString ( text ) ;
marc . translate ( ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
2006-10-02 23:15:27 +00:00
Zotero . done ( ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
} )
2006-10-02 23:15:27 +00:00
Zotero . wait ( ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
} ' );
2006-10-02 23:15:27 +00:00
REPLACE INTO " translators " VALUES ( ' d921155f-0186-1684-615c-ca57682ced9b ' , ' 2006-10-02 17:00:00 ' , 1 , 100 , 4 , ' JSTOR ' , ' Simon Kornblith ' , ' ^http://www\.jstor\.org/(?:view|browse|search/) ' ,
2006-08-08 01:06:33 +00:00
' function detectWeb(doc, url) {
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
var namespace = doc . documentElement . namespaceURI ;
var nsResolver = namespace ? function ( prefix ) {
if ( prefix = = ' ' x ' ' ) return namespace ; else return null ;
} : null ;
/ / See if this is a seach results page
if ( doc . title = = " JSTOR: Search Results " ) {
return " multiple " ;
2006-06-25 18:17:00 +00:00
}
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
2006-06-25 18:17:00 +00:00
/ / If this is a view page , find the link to the citation
2006-08-17 07:56:01 +00:00
var xpath = ' ' / html / body / div [ @ class = " indent " ] / center / / a [ @ class = " nav " ] ' ' ;
var elmts = doc . evaluate ( xpath , doc , nsResolver , XPathResult . ANY_TYPE , null ) ;
if ( elmts . iterateNext ( ) ) {
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
return " journalArticle " ;
}
} ' ,
2006-08-17 07:56:01 +00:00
' function getList(urls, each, done) {
2006-06-25 18:17:00 +00:00
var url = urls . shift ( ) ;
2006-10-02 23:15:27 +00:00
Zotero . Utilities . HTTP . doGet ( url , function ( text ) {
2006-06-25 18:17:00 +00:00
if ( each ) {
each ( text ) ;
}
if ( urls . length ) {
2006-08-17 07:56:01 +00:00
getList ( urls , each , done ) ;
2006-06-25 18:17:00 +00:00
} else if ( done ) {
done ( text ) ;
}
2006-08-17 07:56:01 +00:00
} ) ;
}
function getJSTORAttachment ( viewURL ) {
var viewRe = new RegExp ( " (^http://[^/]+/)view([^?]+) " ) ;
var m = viewRe . exec ( viewURL ) ;
if ( m ) {
return { url : m [ 1 ] + " cgi-bin/jstor/printpage " + m [ 2 ] + " .pdf?dowhat=Acrobat " ,
mimeType : " application/pdf " , title : " JSTOR Full Text PDF " ,
downloadable : true } ;
} else {
return false ;
}
2006-06-06 18:25:45 +00:00
}
2006-06-20 17:06:41 +00:00
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
function itemComplete ( newItem , url ) {
2006-08-17 07:56:01 +00:00
if ( newItem . url ) {
newItem . attachments . push ( { url : newItem . url , mimeType : " text/html " ,
title : " JSTOR Web-Readable Version " } ) ;
} else {
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
if ( newItem . ISSN ) {
2006-08-17 07:56:01 +00:00
newItem . url = " http://www.jstor.org/browse/ " + newItem . ISSN ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
} else {
2006-08-17 07:56:01 +00:00
newItem . url = url ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
}
}
2006-08-17 07:56:01 +00:00
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
newItem . complete ( ) ;
2006-06-25 18:17:00 +00:00
}
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
function doWeb ( doc , url ) {
var namespace = doc . documentElement . namespaceURI ;
var nsResolver = namespace ? function ( prefix ) {
if ( prefix = = ' ' x ' ' ) return namespace ; else return null ;
} : null ;
var saveCitations = new Array ( ) ;
2006-08-17 07:56:01 +00:00
var viewPages = new Array ( ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
if ( doc . title = = " JSTOR: Search Results " ) {
var availableItems = new Object ( ) ;
/ / Require link to match this
var tagRegexp = new RegExp ( ) ;
tagRegexp . compile ( ' ' citationAction = ' ' ) ;
2006-08-17 07:56:01 +00:00
var tableRows = doc . evaluate ( ' ' / html / body / div [ @ class = " indent " ] / table / tbody / tr [ td / span [ @ class = " printDownloadSaveLinks " ] ] ' ' , doc , nsResolver , XPathResult . ANY_TYPE , null ) ;
var tableRow ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
/ / Go through table rows
2006-08-17 07:56:01 +00:00
var tableView = new Array ( ) ;
var tableSave = new Array ( ) ;
var i = 0 ;
while ( tableRow = tableRows . iterateNext ( ) ) {
i + + ;
var links = tableRow . getElementsByTagName ( " a " ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
/ / Go through links
for ( var j = 0 ; j < links . length ; j + + ) {
2006-08-17 07:56:01 +00:00
if ( links [ j ] . href . indexOf ( " citationAction= " ) ! = - 1 ) {
tableSave [ i ] = links [ j ] . href ;
var link = doc . evaluate ( ' ' . / / a [ strong ] ' ' , tableRow , null , XPathResult . ANY_TYPE , null ) . iterateNext ( ) ;
if ( link ) {
tableView [ i ] = link . href ;
}
var text = doc . evaluate ( ' ' . / / strong / text ( ) ' ' , tableRow , null , XPathResult . ANY_TYPE , null ) . iterateNext ( ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
if ( text & & text . nodeValue ) {
2006-10-02 23:15:27 +00:00
text = Zotero . Utilities . cleanString ( text . nodeValue ) ;
2006-08-17 07:56:01 +00:00
if ( availableItems [ i ] ) {
availableItems [ i ] + = " " + text ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
} else {
2006-08-17 07:56:01 +00:00
availableItems [ i ] = text ;
2006-06-25 18:17:00 +00:00
}
2006-06-06 18:25:45 +00:00
}
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
}
}
}
2006-10-02 23:15:27 +00:00
var items = Zotero . selectItems ( availableItems ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
if ( ! items ) {
return true ;
}
for ( var i in items ) {
2006-08-17 07:56:01 +00:00
viewPages . push ( tableView [ i ] ) ;
saveCitations . push ( tableSave [ i ] . replace ( ' ' citationAction = remove ' ' , ' ' citationAction = save ' ' ) ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
}
} else {
/ / If this is a view page , find the link to the citation
2006-08-17 07:56:01 +00:00
var xpath = ' ' / html / body / div [ @ class = " indent " ] / center / / a [ @ class = " nav " ] ' ' ;
var elmts = doc . evaluate ( xpath , doc , nsResolver , XPathResult . ANY_TYPE , null ) ;
var saveCitation = elmts . iterateNext ( ) ;
var viewSavedCitations = elmts . iterateNext ( ) ;
if ( saveCitation & & viewSavedCitations ) {
viewPages . push ( url ) ;
saveCitations . push ( saveCitation . href . replace ( ' ' citationAction = remove ' ' , ' ' citationAction = save ' ' ) ) ;
} else {
throw ( " Could not find citation save links " ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
}
}
2006-10-02 23:15:27 +00:00
Zotero . Utilities . HTTP . doGet ( ' ' http : / / www . jstor . org / browse ? citationAction = removeAll & confirmRemAll = on & viewCitations = 1 ' ' , function ( ) { / / clear marked
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
/ / Mark all our citations
getList ( saveCitations , null , function ( ) { / / mark this
2006-10-02 23:15:27 +00:00
Zotero . Utilities . HTTP . doGet ( ' ' http : / / www . jstor . org / browse / citations . txt ? exportAction = Save + as + Text + File & exportFormat = cm & viewCitations = 1 ' ' , function ( text ) {
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
/ / get marked
var k = 0 ;
var lines = text . split ( " \n " ) ;
var haveStarted = false ;
var newItemRe = / ^ < [ 0 - 9 ] + > / ;
2006-10-02 23:15:27 +00:00
var newItem = new Zotero . Item ( " journalArticle " ) ;
2006-08-17 07:56:01 +00:00
newItem . attachments . push ( getJSTORAttachment ( viewPages [ k ] ) ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
for ( var i in lines ) {
if ( lines [ i ] . substring ( 0 , 3 ) = = " <1> " ) {
haveStarted = true ;
} else if ( newItemRe . test ( lines [ i ] ) ) {
itemComplete ( newItem , url ) ;
2006-08-17 07:56:01 +00:00
k + + ;
2006-10-02 23:15:27 +00:00
newItem = new Zotero . Item ( " journalArticle " ) ;
2006-08-17 07:56:01 +00:00
newItem . attachments . push ( getJSTORAttachment ( viewPages [ k ] ) ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
} else if ( lines [ i ] . substring ( 2 , 5 ) = = " : " & & haveStarted ) {
var fieldCode = lines [ i ] . substring ( 0 , 2 ) ;
2006-10-02 23:15:27 +00:00
var fieldContent = Zotero . Utilities . cleanString ( lines [ i ] . substring ( 5 ) )
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
if ( fieldCode = = " TI " ) {
2006-08-17 07:56:01 +00:00
if ( fieldContent ) {
newItem . title = fieldContent ;
} else {
newItem . title = " [untitled] " ;
}
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
} else if ( fieldCode = = " AU " ) {
var authors = fieldContent . split ( " ; " ) ;
for ( j in authors ) {
if ( authors [ j ] ) {
2006-10-02 23:15:27 +00:00
newItem . creators . push ( Zotero . Utilities . cleanAuthor ( authors [ j ] , " author " , true ) ) ;
2006-06-07 16:48:03 +00:00
}
2006-06-06 18:25:45 +00:00
}
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
} else if ( fieldCode = = " SO " ) {
2006-08-06 17:34:41 +00:00
newItem . publicationTitle = fieldContent ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
} else if ( fieldCode = = " VO " ) {
newItem . volume = fieldContent ;
} else if ( fieldCode = = " NO " ) {
2006-08-06 17:34:41 +00:00
newItem . issue = fieldContent ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
} else if ( fieldCode = = " SE " ) {
2006-08-06 17:34:41 +00:00
newItem . seriesTitle = fieldContent ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
} else if ( fieldCode = = " DA " ) {
2006-08-31 00:04:11 +00:00
newItem . date = fieldContent ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
} else if ( fieldCode = = " PP " ) {
newItem . pages = fieldContent ;
} else if ( fieldCode = = " EI " ) {
2006-08-17 07:56:01 +00:00
newItem . url = fieldContent ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
} else if ( fieldCode = = " IN " ) {
newItem . ISSN = fieldContent ;
} else if ( fieldCode = = " PB " ) {
newItem . publisher = fieldContent ;
2006-06-06 18:25:45 +00:00
}
}
}
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
/ / last item is complete
if ( haveStarted ) {
itemComplete ( newItem , url ) ;
2006-06-06 18:25:45 +00:00
}
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
2006-10-02 23:15:27 +00:00
Zotero . done ( ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
} ) ;
2006-08-17 07:56:01 +00:00
} ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
} ) ;
2006-10-02 23:15:27 +00:00
Zotero . wait ( ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
} ' );
2006-06-06 18:25:45 +00:00
2006-10-02 23:15:27 +00:00
REPLACE INTO " translators " VALUES ( ' e85a3134-8c1a-8644-6926-584c8565f23e ' , ' 2006-10-02 17:00:00 ' , 1 , 100 , 4 , ' History Cooperative ' , ' Simon Kornblith ' , ' ^http://www\.historycooperative\.org/(?:journals/.+/.+/.+\.s?html$|cgi-bin/search.cgi) ' ,
2006-08-08 01:06:33 +00:00
' function detectWeb(doc, url) {
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
if ( doc . title = = " History Cooperative: Search Results " ) {
return " multiple " ;
} else {
return " journalArticle " ;
}
2006-06-26 18:05:23 +00:00
} ' ,
2006-10-02 23:15:27 +00:00
' function associateMeta(newItem, metaTags, field, zoteroField) {
2006-06-06 18:25:45 +00:00
var field = metaTags . namedItem ( field ) ;
if ( field ) {
2006-10-02 23:15:27 +00:00
newItem [ zoteroField ] = field . getAttribute ( " content " ) ;
2006-06-06 18:25:45 +00:00
}
}
2006-06-25 18:34:23 +00:00
function scrape ( doc ) {
2006-10-02 23:15:27 +00:00
var newItem = new Zotero . Item ( " journalArticle " ) ;
2006-08-17 07:56:01 +00:00
newItem . url = doc . location . href ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
2006-06-25 18:34:23 +00:00
var month , year ;
var metaTags = doc . getElementsByTagName ( " meta " ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
associateMeta ( newItem , metaTags , " Title " , " title " ) ;
2006-08-17 07:56:01 +00:00
associateMeta ( newItem , metaTags , " Journal " , " publicationTitle " ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
associateMeta ( newItem , metaTags , " Volume " , " volume " ) ;
2006-08-17 07:56:01 +00:00
associateMeta ( newItem , metaTags , " Issue " , " issue " ) ;
2006-06-25 18:34:23 +00:00
var author = metaTags . namedItem ( " Author " ) ;
if ( author ) {
var authors = author . getAttribute ( " content " ) . split ( " and " ) ;
for ( j in authors ) {
2006-10-02 23:15:27 +00:00
newItem . creators . push ( Zotero . Utilities . cleanAuthor ( authors [ j ] , " author " ) ) ;
2006-06-25 18:34:23 +00:00
}
2006-06-06 18:25:45 +00:00
}
2006-06-25 18:34:23 +00:00
2006-08-17 07:56:01 +00:00
var month = metaTags . namedItem ( " PublicationMonth " ) ;
2006-06-25 18:34:23 +00:00
var year = metaTags . namedItem ( " PublicationYear " ) ;
if ( month & & year ) {
2006-08-17 07:56:01 +00:00
newItem . date = month . getAttribute ( " content " ) + " " + year . getAttribute ( " content " ) ;
}
2006-08-26 07:27:02 +00:00
newItem . attachments . push ( { document : doc , title : " History Cooperative Full Text " ,
downloadable : true } ) ;
newItem . complete ( ) ;
2006-06-06 18:25:45 +00:00
}
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
function doWeb ( doc , url ) {
if ( doc . title = = " History Cooperative: Search Results " ) {
2006-10-02 23:15:27 +00:00
var items = Zotero . Utilities . getItemArray ( doc , doc , ' ' ^ http : / / [ ^ / ] + / journals / . + / . + / . + \ . html $ ' ' ) ;
items = Zotero . selectItems ( items ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
if ( ! items ) {
return true ;
}
var uris = new Array ( ) ;
for ( var i in items ) {
uris . push ( i ) ;
}
2006-10-02 23:15:27 +00:00
Zotero . Utilities . processDocuments ( uris , function ( doc ) { scrape ( doc ) } ,
function ( ) { Zotero . done ( ) ; } , null ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
2006-10-02 23:15:27 +00:00
Zotero . wait ( ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
} else {
scrape ( doc ) ;
2006-06-25 18:34:23 +00:00
}
2006-06-25 19:32:49 +00:00
} ' );
2006-06-06 18:25:45 +00:00
2006-10-02 23:15:27 +00:00
REPLACE INTO " translators " VALUES ( ' 4fd6b89b-2316-2dc4-fd87-61a97dd941e8 ' , ' 2006-10-02 17:00:00 ' , 1 , 100 , 4 , ' InnoPAC ' , ' Simon Kornblith ' , ' ^http://[^/]+/(?:search/|record=) ' ,
2006-08-08 01:06:33 +00:00
' function detectWeb(doc, url) {
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
/ / First , check to see if the URL alone reveals InnoPAC , since some sites don ' ' t reveal the MARC button
var matchRegexp = new RegExp ( ' ' ^ ( http : / / [ ^ / ] + / search / [ ^ / ] + / [ ^ / ] + / 1 \ % 2 C [ ^ / ] + / ) frameset ( . + ) $ ' ' ) ;
if ( matchRegexp . test ( doc . location . href ) ) {
return " book " ;
2006-06-23 14:12:34 +00:00
}
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
/ / Next , look for the MARC button
2006-06-18 21:00:43 +00:00
var namespace = doc . documentElement . namespaceURI ;
var nsResolver = namespace ? function ( prefix ) {
if ( prefix = = ' ' x ' ' ) return namespace ; else return null ;
} : null ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
2006-08-17 07:56:01 +00:00
var xpath = ' ' / / a [ img [ @ src = " /screens/marcdisp.gif " or @ alt = " MARC Display " or @ src = " /screens/regdisp.gif " or @ alt = " REGULAR RECORD DISPLAY " ] ] ' ' ;
var elmt = doc . evaluate ( xpath , doc , nsResolver , XPathResult . ANY_TYPE , null ) . iterateNext ( ) ;
if ( elmt ) {
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
return " book " ;
2006-06-23 14:12:34 +00:00
}
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
/ / Also , check for links to an item display page
var tags = doc . getElementsByTagName ( " a " ) ;
for ( var i = 0 ; i < tags . length ; i + + ) {
if ( matchRegexp . test ( tags [ i ] . href ) ) {
return " multiple " ;
2006-06-23 14:12:34 +00:00
}
}
2006-06-06 18:25:45 +00:00
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
return false ;
} ' ,
2006-08-17 07:56:01 +00:00
' function scrape(marc, newDoc) {
var namespace = newDoc . documentElement . namespaceURI ;
var nsResolver = namespace ? function ( prefix ) {
if ( prefix = = ' ' x ' ' ) return namespace ; else return null ;
} : null ;
var xpath = ' ' / / pre / text ( ) [ 1 ] ' ' ;
var text = newDoc . evaluate ( xpath , newDoc , nsResolver ,
XPathResult . ANY_TYPE , null ) . iterateNext ( ) . nodeValue ;
2006-10-02 23:15:27 +00:00
var newItem = new Zotero . Item ( ) ;
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
var record = new marc . record ( ) ;
2006-08-17 07:56:01 +00:00
var linee = text . split ( " \n " ) ;
for ( var i = 0 ; i < linee . length ; i + + ) {
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
if ( ! linee [ i ] ) {
continue ;
}
linee [ i ] = linee [ i ] . replace ( / [ \ xA0_ \ t ] / g , " " ) ;
var value = linee [ i ] . substr ( 7 ) ;
if ( linee [ i ] . substr ( 0 , 6 ) = = " " ) {
/ / add this onto previous value
tagValue + = value ;
} else {
if ( linee [ i ] . substr ( 0 , 6 ) = = " LEADER " ) {
/ / trap leader
record . leader = value ;
} else {
if ( tagValue ) { / / finish last tag
tagValue = tagValue . replace ( / \ | ( . ) / g , marc . subfieldDelimiter + " $1 " ) ;
if ( tagValue [ 0 ] ! = marc . subfieldDelimiter ) {
tagValue = marc . subfieldDelimiter + " a " + tagValue ;
}
/ / add previous tag
record . addField ( tag , ind , tagValue ) ;
}
var tag = linee [ i ] . substr ( 0 , 3 ) ;
var ind = linee [ i ] . substr ( 4 , 2 ) ;
var tagValue = value ;
}
2006-08-17 07:56:01 +00:00
}
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
}
if ( tagValue ) {
tagValue = tagValue . replace ( / \ | ( . ) / g , marc . subfieldDelimiter + " $1 " ) ;
if ( tagValue [ 0 ] ! = marc . subfieldDelimiter ) {
tagValue = marc . subfieldDelimiter + " a " + tagValue ;
2006-08-17 07:56:01 +00:00
}
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
/ / add previous tag
record . addField ( tag , ind , tagValue ) ;
2006-08-17 07:56:01 +00:00
}
record . translate ( newItem ) ;
newItem . complete ( ) ;
}
function pageByPage ( marc , urls ) {
2006-10-02 23:15:27 +00:00
Zotero . Utilities . processDocuments ( urls , function ( newDoc ) {
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
scrape ( marc . getTranslatorObject ( ) , newDoc ) ;
2006-10-02 23:15:27 +00:00
} , function ( ) { Zotero . done ( ) } ) ;
2006-08-17 07:56:01 +00:00
}
function doWeb ( doc , url ) {
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
var uri = doc . location . href ;
var newUri ;
2006-08-17 07:56:01 +00:00
/ / load translator for MARC
2006-10-02 23:15:27 +00:00
var marc = Zotero . loadTranslator ( " import " ) ;
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
marc . setTranslator ( " a6ee60df-1ddc-4aae-bb25-45e0537be973 " ) ;
2006-08-17 07:56:01 +00:00
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
var matchRegexp = new RegExp ( ' ' ^ ( http : / / [ ^ / ] + / search / [ ^ / ] + / [ ^ / ] + / 1 \ % 2 C [ ^ / ] + / ) frameset ( . + ) $ ' ' ) ;
var m = matchRegexp . exec ( uri ) ;
if ( m ) {
newUri = m [ 1 ] + ' ' marc ' ' + m [ 2 ] ;
} else {
var namespace = doc . documentElement . namespaceURI ;
var nsResolver = namespace ? function ( prefix ) {
if ( prefix = = ' ' x ' ' ) return namespace ; else return null ;
} : null ;
2006-06-23 14:12:34 +00:00
2006-08-17 07:56:01 +00:00
var xpath = ' ' / / a [ img [ @ src = " /screens/marcdisp.gif " or @ alt = " MARC Display " ] ] ' ' ;
var aTag = doc . evaluate ( xpath , doc , nsResolver , XPathResult . ANY_TYPE , null ) . iterateNext ( ) ;
if ( aTag ) {
newUri = aTag . href ;
} else {
var xpath = ' ' / / a [ img [ @ src = " /screens/regdisp.gif " or @ alt = " REGULAR RECORD DISPLAY " ] ] ' ' ;
var aTag = doc . evaluate ( xpath , doc , nsResolver , XPathResult . ANY_TYPE , null ) . iterateNext ( ) ;
if ( aTag ) {
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
scrape ( marc . getTranslatorObject ( ) , doc ) ;
2006-08-17 07:56:01 +00:00
return ;
}
2006-06-23 14:12:34 +00:00
}
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
}
2006-06-23 14:12:34 +00:00
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
if ( newUri ) { / / single page
2006-08-17 07:56:01 +00:00
pageByPage ( marc , [ newUri ] ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
} else { / / Search results page
/ / Require link to match this
var tagRegexp = new RegExp ( ) ;
tagRegexp . compile ( ' ' ^ http : / / [ ^ / ] + / search / [ ^ / ] + / [ ^ / ] + / 1 \ % 2 C [ ^ / ] + / frameset ' ' ) ;
var checkboxes = new Array ( ) ;
var urls = new Array ( ) ;
var availableItems = new Array ( ) ;
2006-09-10 17:38:17 +00:00
var firstURL = false ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
2006-08-17 07:56:01 +00:00
var tableRows = doc . evaluate ( ' ' / / table [ @ class = " browseScreen " ] / / tr [ @ class = " browseEntry " or @ class = " briefCitRow " or td / input [ @ type = " checkbox " ] ] ' ' ,
doc , nsResolver , XPathResult . ANY_TYPE , null ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
/ / Go through table rows
2006-08-17 07:56:01 +00:00
var i = 0 ;
while ( tableRow = tableRows . iterateNext ( ) ) {
/ / get link
var links = doc . evaluate ( ' ' . / / span [ @ class = " briefcitTitle " ] / a ' ' , tableRow ,
nsResolver , XPathResult . ANY_TYPE , null ) ;
var link = links . iterateNext ( ) ;
if ( ! link ) {
var links = doc . evaluate ( " .//a " , tableRow , nsResolver ,
XPathResult . ANY_TYPE , null ) ;
link = links . iterateNext ( ) ;
}
if ( link ) {
2006-09-10 17:38:17 +00:00
if ( ! checkboxes [ link . href ] ) {
/ / CHK is what we need to get it all as one file
var input = doc . evaluate ( ' ' . / td / input [ @ type = " checkbox " ] ' ' , tableRow ,
nsResolver , XPathResult . ANY_TYPE , null ) . iterateNext ( ) ;
if ( input ) {
checkboxes [ link . href ] = input . name + " = " + escape ( input . value ) ;
}
}
if ( availableItems [ link . href ] ) {
continue ;
}
2006-08-17 07:56:01 +00:00
/ / Go through links
while ( link ) {
if ( tagRegexp . test ( link . href ) ) {
2006-09-10 17:38:17 +00:00
if ( ! firstURL ) firstURL = link . href ;
2006-10-02 23:15:27 +00:00
var text = Zotero . Utilities . getNodeString ( doc , link ,
2006-08-17 07:56:01 +00:00
" .//text() " , null ) ;
if ( text ) {
2006-10-02 23:15:27 +00:00
text = Zotero . Utilities . cleanString ( text ) ;
2006-09-10 17:38:17 +00:00
if ( availableItems [ link . href ] ) {
availableItems [ link . href ] + = " " + text ;
2006-08-17 07:56:01 +00:00
} else {
2006-09-10 17:38:17 +00:00
availableItems [ link . href ] = text ;
2006-08-17 07:56:01 +00:00
}
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
}
}
2006-08-17 07:56:01 +00:00
link = links . iterateNext ( ) ;
2006-06-23 14:12:34 +00:00
}
2006-09-10 17:38:17 +00:00
i + + ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
}
2006-08-17 07:56:01 +00:00
} ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
2006-10-02 23:15:27 +00:00
Zotero . Utilities . debug ( urls ) ;
Zotero . Utilities . debug ( availableItems ) ;
var items = Zotero . selectItems ( availableItems ) ;
Zotero . Utilities . debug ( items ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
if ( ! items ) {
return true ;
}
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
var urlRe = new RegExp ( " ^(https?://[^/]+(/search/[^/]+(?:/|$))) " ) ;
2006-09-10 17:38:17 +00:00
var m = urlRe . exec ( firstURL ) ;
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
if ( ! m ) {
throw ( " urlRe choked on " + urls [ 0 ] ) ;
}
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
var clearUrl = m [ 0 ] + " ?clear_saves=1 " ;
var postUrl = m [ 0 ] ;
var exportUrl = m [ 1 ] + " ++export/1,-1,-1,B/export " ;
2006-08-17 07:56:01 +00:00
var newUrls = new Array ( ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
var postString = " " ;
2006-08-17 07:56:01 +00:00
var number = 0 ;
2006-09-10 17:38:17 +00:00
for ( var url in items ) {
if ( checkboxes [ url ] ) {
postString + = checkboxes [ url ] + " & " ;
2006-08-17 07:56:01 +00:00
number + + ;
}
2006-09-10 17:38:17 +00:00
var m = matchRegexp . exec ( url ) ;
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
if ( ! m ) {
2006-09-10 17:38:17 +00:00
throw ( " matchRegexp choked on " + url ) ;
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
}
2006-08-17 07:56:01 +00:00
newUrls . push ( m [ 1 ] + " marc " + m [ 2 ] ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
}
2006-08-17 07:56:01 +00:00
if ( postString & & number > 1 ) {
postString + = " save_func=save_marked " ;
2006-10-02 23:15:27 +00:00
Zotero . Utilities . HTTP . doGet ( clearUrl , function ( ) {
Zotero . Utilities . HTTP . doPost ( postUrl , postString , function ( ) {
Zotero . Utilities . HTTP . doPost ( exportUrl , " ex_format=50&ex_device=45&SUBMIT=Submit " , function ( text ) {
2006-08-17 07:56:01 +00:00
var notSpace = / [ ^ \ s ] /
if ( notSpace . test ( text ) ) {
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
marc . setString ( text ) ;
marc . translate ( ) ;
2006-08-17 07:56:01 +00:00
2006-10-02 23:15:27 +00:00
Zotero . done ( ) ;
2006-08-17 07:56:01 +00:00
} else {
pageByPage ( marc , newUrls ) ;
}
} ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
} ) ;
2006-06-23 14:12:34 +00:00
} ) ;
2006-08-17 07:56:01 +00:00
} else {
pageByPage ( marc , newUrls ) ;
}
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
}
2006-06-06 18:25:45 +00:00
2006-10-02 23:15:27 +00:00
Zotero . wait ( ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
} ' );
2006-06-06 18:25:45 +00:00
2006-10-02 23:15:27 +00:00
REPLACE INTO " translators " VALUES ( ' add7c71c-21f3-ee14-d188-caf9da12728b ' , ' 2006-10-02 17:00:00 ' , 1 , 100 , 4 , ' SIRSI 2003+ ' , ' Simon Kornblith ' , ' /uhtbin/cgisirsi ' ,
2006-08-08 01:06:33 +00:00
' function detectWeb(doc, url) {
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
var namespace = doc . documentElement . namespaceURI ;
var nsResolver = namespace ? function ( prefix ) {
if ( prefix = = ' ' x ' ' ) return namespace ; else return null ;
} : null ;
var xpath = ' ' / / tr [ th [ @ class = " viewmarctags " ] ] [ td [ @ class = " viewmarctags " ] ] ' ' ;
2006-08-17 07:56:01 +00:00
if ( doc . evaluate ( xpath , doc , nsResolver , XPathResult . ANY_TYPE , null ) . iterateNext ( ) ) {
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
return " book " ;
}
var xpath = ' ' / / td [ @ class = " searchsum " ] / table ' ' ;
2006-08-17 07:56:01 +00:00
if ( doc . evaluate ( xpath , doc , nsResolver , XPathResult . ANY_TYPE , null ) . iterateNext ( ) ) {
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
return " multiple " ;
}
} ' ,
' function scrape(doc) {
var namespace = doc . documentElement . namespaceURI ;
var nsResolver = namespace ? function ( prefix ) {
if ( prefix = = ' ' x ' ' ) return namespace ; else return null ;
} : null ;
2006-06-23 16:17:53 +00:00
var xpath = ' ' / / tr [ th [ @ class = " viewmarctags " ] ] [ td [ @ class = " viewmarctags " ] ] ' ' ;
2006-08-17 07:56:01 +00:00
var elmts = doc . evaluate ( xpath , doc , nsResolver , XPathResult . ANY_TYPE , null ) ;
var elmt = elmts . iterateNext ( ) ;
if ( ! elmt ) {
2006-06-23 16:17:53 +00:00
return false ;
}
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
2006-10-02 23:15:27 +00:00
var newItem = new Zotero . Item ( " book " ) ;
2006-08-17 07:56:01 +00:00
newItem . extra = " " ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
2006-08-17 07:56:01 +00:00
while ( elmt ) {
2006-06-23 16:17:53 +00:00
try {
2006-08-11 15:28:18 +00:00
var node = doc . evaluate ( ' ' . / TD [ 1 ] / A [ 1 ] / text ( ) [ 1 ] ' ' , elmt , nsResolver , XPathResult . ANY_TYPE , null ) . iterateNext ( ) ;
2006-06-23 16:17:53 +00:00
if ( ! node ) {
2006-08-11 15:28:18 +00:00
var node = doc . evaluate ( ' ' . / TD [ 1 ] / text ( ) [ 1 ] ' ' , elmt , nsResolver , XPathResult . ANY_TYPE , null ) . iterateNext ( ) ;
2006-06-06 18:25:45 +00:00
}
2006-08-17 07:56:01 +00:00
2006-06-23 16:17:53 +00:00
if ( node ) {
2006-10-02 23:15:27 +00:00
var casedField = Zotero . Utilities . superCleanString ( doc . evaluate ( ' ' . / TH [ 1 ] / text ( ) [ 1 ] ' ' , elmt , nsResolver , XPathResult . ANY_TYPE , null ) . iterateNext ( ) . nodeValue ) ;
2006-08-17 07:56:01 +00:00
field = casedField . toLowerCase ( ) ;
2006-10-02 23:15:27 +00:00
var value = Zotero . Utilities . superCleanString ( node . nodeValue ) ;
2006-06-23 16:17:53 +00:00
if ( field = = " publisher " ) {
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
newItem . publisher = value ;
2006-06-23 16:17:53 +00:00
} else if ( field = = " pub date " ) {
var re = / [ 0 - 9 ] + / ;
var m = re . exec ( value ) ;
2006-08-14 05:12:28 +00:00
newItem . date = m [ 0 ] ;
2006-06-23 16:17:53 +00:00
} else if ( field = = " isbn " ) {
var re = / ^ [ 0 - 9 ] ( ? : [ 0 - 9 X ] + ) / ;
var m = re . exec ( value ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
newItem . ISBN = m [ 0 ] ;
2006-06-23 16:17:53 +00:00
} else if ( field = = " title " ) {
var titleParts = value . split ( " / " ) ;
2006-10-02 23:15:27 +00:00
newItem . title = Zotero . Utilities . capitalizeTitle ( titleParts [ 0 ] ) ;
2006-06-23 16:17:53 +00:00
} else if ( field = = " publication info " ) {
var pubParts = value . split ( " : " ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
newItem . place = pubParts [ 0 ] ;
2006-06-23 16:17:53 +00:00
} else if ( field = = " personal author " ) {
2006-10-02 23:15:27 +00:00
newItem . creators . push ( Zotero . Utilities . cleanAuthor ( value , " author " , true ) ) ;
2006-06-23 16:17:53 +00:00
} else if ( field = = " added author " ) {
2006-10-02 23:15:27 +00:00
newItem . creators . push ( Zotero . Utilities . cleanAuthor ( value , " contributor " , true ) ) ;
2006-06-23 16:17:53 +00:00
} else if ( field = = " corporate author " ) {
2006-10-03 21:08:02 +00:00
newItem . creators . push ( { lastName : author , isInstitution : true } ) ;
2006-08-17 07:56:01 +00:00
} else if ( field = = " subject term " | | field = = " corporate subject " | | field = = " geographic term " ) {
var subjects = value . split ( " -- " ) ;
newItem . tags = newItem . tags . concat ( subjects ) ;
} else if ( field = = " personal subject " ) {
var subjects = value . split ( " , " ) ;
newItem . tags = newItem . tags . push ( value [ 0 ] + " , " + value [ 1 ] ) ;
} else if ( value & & field ! = " http " ) {
newItem . extra + = casedField + " : " + value + " \n " ;
2006-06-06 18:25:45 +00:00
}
}
2006-06-23 16:17:53 +00:00
} catch ( e ) { }
2006-08-17 07:56:01 +00:00
elmt = elmts . iterateNext ( ) ;
}
if ( newItem . extra ) {
newItem . extra = newItem . extra . substr ( 0 , newItem . extra . length - 1 ) ;
2006-06-23 16:17:53 +00:00
}
2006-06-06 18:25:45 +00:00
2006-08-11 15:28:18 +00:00
var callNumber = doc . evaluate ( ' ' / / tr / td [ 1 ] [ @ class = " holdingslist " ] / text ( ) ' ' , doc , nsResolver , XPathResult . ANY_TYPE , null ) . iterateNext ( ) ;
2006-06-26 01:08:59 +00:00
if ( callNumber & & callNumber . nodeValue ) {
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
newItem . callNumber = callNumber . nodeValue ;
2006-06-26 01:08:59 +00:00
}
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
newItem . complete ( ) ;
2006-06-23 16:17:53 +00:00
return true ;
2006-06-18 19:04:32 +00:00
}
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
function doWeb ( doc , url ) {
var namespace = doc . documentElement . namespaceURI ;
var nsResolver = namespace ? function ( prefix ) {
if ( prefix = = ' ' x ' ' ) return namespace ; else return null ;
} : null ;
if ( ! scrape ( doc ) ) {
var checkboxes = new Array ( ) ;
var urls = new Array ( ) ;
var availableItems = new Array ( ) ;
2006-08-17 07:56:01 +00:00
var tableRows = doc . evaluate ( ' ' / / td [ @ class = " searchsum " ] / table [ / / input [ @ value = " Details " ] ] ' ' , doc , nsResolver , XPathResult . ANY_TYPE , null ) ;
var tableRow = tableRows . iterateNext ( ) ; / / skip first row
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
/ / Go through table rows
2006-08-17 07:56:01 +00:00
while ( tableRow = tableRows . iterateNext ( ) ) {
var input = doc . evaluate ( ' ' . / / input [ @ value = " Details " ] ' ' , tableRow , nsResolver , XPathResult . ANY_TYPE , null ) . iterateNext ( ) ;
2006-10-02 23:15:27 +00:00
var text = Zotero . Utilities . getNodeString ( doc , tableRow , ' ' . / / label / strong / / text ( ) ' ' , nsResolver ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
if ( text ) {
2006-08-17 07:56:01 +00:00
availableItems [ input . name ] = text ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
}
2006-06-23 16:17:53 +00:00
}
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
2006-10-02 23:15:27 +00:00
var items = Zotero . selectItems ( availableItems ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
if ( ! items ) {
return true ;
}
var hostRe = new RegExp ( " ^http://[^/]+ " ) ;
var m = hostRe . exec ( doc . location . href ) ;
var hitlist = doc . forms . namedItem ( " hitlist " ) ;
var baseUrl = m [ 0 ] + hitlist . getAttribute ( " action " ) + " ?first_hit= " + hitlist . elements . namedItem ( " first_hit " ) . value + " &last_hit= " + hitlist . elements . namedItem ( " last_hit " ) . value ;
var uris = new Array ( ) ;
for ( var i in items ) {
2006-08-17 07:56:01 +00:00
uris . push ( baseUrl + " & " + i + " =Details " ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
}
2006-10-02 23:15:27 +00:00
Zotero . Utilities . processDocuments ( uris , function ( doc ) { scrape ( doc ) } ,
function ( ) { Zotero . done ( ) } , null ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
2006-10-02 23:15:27 +00:00
Zotero . wait ( ) ;
2006-06-23 16:17:53 +00:00
}
2006-08-17 07:56:01 +00:00
} ' );
2006-06-06 18:25:45 +00:00
2006-10-02 23:15:27 +00:00
REPLACE INTO " translators " VALUES ( ' a77690cf-c5d1-8fc4-110f-d1fc765dcf88 ' , ' 2006-10-02 17:00:00 ' , 1 , 100 , 4 , ' ProQuest ' , ' Simon Kornblith ' , ' ^http://[^/]+/pqdweb\?((?:.*\&)?did=.*&Fmt=[0-9]|(?:.*\&)Fmt=[0-9].*&did=|(?:.*\&)searchInterface=) ' ,
2006-08-08 01:06:33 +00:00
' function detectWeb(doc, url) {
2006-08-24 18:00:48 +00:00
var namespace = doc . documentElement . namespaceURI ;
var nsResolver = namespace ? function ( prefix ) {
if ( prefix = = ' ' x ' ' ) return namespace ; else return null ;
} : null ;
if ( doc . evaluate ( ' ' / / img [ substring ( @ src , string - length ( @ src ) - 32 ) = " /images/common/logo_proquest.gif " or substring ( @ src , string - length ( @ src ) - 38 ) = " /images/common/logo_proquest_small.gif " ] ' ' ,
doc , nsResolver , XPathResult . ANY_TYPE , null ) ) {
if ( doc . title = = " Results " ) {
return " multiple " ;
} else {
return " magazineArticle " ;
}
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
}
2006-06-26 18:05:23 +00:00
} ' ,
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
' function scrape(doc) {
var namespace = doc . documentElement . namespaceURI ;
var nsResolver = namespace ? function ( prefix ) {
if ( prefix = = ' ' x ' ' ) return namespace ; else return null ;
} : null ;
2006-06-06 18:25:45 +00:00
2006-10-02 23:15:27 +00:00
var newItem = new Zotero . Item ( ) ;
2006-08-17 07:56:01 +00:00
var elmt ;
2006-06-06 18:25:45 +00:00
2006-06-25 19:32:49 +00:00
/ / Title
var xpath = ' ' / html / body / span [ @ class = " textMedium " ] / table / tbody / tr / td [ @ class = " headerBlack " ] / strong / / text ( ) ' ' ;
2006-10-02 23:15:27 +00:00
newItem . title = Zotero . Utilities . getNodeString ( doc , doc , xpath , nsResolver ) ;
2006-06-25 19:32:49 +00:00
/ / Authors
var xpath = ' ' / html / body / span [ @ class = " textMedium " ] / table / tbody / tr / td [ @ class = " textMedium " ] / a / em ' ' ;
2006-08-17 07:56:01 +00:00
var elmts = doc . evaluate ( xpath , doc , nsResolver , XPathResult . ANY_TYPE , null ) ;
while ( elmt = elmts . iterateNext ( ) ) {
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
/ / there are sometimes additional tags representing higlighting
2006-10-02 23:15:27 +00:00
var author = Zotero . Utilities . getNodeString ( doc , elmt , ' ' . / / text ( ) ' ' , nsResolver ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
if ( author ) {
2006-10-02 23:15:27 +00:00
newItem . creators . push ( Zotero . Utilities . cleanAuthor ( author , " author " ) ) ;
2006-06-06 18:25:45 +00:00
}
2006-06-25 19:32:49 +00:00
}
/ / Other info
var xpath = ' ' / html / body / span [ @ class = " textMedium " ] / font / table / tbody / tr ' ' ;
2006-08-17 07:56:01 +00:00
var elmts = doc . evaluate ( xpath , doc , nsResolver , XPathResult . ANY_TYPE , null ) ;
while ( elmt = elmts . iterateNext ( ) ) {
2006-10-02 23:15:27 +00:00
var field = Zotero . Utilities . superCleanString ( doc . evaluate ( ' ' . / TD [ 1 ] / text ( ) [ 1 ] ' ' , elmt , nsResolver , XPathResult . ANY_TYPE , null ) . iterateNext ( ) . nodeValue ) . toLowerCase ( ) ;
2006-06-25 19:32:49 +00:00
if ( field = = " publication title " ) {
2006-08-11 15:28:18 +00:00
var publication = doc . evaluate ( ' ' . / TD [ 2 ] / A [ 1 ] / text ( ) [ 1 ] ' ' , elmt , nsResolver , XPathResult . ANY_TYPE , null ) . iterateNext ( ) ;
2006-06-25 19:32:49 +00:00
if ( publication . nodeValue ) {
2006-10-02 23:15:27 +00:00
newItem . publicationTitle = Zotero . Utilities . superCleanString ( publication . nodeValue ) ;
2006-06-25 19:32:49 +00:00
}
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
2006-08-11 15:28:18 +00:00
var place = doc . evaluate ( ' ' . / TD [ 2 ] / text ( ) [ 1 ] ' ' , elmt , nsResolver , XPathResult . ANY_TYPE , null ) . iterateNext ( ) ;
2006-06-25 19:32:49 +00:00
if ( place . nodeValue ) {
2006-10-02 23:15:27 +00:00
newItem . place = Zotero . Utilities . superCleanString ( place . nodeValue ) ;
2006-06-25 19:32:49 +00:00
}
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
2006-08-11 15:28:18 +00:00
var date = doc . evaluate ( ' ' . / TD [ 2 ] / A [ 2 ] / text ( ) [ 1 ] ' ' , elmt , nsResolver , XPathResult . ANY_TYPE , null ) . iterateNext ( ) ;
2006-06-25 19:32:49 +00:00
if ( date . nodeValue ) {
2006-08-31 00:04:11 +00:00
newItem . date = date . nodeValue ;
2006-06-06 18:25:45 +00:00
}
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
2006-08-11 15:28:18 +00:00
var moreInfo = doc . evaluate ( ' ' . / TD [ 2 ] / text ( ) [ 2 ] ' ' , elmt , nsResolver , XPathResult . ANY_TYPE , null ) . iterateNext ( ) ;
2006-06-25 19:32:49 +00:00
if ( moreInfo . nodeValue ) {
2006-10-02 23:15:27 +00:00
moreInfo = Zotero . Utilities . superCleanString ( moreInfo . nodeValue ) ;
2006-06-25 19:32:49 +00:00
var parts = moreInfo . split ( " ;\xA0 " ) ;
2006-06-18 19:04:32 +00:00
2006-06-25 19:32:49 +00:00
var issueRegexp = / ^ ( \ w + ) \ . ( ? : | \ xA0 ) ? ( . + ) $ /
var issueInfo = parts [ 0 ] . split ( " ,\xA0 " ) ;
for ( j in issueInfo ) {
var m = issueRegexp . exec ( issueInfo [ j ] ) ;
if ( m ) {
var info = m [ 1 ] . toLowerCase ( ) ;
if ( info = = " vol " ) {
2006-10-02 23:15:27 +00:00
newItem . volume = Zotero . Utilities . superCleanString ( m [ 2 ] ) ;
2006-06-25 19:32:49 +00:00
} else if ( info = = " iss " | | info = = " no " ) {
2006-10-02 23:15:27 +00:00
newItem . issue = Zotero . Utilities . superCleanString ( m [ 2 ] ) ;
2006-06-25 19:32:49 +00:00
}
}
}
2006-10-02 23:15:27 +00:00
if ( parts [ 1 ] & & Zotero . Utilities . superCleanString ( parts [ 1 ] ) . substring ( 0 , 3 ) . toLowerCase ( ) = = " pg. " ) {
2006-06-25 19:32:49 +00:00
var re = / [ 0 - 9 \ - ] + / ;
var m = re . exec ( parts [ 1 ] ) ;
if ( m ) {
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
newItem . pages = m [ 0 ] ;
2006-06-25 19:32:49 +00:00
}
2006-06-06 18:25:45 +00:00
}
}
2006-06-25 19:32:49 +00:00
} else if ( field = = " source type " ) {
2006-08-11 15:28:18 +00:00
var value = doc . evaluate ( ' ' . / TD [ 2 ] / text ( ) [ 1 ] ' ' , elmt , nsResolver , XPathResult . ANY_TYPE , null ) . iterateNext ( ) ;
2006-06-25 19:32:49 +00:00
if ( value . nodeValue ) {
2006-10-02 23:15:27 +00:00
value = Zotero . Utilities . superCleanString ( value . nodeValue ) . toLowerCase ( ) ;
2006-06-25 19:32:49 +00:00
if ( value . indexOf ( " periodical " ) > = 0 ) {
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
newItem . itemType = " magazineArticle " ;
2006-06-25 19:32:49 +00:00
} else if ( value . indexOf ( " newspaper " ) > = 0 ) {
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
newItem . itemType = " newspaperArticle " ;
} else { / / TODO : support thesis
newItem . itemType = " book " ;
2006-06-25 19:32:49 +00:00
}
2006-06-06 18:25:45 +00:00
}
2006-06-25 19:32:49 +00:00
} else if ( field = = " isbn " | | field = = " issn " | | field = = " issn/isbn " ) {
2006-08-11 15:28:18 +00:00
var value = doc . evaluate ( ' ' . / TD [ 2 ] / text ( ) [ 1 ] ' ' , elmt , nsResolver , XPathResult . ANY_TYPE , null ) . iterateNext ( ) ;
2006-06-25 19:32:49 +00:00
if ( value ) {
var type ;
2006-10-02 23:15:27 +00:00
value = Zotero . Utilities . superCleanString ( value . nodeValue ) ;
2006-06-25 19:32:49 +00:00
if ( value . length = = 10 | | value . length = = 13 ) {
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
newItem . ISBN = value ;
2006-06-25 19:32:49 +00:00
} else if ( value . length = = 8 ) {
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
newItem . ISSN = value ;
2006-06-25 19:32:49 +00:00
}
2006-06-06 18:25:45 +00:00
}
2006-08-17 07:56:01 +00:00
} else if ( field = = " document url " ) {
var value = doc . evaluate ( ' ' . / TD [ 2 ] / text ( ) [ 1 ] ' ' , elmt , nsResolver , XPathResult . ANY_TYPE , null ) . iterateNext ( ) ;
if ( value ) {
2006-10-02 23:15:27 +00:00
newItem . url = Zotero . Utilities . cleanString ( value . nodeValue ) ;
2006-08-17 07:56:01 +00:00
}
} else if ( field = = " proquest document id " ) {
var value = doc . evaluate ( ' ' . / TD [ 2 ] / text ( ) [ 1 ] ' ' , elmt , nsResolver , XPathResult . ANY_TYPE , null ) . iterateNext ( ) ;
if ( value ) {
2006-10-02 23:15:27 +00:00
newItem . accessionNumber = Zotero . Utilities . cleanString ( value . nodeValue ) ;
2006-08-17 07:56:01 +00:00
}
} else if ( field = = " subjects " | | field = = " people " | | field = = " locations " ) {
var subjects = doc . evaluate ( " .//a " , elmt , nsResolver , XPathResult . ANY_TYPE , null ) ;
var currentSubject ;
while ( currentSubject = subjects . iterateNext ( ) ) {
2006-10-02 23:15:27 +00:00
var subjectValue = Zotero . Utilities . getNodeString ( doc , currentSubject , " .//text() " , nsResolver ) ;
subjectValue = Zotero . Utilities . superCleanString ( subjectValue ) ;
2006-08-17 07:56:01 +00:00
if ( subjectValue ) {
newItem . tags . push ( subjectValue ) ;
}
}
}
}
/ / magazineArticle - > journalArticle if issue and volume exist
if ( newItem . itemType = = " magazineArticle " & & ( newItem . issue | | newItem . volume ) ) {
newItem . itemType = " journalArticle " ;
}
/ / figure out what we can attach
var attachArray = {
' ' / / td [ @ class = " textSmall " ] / / img [ @ alt = " Full Text - PDF " ] ' ' : " ProQuest Full Text (PDF) " ,
' ' / / td [ @ class = " textSmall " ] / / img [ @ alt = " Text+Graphics " ] ' ' : " ProQuest Full Text (HTML with Graphics) " ,
' ' / / td [ @ class = " textSmall " ] / / img [ @ alt = " Full Text " ] ' ' : " ProQuest Full Text (HTML) " ,
' ' / / td [ @ class = " textSmall " ] / / img [ @ alt = " Abstract " ] ' ' : " ProQuest Abstract "
}
for ( var xpath in attachArray ) {
var item = doc . evaluate ( xpath , doc , nsResolver , XPathResult . ANY_TYPE , null ) . iterateNext ( ) ;
if ( item ) {
var title = attachArray [ xpath ] ;
if ( item . parentNode . tagName . toLowerCase ( ) = = " a " ) {
/ / item is not this page
newItem . attachments . push ( { url : item . parentNode . href ,
title : title , mimeType : ( title = = " ProQuest Full Text (PDF) " ? " application/pdf " : " text/html " ) ,
downloadable : true } ) ;
} else {
/ / item is this page
newItem . attachments . push ( { document : doc , title : title , downloadable : true } ) ;
}
2006-06-25 19:32:49 +00:00
}
}
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
newItem . complete ( ) ;
2006-06-25 19:32:49 +00:00
}
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
function doWeb ( doc , url ) {
var namespace = doc . documentElement . namespaceURI ;
var nsResolver = namespace ? function ( prefix ) {
if ( prefix = = ' ' x ' ' ) return namespace ; else return null ;
} : null ;
if ( doc . title = = " Results " ) {
var items = new Object ( ) ;
/ / Require link to match this
var tagRegexp = new RegExp ( ) ;
2006-08-26 03:50:15 +00:00
tagRegexp . compile ( ' ' ^ http : / / [ ^ / ] + / pqdweb \ \ ? ( ( ? : . * & ) ? did = . * & Fmt = [ 12 ] ( ? : [ ^ 0 - 9 ] | $ ) | ( ? : . * & ) Fmt = [ 12 ] [ ^ 0 - 9 ] . * & did = ) ' ' ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
2006-08-17 07:56:01 +00:00
var tableRows = doc . evaluate ( ' ' / / tr [ @ class = " rowUnMarked " ] ' ' ,
doc , nsResolver , XPathResult . ANY_TYPE , null ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
/ / Go through table rows
2006-08-17 07:56:01 +00:00
var tableRow ;
while ( tableRow = tableRows . iterateNext ( ) ) {
var links = tableRow . getElementsByTagName ( " a " ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
/ / Go through links
for ( var j = 0 ; j < links . length ; j + + ) {
if ( tagRegexp . test ( links [ j ] . href ) ) {
2006-08-17 07:56:01 +00:00
var text = doc . evaluate ( ' ' . / / a [ @ class = " bold " ] / text ( ) ' ' , tableRow , nsResolver , XPathResult . ANY_TYPE , null ) . iterateNext ( ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
if ( text & & text . nodeValue ) {
2006-10-02 23:15:27 +00:00
text = Zotero . Utilities . cleanString ( text . nodeValue ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
items [ links [ j ] . href ] = text ;
}
break ;
2006-06-25 19:32:49 +00:00
}
2006-06-06 18:25:45 +00:00
}
}
2006-10-02 23:15:27 +00:00
items = Zotero . selectItems ( items ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
if ( ! items ) {
return true ;
}
2006-08-24 18:00:48 +00:00
var urls = new Array ( ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
for ( var i in items ) {
2006-08-24 18:00:48 +00:00
urls . push ( i ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
}
2006-10-02 23:15:27 +00:00
Zotero . Utilities . processDocuments ( urls , function ( doc ) { scrape ( doc ) } ,
function ( ) { Zotero . done ( ) ; } , null ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
2006-10-02 23:15:27 +00:00
Zotero . wait ( ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
} else {
2006-08-24 18:00:48 +00:00
if ( doc . evaluate ( ' ' / html / body / span [ @ class = " textMedium " ] / table / tbody / tr / td [ @ class = " headerBlack " ] / strong / / text ( ) ' ' ,
doc , nsResolver , XPathResult . ANY_TYPE , null ) . iterateNext ( ) ) {
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
scrape ( doc ) ;
2006-08-24 18:00:48 +00:00
} else {
var newURL = doc . location . href . replace ( / RQT = [ 0 - 9 ] + / i , " RQT=309 " ) ;
newURL = newURL . replace ( / Fmt = [ 0 - 9 ] + / i , " Fmt=1 " ) ;
2006-10-02 23:15:27 +00:00
Zotero . Utilities . loadDocument ( newURL , function ( doc ) { scrape ( doc ) ; Zotero . done ( ) ; } , null ) ;
Zotero . wait ( ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
}
2006-06-25 19:32:49 +00:00
}
2006-06-06 18:25:45 +00:00
} ' );
2006-10-02 23:15:27 +00:00
REPLACE INTO " translators " VALUES ( ' 6773a9af-5375-3224-d148-d32793884dec ' , ' 2006-10-02 17:00:00 ' , 1 , 100 , 4 , ' InfoTrac College Edition ' , ' Simon Kornblith ' , ' ^http://infotrac-college\.thomsonlearning\.com/itw/infomark/ ' ,
2006-08-08 01:06:33 +00:00
' function detectWeb(doc, url) {
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
if ( doc . title . substring ( 0 , 8 ) = = " Article " ) {
return " magazineArticle " ;
2006-08-17 07:56:01 +00:00
} else if ( doc . title . substring ( 0 , 10 ) = = " Citations " ) {
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
return " multiple " ;
}
} ' ,
2006-08-17 07:56:01 +00:00
' function extractCitation(url, elmts, title, doc) {
2006-10-02 23:15:27 +00:00
var newItem = new Zotero . Item ( ) ;
2006-08-17 07:56:01 +00:00
newItem . url = url ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
2006-06-25 22:00:20 +00:00
if ( title ) {
2006-10-02 23:15:27 +00:00
newItem . title = Zotero . Utilities . superCleanString ( title ) ;
2006-06-25 22:00:20 +00:00
}
2006-08-17 07:56:01 +00:00
while ( elmt = elmts . iterateNext ( ) ) {
2006-06-25 22:00:20 +00:00
var colon = elmt . nodeValue . indexOf ( " : " ) ;
var field = elmt . nodeValue . substring ( 1 , colon ) . toLowerCase ( ) ;
var value = elmt . nodeValue . substring ( colon + 1 , elmt . nodeValue . length - 1 ) ;
if ( field = = " title " ) {
2006-10-02 23:15:27 +00:00
newItem . title = Zotero . Utilities . superCleanString ( value ) ;
2006-06-25 22:00:20 +00:00
} else if ( field = = " journal " ) {
2006-08-06 17:34:41 +00:00
newItem . publicationTitle = value ;
2006-06-25 22:00:20 +00:00
} else if ( field = = " pi " ) {
parts = value . split ( " " ) ;
var date = " " ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
var field = null ;
2006-06-25 22:00:20 +00:00
for ( j in parts ) {
firstChar = parts [ j ] . substring ( 0 , 1 ) ;
if ( firstChar = = " v " ) {
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
newItem . itemType = " journalArticle " ;
field = " volume " ;
2006-06-25 22:00:20 +00:00
} else if ( firstChar = = " i " ) {
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
field = " issue " ;
2006-06-25 22:00:20 +00:00
} else if ( firstChar = = " p " ) {
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
field = " pages " ;
var pagesRegexp = / p ( \ w + ) \ ( ( \ w + ) \ ) / ; / / weird looking page range
2006-06-25 22:00:20 +00:00
var match = pagesRegexp . exec ( parts [ j ] ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
if ( match ) { / / yup , it ' ' s weird
2006-06-25 22:00:20 +00:00
var finalPage = parseInt ( match [ 1 ] ) + parseInt ( match [ 2 ] )
parts [ j ] = " p " + match [ 1 ] + " - " + finalPage . toString ( ) ;
2006-08-17 07:56:01 +00:00
} else if ( ! newItem . itemType ) { / / no , it ' ' s normal
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
/ / check to see if it ' ' s numeric , bc newspaper pages aren ' ' t
2006-06-25 22:00:20 +00:00
var justPageNumber = parts [ j ] . substr ( 1 ) ;
if ( parseInt ( justPageNumber ) . toString ( ) ! = justPageNumber ) {
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
newItem . itemType = " newspaperArticle " ;
2006-06-25 22:00:20 +00:00
}
}
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
} else if ( ! field ) { / / date parts at the beginning , before
/ / anything else
date + = " " + parts [ j ] ;
2006-06-25 22:00:20 +00:00
}
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
if ( field ) {
2006-06-25 22:00:20 +00:00
isDate = false ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
if ( parts [ j ] ! = " pNA " ) { / / make sure it ' ' s not an invalid
/ / page number
/ / chop of letter
newItem [ field ] = parts [ j ] . substring ( 1 ) ;
2006-08-17 07:56:01 +00:00
} else if ( ! newItem . itemType ) { / / only newspapers are missing
/ / page numbers on infotrac
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
newItem . itemType = " newspaperArticle " ;
2006-06-25 22:00:20 +00:00
}
2006-06-06 18:25:45 +00:00
}
}
2006-06-25 22:00:20 +00:00
/ / Set type
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
if ( ! newItem . itemType ) {
newItem . itemType = " magazineArticle " ;
2006-06-06 18:25:45 +00:00
}
2006-06-25 22:00:20 +00:00
if ( date ! = " " ) {
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
newItem . date = date . substring ( 1 ) ;
2006-06-25 22:00:20 +00:00
}
} else if ( field = = " author " ) {
2006-10-02 23:15:27 +00:00
newItem . creators . push ( Zotero . Utilities . cleanAuthor ( value , " author " , true ) ) ;
2006-06-06 18:25:45 +00:00
}
}
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
2006-08-17 07:56:01 +00:00
if ( doc ) {
newItem . attachments . push ( { document : doc , title : " InfoTrac Full Text " ,
downloadable : true } ) ;
} else {
newItem . attachments . push ( { url : url , title : " InfoTrac Full Text " ,
mimeType : " text/html " , downloadable : true } ) ;
}
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
newItem . complete ( ) ;
2006-06-06 18:25:45 +00:00
}
2006-06-25 22:00:20 +00:00
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
function doWeb ( doc , url ) {
var namespace = doc . documentElement . namespaceURI ;
var nsResolver = namespace ? function ( prefix ) {
if ( prefix = = ' ' x ' ' ) return namespace ; else return null ;
} : null ;
2006-06-25 22:00:20 +00:00
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
var uri = doc . location . href ;
if ( doc . title . substring ( 0 , 8 ) = = " Article " ) { / / article
var xpath = ' ' / html / body / / comment ( ) ' ' ;
2006-08-17 07:56:01 +00:00
var elmts = doc . evaluate ( xpath , doc , nsResolver , XPathResult . ANY_TYPE , null ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
extractCitation ( uri , elmts ) ;
} else { / / search results
var items = new Array ( ) ;
var uris = new Array ( ) ;
2006-08-17 07:56:01 +00:00
var elmts = new Array ( ) ;
var tableRows = doc . evaluate ( ' ' / html / body / / table / tbody / tr / td [ a / b ] ' ' , doc , nsResolver ,
XPathResult . ANY_TYPE , null ) ;
var tableRow ;
var javaScriptRe = / ' ' ( [ ^ ' ' ] * ) ' ' * , * ' ' ( [ ^ ' ' ] * ) ' ' /
var i = 0 ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
/ / Go through table rows
2006-08-17 07:56:01 +00:00
while ( tableRow = tableRows . iterateNext ( ) ) {
var link = doc . evaluate ( ' ' . / a ' ' , tableRow , nsResolver , XPathResult . ANY_TYPE , null ) . iterateNext ( ) ;
var m = javaScriptRe . exec ( link . href ) ;
if ( m ) {
uris [ i ] = " http://infotrac-college.thomsonlearning.com/itw/infomark/192/215/90714844w6 " + m [ 1 ] + " ?sw_aep=olr_wad " + m [ 2 ] ;
}
2006-08-11 15:28:18 +00:00
var article = doc . evaluate ( ' ' . / b / text ( ) ' ' , link , nsResolver , XPathResult . ANY_TYPE , null ) . iterateNext ( ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
items [ i ] = article . nodeValue ;
/ / Chop off final period
if ( items [ i ] . substr ( items [ i ] . length - 1 ) = = " . " ) {
items [ i ] = items [ i ] . substr ( 0 , items [ i ] . length - 1 ) ;
}
2006-08-17 07:56:01 +00:00
elmts [ i ] = doc . evaluate ( " .//comment() " , tableRow , nsResolver , XPathResult . ANY_TYPE , null ) ;
i + + ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
}
2006-10-02 23:15:27 +00:00
items = Zotero . selectItems ( items ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
if ( ! items ) {
return true ;
}
for ( var i in items ) {
2006-08-17 07:56:01 +00:00
extractCitation ( uris [ i ] , elmts [ i ] , items [ i ] ) ;
2006-06-25 22:00:20 +00:00
}
}
} ' );
2006-06-06 18:25:45 +00:00
2006-10-02 23:15:27 +00:00
REPLACE INTO " translators " VALUES ( ' 63c25c45-6257-4985-9169-35b785a2995e ' , ' 2006-10-02 17:00:00 ' , 1 , 100 , 4 , ' InfoTrac OneFile ' , ' Simon Kornblith ' , ' ^https?://[^/]+/itx/(?:[a-z]+Search|retrieve|paginate|tab)\.do ' ,
2006-08-26 03:50:15 +00:00
' function detectWeb(doc, url) {
var namespace = doc . documentElement . namespaceURI ;
var nsResolver = namespace ? function ( prefix ) {
if ( prefix = = ' ' x ' ' ) return namespace ; else return null ;
} : null ;
if ( doc . evaluate ( ' ' / / img [ @ alt = " Thomson Gale " ] ' ' , doc , nsResolver ,
XPathResult . ANY_TYPE , null ) . iterateNext ( ) ) {
if ( doc . evaluate ( ' ' / / table [ @ class = " resultstable " ] [ tbody / tr [ @ class = " unselectedRow " ] ] ' ' ,
doc , nsResolver , XPathResult . ANY_TYPE , null ) . iterateNext ( ) ) {
return " multiple " ;
} else {
return " journalArticle " ;
}
}
} ' ,
' function infoTracRIS(text) {
/ / load translator for RIS
2006-10-02 23:15:27 +00:00
var translator = Zotero . loadTranslator ( " import " ) ;
2006-08-26 03:50:15 +00:00
translator . setTranslator ( " 32d59d2d-b65a-4da4-b0a3-bdd3cfb979e7 " ) ;
translator . setString ( text ) ;
translator . setHandler ( " itemDone " , function ( obj , item ) {
if ( item . notes & & item . notes [ 0 ] ) {
item . extra = item . notes [ 0 ] . note ;
delete item . notes ;
item . notes = undefined ;
}
/ / get underscored terms ( term headings ? ) out of tags
for ( var i in item . tags ) {
var index = item . tags [ i ] . indexOf ( " _ " ) ;
if ( index ! = - 1 ) {
item . tags [ i ] = item . tags [ i ] . substr ( 0 , index ) ;
}
}
/ / add names to attachments
for ( var i in item . attachments ) {
if ( ! item . attachments [ i ] . title ) {
item . attachments [ i ] = undefined ;
} else {
item . attachments [ i ] . title = " InfoTrac OneFile " + item . attachments [ i ] . title ;
}
}
/ / item . attachments = newAttachments . shift ( ) ;
2006-10-02 23:15:27 +00:00
/ / Zotero . Utilities . debug ( item . attachments ) ;
2006-08-26 03:50:15 +00:00
item . complete ( ) ;
} ) ;
translator . translate ( ) ;
2006-10-02 23:15:27 +00:00
Zotero . done ( ) ;
2006-08-26 03:50:15 +00:00
}
function readEncoded ( url ) {
var newArray = new Array ( ) ;
var parts = url . split ( / [ ? & ] / ) ;
for each ( var part in parts ) {
var index = part . indexOf ( " = " ) ;
if ( index ! = = - 1 ) {
newArray [ part . substr ( 0 , index ) ] = part . substr ( index + 1 ) ;
}
}
return newArray ;
}
function doWeb ( doc , url ) {
var namespace = doc . documentElement . namespaceURI ;
var nsResolver = namespace ? function ( prefix ) {
if ( prefix = = ' ' x ' ' ) return namespace ; else return null ;
} : null ;
var hostRe = new RegExp ( " ^https?://[^/]+/ " ) ;
var host = hostRe . exec ( doc . location . href ) [ 0 ] ;
if ( doc . evaluate ( ' ' / / table [ @ class = " resultstable " ] [ tbody / tr [ @ class = " unselectedRow " ] ] ' ' ,
doc , nsResolver , XPathResult . ANY_TYPE , null ) . iterateNext ( ) ) {
2006-10-02 23:15:27 +00:00
var items = Zotero . Utilities . getItemArray ( doc , doc , ' ' ^ http : / / [ ^ / ] + / itx / retrieve \ \ . do \ \ ? . * docId = ' ' ) ;
items = Zotero . selectItems ( items ) ;
2006-08-26 03:50:15 +00:00
if ( ! items ) {
return true ;
}
/ / parse things out of URLs
var time = new Date ( ) ;
time = time . getTime ( ) ;
var markedString = " " ;
for ( var i in items ) {
var postVal = readEncoded ( i ) ;
markedString + = postVal . tabID + " _ " + postVal . docId + " _1_0_ " + postVal . contentSet + " _srcprod= " + postVal . prodId + " |^ " ;
}
var postData = " inPS=true&ts= " + time + " &prodId= " + postVal . prodId + " &actionCmd=UPDATE_MARK_LIST&userGroupName= " + postVal . userGroupName + " &markedString= " + markedString + " &a= " + time ;
2006-10-02 23:15:27 +00:00
Zotero . Utilities . HTTP . doGet ( host + " itx/marklist.do?inPS=true&ts= " + time + " &prodId= " + postVal . prodId + " &actionCmd=CLEAR_MARK_LIST&userGroupName= " + postVal . userGroupName ,
2006-08-26 03:50:15 +00:00
function ( text ) { / / clear marked
2006-10-02 23:15:27 +00:00
Zotero . Utilities . HTTP . doPost ( host + " itx/marklist.do " , postData ,
2006-08-26 03:50:15 +00:00
function ( text ) { / / mark
2006-10-02 23:15:27 +00:00
Zotero . Utilities . HTTP . doGet ( host + " itx/generateCitation.do?contentSet= " + postVal . contentSet + " &inPS=true&tabID=T-ALL&prodId= " + postVal . prodId + " &docId=&actionString=FormatCitation&userGroupName= " + postVal . userGroupName + " &citationFormat=ENDNOTE " ,
2006-08-26 03:50:15 +00:00
function ( text ) { / / get marked
infoTracRIS ( text ) ;
} ) ;
} ) ;
} ) ;
} else {
/ / just extract from single page
var postVal = readEncoded ( url ) ;
2006-10-02 23:15:27 +00:00
Zotero . Utilities . HTTP . doGet ( host + " itx/generateCitation.do?contentSet= " + postVal . contentSet + " &inPS=true&tabID= " + postVal . tabID + " &prodId= " + postVal . prodId + " &docId= " + postVal . docId + " &actionString=FormatCitation&citationFormat=ENDNOTE " ,
2006-08-26 03:50:15 +00:00
function ( text ) {
infoTracRIS ( text ) ;
} ) ;
}
2006-10-02 23:15:27 +00:00
Zotero . wait ( ) ;
2006-08-26 03:50:15 +00:00
} ' );
2006-10-02 23:15:27 +00:00
REPLACE INTO " translators " VALUES ( ' b047a13c-fe5c-6604-c997-bef15e502b09 ' , ' 2006-10-02 17:00:00 ' , 1 , 100 , 4 , ' LexisNexis ' , ' Simon Kornblith ' , ' ^http://web\.lexis-?nexis\.com/universe/(?:document|doclist) ' ,
2006-08-08 01:06:33 +00:00
' function detectWeb(doc, url) {
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
var detailRe = new RegExp ( " ^http://[^/]+/universe/document " ) ;
if ( detailRe . test ( doc . location . href ) ) {
return " newspaperArticle " ;
} else {
return " multiple " ;
}
2006-06-26 18:05:23 +00:00
} ' ,
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
' function scrape(doc) {
2006-10-02 23:15:27 +00:00
var newItem = new Zotero . Item ( ) ;
2006-08-17 07:56:01 +00:00
newItem . attachments . push ( { document : doc , title : " LexisNexis Full Text " ,
downloadable : true } ) ;
2006-06-25 20:09:27 +00:00
var citationDataDiv ;
var divs = doc . getElementsByTagName ( " div " ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
for ( var i = 0 ; i < divs . length ; i + + ) {
2006-06-25 20:09:27 +00:00
if ( divs [ i ] . className = = " bodytext " ) {
citationDataDiv = divs [ i ] ;
break ;
}
}
centerElements = citationDataDiv . getElementsByTagName ( " center " ) ;
var elementParts = centerElements [ 0 ] . innerHTML . split ( / < br [ ^ > ] * > / gi ) ;
2006-08-06 17:34:41 +00:00
newItem . publicationTitle = elementParts [ elementParts . length - 1 ] ;
2006-06-25 20:09:27 +00:00
var dateRegexp = / < br [ ^ > ] * > ( ? : < b > ) ? ( [ A - Z ] [ a - z ] + ) ( ? : < \ / b > ) ? ( [ 0 - 9 ] + , [ 0 - 9 ] { 4 } ) / ;
var m = dateRegexp . exec ( centerElements [ centerElements . length - 1 ] . innerHTML ) ;
if ( m ) {
2006-08-31 00:04:11 +00:00
newItem . date = m [ 1 ] + " " + m [ 2 ] ;
2006-06-25 20:09:27 +00:00
} else {
var elementParts = centerElements [ centerElements . length - 1 ] . innerHTML . split ( / < br [ ^ > ] * > / gi ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
newItem . date = elementParts [ 1 ] ;
2006-06-25 20:09:27 +00:00
}
var cutIndex = citationDataDiv . innerHTML . indexOf ( " <b>BODY:</b> " ) ;
if ( cutIndex < 0 ) {
cutIndex = citationDataDiv . innerHTML . indexOf ( " <b>TEXT:</b> " ) ;
}
if ( cutIndex > 0 ) {
citationData = citationDataDiv . innerHTML . substring ( 0 , cutIndex ) ;
} else {
citationData = citationDataDiv . innerHTML ;
}
2006-10-02 23:15:27 +00:00
citationData = Zotero . Utilities . cleanTags ( citationData ) ;
2006-06-25 20:09:27 +00:00
var headlineRegexp = / \ n ( ? : HEADLINE | TITLE | ARTICLE ) : ( [ ^ \ n ] + ) \ n / ;
var m = headlineRegexp . exec ( citationData ) ;
if ( m ) {
2006-10-02 23:15:27 +00:00
newItem . title = Zotero . Utilities . cleanTags ( m [ 1 ] ) ;
2006-06-25 20:09:27 +00:00
}
var bylineRegexp = / \ nBYLINE : * ( \ w [ \ w \ - ] + ) / ;
var m = bylineRegexp . exec ( citationData ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
if ( m ) { / / there is a byline ; use it as an author
2006-06-25 20:09:27 +00:00
if ( m [ 1 ] . substring ( 0 , 3 ) . toLowerCase ( ) = = " by " ) {
m [ 1 ] = m [ 1 ] . substring ( 3 ) ;
}
2006-10-02 23:15:27 +00:00
newItem . creators . push ( Zotero . Utilities . cleanAuthor ( m [ 1 ] , " author " ) ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
newItem . itemType = " newspaperArticle " ;
} else { / / no byline ; must be a journal
newItem . itemType = " journalArticle " ;
2006-06-25 20:09:27 +00:00
}
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
/ / other ways authors could be encoded
var authorRegexp = / \ n ( ? : AUTHOR | NAME ) : ( [ ^ \ n ] + ) \ n / ;
2006-06-25 20:09:27 +00:00
var m = authorRegexp . exec ( citationData ) ;
if ( m ) {
var authors = m [ 1 ] . split ( / , ( ? : and ) ? / ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
for ( var i in authors ) {
2006-10-02 23:15:27 +00:00
newItem . creators . push ( Zotero . Utilities . cleanAuthor ( authors [ i ] . replace ( " * " , " " ) , " author " ) ) ;
2006-06-25 20:09:27 +00:00
}
2006-06-06 18:25:45 +00:00
}
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
newItem . complete ( ) ;
2006-06-06 18:25:45 +00:00
}
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
function doWeb ( doc , url ) {
var detailRe = new RegExp ( " ^http://[^/]+/universe/document " ) ;
if ( detailRe . test ( doc . location . href ) ) {
scrape ( doc ) ;
} else {
2006-10-02 23:15:27 +00:00
var items = Zotero . Utilities . getItemArray ( doc , doc , " ^http://[^/]+/universe/document " ) ;
items = Zotero . selectItems ( items ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
if ( ! items ) {
return true ;
}
var uris = new Array ( ) ;
for ( var i in items ) {
uris . push ( i ) ;
}
2006-10-02 23:15:27 +00:00
Zotero . Utilities . processDocuments ( uris , function ( doc ) { scrape ( doc ) } ,
function ( ) { Zotero . done ( ) ; } , null ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
2006-10-02 23:15:27 +00:00
Zotero . wait ( ) ;
2006-06-06 18:25:45 +00:00
}
2006-06-18 19:04:32 +00:00
} ' );
2006-06-06 18:25:45 +00:00
2006-10-02 23:15:27 +00:00
REPLACE INTO " translators " VALUES ( ' cf87eca8-041d-b954-795a-2d86348999d5 ' , ' 2006-10-02 17:00:00 ' , 1 , 100 , 4 , ' Aleph ' , ' Simon Kornblith ' , ' ^http://[^/]+/F(?:/[A-Z0-9\-]+(?:\?.*)?$|\?func=find|\?func=scan) ' ,
2006-08-08 01:06:33 +00:00
' function detectWeb(doc, url) {
2006-09-04 20:19:38 +00:00
var singleRe = new RegExp ( " ^http://[^/]+/F/[A-Z0-9\-]+\?.*(?:func=full-set-set.*\&format=[0-9]{3}|func=direct) " ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
if ( singleRe . test ( doc . location . href ) ) {
return " book " ;
} else {
var tags = doc . getElementsByTagName ( " a " ) ;
for ( var i = 0 ; i < tags . length ; i + + ) {
if ( singleRe . test ( tags [ i ] . href ) ) {
return " multiple " ;
}
2006-06-23 17:35:57 +00:00
}
}
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
} ' ,
' function doWeb(doc, url) {
2006-09-04 20:19:38 +00:00
var detailRe = new RegExp ( " ^http://[^/]+/F/[A-Z0-9\-]+\?.*(?:func=full-set-set.*\&format=[0-9]{3}|func=direct) " ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
var uri = doc . location . href ;
var newUris = new Array ( ) ;
if ( detailRe . test ( uri ) ) {
2006-06-23 17:35:57 +00:00
newUris . push ( uri . replace ( / \ & format = [ 0 - 9 ] { 3 } / , " &format=001 " ) )
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
} else {
2006-09-04 20:19:38 +00:00
var itemRegexp = ' ' ^ http : / / [ ^ / ] + / F / [ A - Z0 - 9 \ - ] + \ ? . * ( ? : func = full - set - set . * \ & format = 999 | func = direct ) ' '
2006-10-02 23:15:27 +00:00
var items = Zotero . Utilities . getItemArray ( doc , doc , itemRegexp , ' ' ^ [ 0 - 9 ] + $ ' ' ) ;
2006-09-04 20:19:38 +00:00
/ / ugly hack to see if we have any items
var haveItems = false ;
for ( var i in items ) {
haveItems = true ;
break ;
}
/ / If we don ' ' t have any items otherwise , let us use the numbers
if ( ! haveItems ) {
2006-10-02 23:15:27 +00:00
var items = Zotero . Utilities . getItemArray ( doc , doc , itemRegexp ) ;
2006-09-04 20:19:38 +00:00
}
2006-10-02 23:15:27 +00:00
items = Zotero . selectItems ( items ) ;
2006-09-04 20:19:38 +00:00
if ( ! items ) {
return true ;
}
for ( var i in items ) {
var newUri = i . replace ( " &format=999 " , " &format=001 " ) ;
if ( newUri = = i ) {
newUri + = " &format=001 " ;
}
newUris . push ( newUri ) ;
}
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
}
2006-06-06 18:25:45 +00:00
2006-10-02 23:15:27 +00:00
var translator = Zotero . loadTranslator ( " import " ) ;
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
translator . setTranslator ( " a6ee60df-1ddc-4aae-bb25-45e0537be973 " ) ;
var marc = translator . getTranslatorObject ( ) ;
2006-10-02 23:15:27 +00:00
Zotero . Utilities . processDocuments ( newUris , function ( newDoc ) {
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
var uri = newDoc . location . href ;
var namespace = newDoc . documentElement . namespaceURI ;
var nsResolver = namespace ? function ( prefix ) {
if ( prefix = = ' ' x ' ' ) return namespace ; else return null ;
} : null ;
2006-08-30 01:41:51 +00:00
var xpath = ' ' / / table / tbody / tr [ td [ 1 ] [ @ id = " bold " ] or td [ @ class = " recordTD " ] ] [ td [ 2 ] ] ' ' ;
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
var elmts = newDoc . evaluate ( xpath , newDoc , nsResolver , XPathResult . ANY_TYPE , null ) ;
var elmt ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
var record = new marc . record ( ) ;
while ( elmt = elmts . iterateNext ( ) ) {
2006-10-02 23:15:27 +00:00
var field = Zotero . Utilities . superCleanString ( doc . evaluate ( ' ' . / TD [ 1 ] / text ( ) [ 1 ] ' ' , elmt , nsResolver , XPathResult . ANY_TYPE , null ) . iterateNext ( ) . nodeValue ) ;
var value = Zotero . Utilities . getNodeString ( doc , elmt , ' ' . / TD [ 2 ] / / text ( ) ' ' , nsResolver ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
if ( field = = " LDR " ) {
record . leader = value ;
} else if ( field ! = " FMT " ) {
value = value . replace ( / \ | ( [ a - z ] ) / g , marc . subfieldDelimiter + " $1 " ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
var code = field . substring ( 0 , 3 ) ;
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
var ind = " " ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
if ( field . length > 3 ) {
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
ind = field [ 3 ] ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
if ( field . length > 4 ) {
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
ind + = field [ 4 ] ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
}
2006-06-06 18:25:45 +00:00
}
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
record . addField ( code , ind , value ) ;
2006-06-06 18:25:45 +00:00
}
}
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
2006-10-02 23:15:27 +00:00
var newItem = new Zotero . Item ( ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
record . translate ( newItem ) ;
newItem . complete ( ) ;
2006-10-02 23:15:27 +00:00
} , function ( ) { Zotero . done ( ) ; } , null ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
2006-10-02 23:15:27 +00:00
Zotero . wait ( ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
} ' );
2006-06-06 18:25:45 +00:00
2006-10-02 23:15:27 +00:00
REPLACE INTO " translators " VALUES ( ' 774d7dc2-3474-2684-392c-f787789ec63d ' , ' 2006-10-02 17:00:00 ' , 1 , 100 , 4 , ' Dynix ' , ' Simon Kornblith ' , ' ipac\.jsp\?.*(?:uri=full=[0-9]|menu=search) ' ,
2006-08-08 01:06:33 +00:00
' function detectWeb(doc, url) {
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
var detailsRe = new RegExp ( ' ' ipac \ . jsp \ ? . * uri = full = [ 0 - 9 ] ' ' ) ;
if ( detailsRe . test ( doc . location . href ) ) {
return " book " ;
} else {
return " multiple " ;
}
2006-06-26 18:05:23 +00:00
} ' ,
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
' function doWeb(doc, url) {
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
var namespace = doc . documentElement . namespaceURI ;
var nsResolver = namespace ? function ( prefix ) {
if ( prefix = = ' ' x ' ' ) return namespace ; else return null ;
} : null ;
2006-06-06 18:25:45 +00:00
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
var uri = doc . location . href ;
var detailsRe = new RegExp ( ' ' ipac \ . jsp \ ? . * uri = full = [ 0 - 9 ] ' ' ) ;
2006-06-23 20:53:29 +00:00
var uris = new Array ( ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
if ( detailsRe . test ( uri ) ) {
uris . push ( uri + ' ' & fullmarc = true ' ' ) ;
} else {
2006-10-02 23:15:27 +00:00
var items = Zotero . Utilities . getItemArray ( doc , doc , " ipac\.jsp\?.*uri=full=[0-9]|^javascript:buildNewList\\(''.*uri%3Dfull%3D[0-9] " ) ;
items = Zotero . selectItems ( items ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
if ( ! items ) {
return true ;
2006-06-23 20:53:29 +00:00
}
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
var buildNewList = new RegExp ( " ^javascript:buildNewList\\(''([^'']+) " ) ;
var uris = new Array ( ) ;
for ( var i in items ) {
var m = buildNewList . exec ( i ) ;
2006-06-06 18:25:45 +00:00
if ( m ) {
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
uris . push ( unescape ( m [ 1 ] + ' ' & fullmarc = true ' ' ) ) ;
} else {
uris . push ( i + ' ' & fullmarc = true ' ' ) ;
2006-06-06 18:25:45 +00:00
}
}
}
2006-10-02 23:15:27 +00:00
var translator = Zotero . loadTranslator ( " import " ) ;
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
translator . setTranslator ( " a6ee60df-1ddc-4aae-bb25-45e0537be973 " ) ;
var marc = translator . getTranslatorObject ( ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
2006-10-02 23:15:27 +00:00
Zotero . Utilities . processDocuments ( uris , function ( newDoc ) {
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
var uri = newDoc . location . href ;
var namespace = newDoc . documentElement . namespaceURI ;
var nsResolver = namespace ? function ( prefix ) {
if ( prefix = = ' ' x ' ' ) return namespace ; else return null ;
} : null ;
var xpath = ' ' / / form / table [ @ class = " tableBackground " ] / tbody / tr / td / table [ @ class = " tableBackground " ] / tbody / tr [ td [ 1 ] / a [ @ class = " normalBlackFont1 " ] ] ' ' ;
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
var elmts = newDoc . evaluate ( xpath , newDoc , nsResolver , XPathResult . ANY_TYPE , null ) ;
var elmt ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
var record = new marc . record ( ) ;
while ( elmt = elmts . iterateNext ( ) ) {
2006-10-02 23:15:27 +00:00
var field = Zotero . Utilities . superCleanString ( newDoc . evaluate ( ' ' . / TD [ 1 ] / A [ 1 ] / text ( ) [ 1 ] ' ' , elmt , nsResolver , XPathResult . ANY_TYPE , null ) . iterateNext ( ) . nodeValue ) ;
var value = Zotero . Utilities . getNodeString ( newDoc , elmt , ' ' . / TD [ 2 ] / TABLE [ 1 ] / TBODY [ 1 ] / TR [ 1 ] / TD [ 1 ] / A [ 1 ] / / text ( ) ' ' , nsResolver ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
if ( field = = " LDR " ) {
record . leader = value ;
} else if ( field ! = " FMT " ) {
value = value . replace ( / \ $ ( [ a - z ] ) / g , marc . subfieldDelimiter + " $1 " ) ;
var code = field . substring ( 0 , 3 ) ;
var ind = " " ;
if ( field . length > 3 ) {
ind = field [ 3 ] ;
if ( field . length > 4 ) {
ind + = field [ 4 ] ;
2006-06-23 19:22:24 +00:00
}
}
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
record . addField ( code , ind , value ) ;
2006-06-23 19:22:24 +00:00
}
}
2006-06-06 21:35:23 +00:00
2006-10-02 23:15:27 +00:00
var newItem = new Zotero . Item ( ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
record . translate ( newItem ) ;
newItem . complete ( ) ;
2006-10-02 23:15:27 +00:00
} , function ( ) { Zotero . done ( ) } , null ) ;
2006-06-06 21:35:23 +00:00
2006-10-02 23:15:27 +00:00
Zotero . wait ( ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
} ' );
2006-06-07 16:48:03 +00:00
2006-10-02 23:15:27 +00:00
REPLACE INTO " translators " VALUES ( ' 63a0a351-3131-18f4-21aa-f46b9ac51d87 ' , ' 2006-10-02 17:00:00 ' , 1 , 100 , 4 , ' VTLS ' , ' Simon Kornblith ' , ' /chameleon(?:\?|$) ' ,
2006-08-08 01:06:33 +00:00
' function detectWeb(doc, url) {
2006-08-11 15:28:18 +00:00
var node = doc . evaluate ( ' ' / / tr [ @ class = " intrRow " ] / td / table / tbody / tr [ th ] ' ' , doc , null , XPathResult . ANY_TYPE , null ) . iterateNext ( ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
if ( node ) {
return " multiple " ;
}
2006-08-11 15:28:18 +00:00
var node = doc . evaluate ( ' ' / / a [ text ( ) = " marc " ] ' ' , doc , null , XPathResult . ANY_TYPE , null ) . iterateNext ( ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
if ( node ) {
return " book " ;
}
2006-06-26 18:05:23 +00:00
} ' ,
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
' function doWeb(doc, url) {
2006-06-23 20:09:48 +00:00
var namespace = doc . documentElement . namespaceURI ;
var nsResolver = namespace ? function ( prefix ) {
if ( prefix = = ' ' x ' ' ) return namespace ; else return null ;
} : null ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
var uri = doc . location . href ;
var newUris = new Array ( ) ;
2006-06-23 20:09:48 +00:00
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
var marcs = doc . evaluate ( ' ' / / a [ text ( ) = " marc " ] ' ' , doc , nsResolver ,
XPathResult . ANY_TYPE , null ) ;
var record = marcs . iterateNext ( ) ;
2006-06-23 20:09:48 +00:00
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
if ( record & & ! marcs . iterateNext ( ) ) {
newUris . push ( record . href ) ;
2006-06-23 20:09:48 +00:00
} else {
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
/ / Require link to match this
var tagRegexp = new RegExp ( ) ;
tagRegexp . compile ( " /chameleon\?.*function=CARDSCR " ) ;
var items = new Array ( ) ;
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
var tableRows = doc . evaluate ( ' ' / / tr [ @ class = " intrRow " ] ' ' , doc , nsResolver ,
XPathResult . ANY_TYPE , null ) ;
var tableRow
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
/ / Go through table rows
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
while ( tableRow = tableRows . iterateNext ( ) ) {
var links = tableRow . getElementsByTagName ( " a " ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
/ / Go through links
var url ;
for ( var j = 0 ; j < links . length ; j + + ) {
if ( tagRegexp . test ( links [ j ] . href ) ) {
url = links [ j ] . href ;
break ;
}
}
if ( url ) {
/ / Collect title information
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
var fields = doc . evaluate ( ' ' . / td / table / tbody / tr [ th ] ' ' , tableRow ,
nsResolver , XPathResult . ANY_TYPE , null ) ;
var field ;
while ( field = fields . iterateNext ( ) ) {
var header = doc . evaluate ( ' ' . / th / text ( ) ' ' , fields [ j ] , nsResolver ,
XPathResult . ANY_TYPE , null ) . iterateNext ( ) ;
if ( header . nodeValue = = " Title " ) {
2006-10-02 23:15:27 +00:00
var value = Zotero . Utilities . getNodeString ( doc , fields [ j ] , ' ' . / td / / text ( ) ' ' , nsResolver ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
if ( value ) {
2006-10-02 23:15:27 +00:00
items [ url ] = Zotero . Utilities . cleanString ( value ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
}
}
}
}
}
2006-10-02 23:15:27 +00:00
items = Zotero . selectItems ( items ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
if ( ! items ) {
return true ;
}
for ( var i in items ) {
2006-10-02 23:15:27 +00:00
Zotero . Utilities . debug ( i . replace ( / function = [ A - Z ] { 7 } / , " function=MARCSCR " ) ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
newUris . push ( i . replace ( / function = [ A - Z ] { 7 } / , " function=MARCSCR " ) ) ;
}
2006-06-23 20:09:48 +00:00
}
2006-10-02 23:15:27 +00:00
var translator = Zotero . loadTranslator ( " import " ) ;
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
translator . setTranslator ( " a6ee60df-1ddc-4aae-bb25-45e0537be973 " ) ;
var marc = translator . getTranslatorObject ( ) ;
2006-06-23 20:09:48 +00:00
2006-10-02 23:15:27 +00:00
Zotero . Utilities . processDocuments ( newUris , function ( newDoc ) {
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
var uri = newDoc . location . href
var namespace = newDoc . documentElement . namespaceURI ;
var nsResolver = namespace ? function ( prefix ) {
if ( prefix = = ' ' x ' ' ) return namespace ; else return null ;
} : null ;
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
var record = new marc . record ( ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
var xpath = ' ' / / table [ @ class = " outertable " ] / tbody / tr [ td [ 4 ] ] ' ' ;
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
var elmts = newDoc . evaluate ( xpath , newDoc , nsResolver ,
XPathResult . ANY_TYPE , null ) ;
while ( elmt = elmts . iterateNext ( ) ) {
2006-08-11 15:28:18 +00:00
var field = doc . evaluate ( ' ' . / TD [ 1 ] / text ( ) [ 1 ] ' ' , elmt , nsResolver , XPathResult . ANY_TYPE , null ) . iterateNext ( ) . nodeValue ;
var ind1 = doc . evaluate ( ' ' . / TD [ 2 ] / text ( ) [ 1 ] ' ' , elmt , nsResolver , XPathResult . ANY_TYPE , null ) . iterateNext ( ) . nodeValue ;
var ind2 = doc . evaluate ( ' ' . / TD [ 3 ] / text ( ) [ 1 ] ' ' , elmt , nsResolver , XPathResult . ANY_TYPE , null ) . iterateNext ( ) . nodeValue ;
var value = doc . evaluate ( ' ' . / TD [ 4 ] / text ( ) [ 1 ] ' ' , elmt , nsResolver , XPathResult . ANY_TYPE , null ) . iterateNext ( ) . nodeValue ;
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
value = value . replace ( / \ \ ( [ a - z ] ) / g , marc . subfieldDelimiter + " $1 " ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
record . addField ( field , ind1 + ind2 , value ) ;
2006-06-23 20:09:48 +00:00
}
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
2006-10-02 23:15:27 +00:00
var newItem = new Zotero . Item ( ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
record . translate ( newItem ) ;
newItem . complete ( ) ;
2006-10-02 23:15:27 +00:00
} , function ( ) { Zotero . done ( ) ; } , null ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
2006-10-02 23:15:27 +00:00
Zotero . wait ( ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
} ' );
2006-06-07 16:48:03 +00:00
2006-10-02 23:15:27 +00:00
REPLACE INTO " translators " VALUES ( ' fb12ae9e-f473-cab4-0546-27ab88c64101 ' , ' 2006-10-02 17:00:00 ' , 1 , 100 , 4 , ' DRA ' , ' Simon Kornblith ' , ' /web2/tramp2\.exe/(?:see\_record/|authority\_hits/|goto/.*\?.*screen=Record\.html) ' ,
2006-08-08 01:06:33 +00:00
' function detectWeb(doc, url) {
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
if ( doc . location . href . indexOf ( " /authority_hits " ) > 0 ) {
return " multiple " ;
} else {
return " book " ;
}
2006-06-26 18:05:23 +00:00
} ' ,
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
' function doWeb(doc, url) {
var checkItems = false ;
2006-06-23 21:27:32 +00:00
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
if ( doc . location . href . indexOf ( " /authority_hits " ) > 0 ) {
var namespace = doc . documentElement . namespaceURI ;
var nsResolver = namespace ? function ( prefix ) {
if ( prefix = = ' ' x ' ' ) return namespace ; else return null ;
} : null ;
2006-10-02 23:15:27 +00:00
checkItems = Zotero . Utilities . gatherElementsOnXPath ( doc , doc , " /html/body//ol/li " , nsResolver ) ;
2006-06-23 21:27:32 +00:00
}
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
if ( checkItems & & checkItems . length ) {
2006-10-02 23:15:27 +00:00
var items = Zotero . Utilities . getItemArray ( doc , checkItems , ' ' https ? : / / . * / web2 / tramp2 \ . exe / see_record ' ' ) ;
items = Zotero . selectItems ( items ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
if ( ! items ) {
return true ;
}
var uris = new Array ( ) ;
for ( var i in items ) {
uris . push ( i ) ;
}
} else {
2006-08-26 03:50:15 +00:00
var ug = new Array ( doc . location . href ) ;
2006-06-23 21:27:32 +00:00
}
2006-06-07 16:48:03 +00:00
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
for ( var i in uris ) {
var uri = uris [ i ] ;
var uriRegexp = / ^ ( https ? : \ / \ / . * \ / web2 \ / tramp2 \ . exe \ / ) ( ? : goto | see \ _record | authority \ _hits ) ( \ / . * ) \ ? ( ? : screen = Record \ . html \ & ) ? ( . * ) $ / i ;
var m = uriRegexp . exec ( uri ) ;
if ( uri . indexOf ( " /authority_hits " ) < 0 ) {
var newUri = m [ 1 ] + " download_record " + m [ 2 ] + " /RECORD.MRC?format=marc& " + m [ 3 ] ;
2006-06-07 16:48:03 +00:00
} else {
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
var newUri = m [ 1 ] + " download_record " + m [ 2 ] + " /RECORD.MRC?format=marc " ;
2006-06-07 16:48:03 +00:00
}
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
/ / Keep track of how many requests have been completed
var j = 0 ;
2006-06-07 16:48:03 +00:00
2006-10-02 23:15:27 +00:00
var translator = Zotero . loadTranslator ( " import " ) ;
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
translator . setTranslator ( " a6ee60df-1ddc-4aae-bb25-45e0537be973 " ) ;
2006-06-07 16:48:03 +00:00
2006-10-02 23:15:27 +00:00
Zotero . Utilities . HTTP . doGet ( newUri , function ( text ) {
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
translator . setString ( text ) ;
translator . translate ( ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
j + + ;
if ( j = = uris . length ) {
2006-10-02 23:15:27 +00:00
Zotero . done ( ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
}
} ) ;
2006-06-07 16:48:03 +00:00
}
2006-10-02 23:15:27 +00:00
Zotero . wait ( ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
} ' );
2006-06-07 17:44:55 +00:00
2006-10-02 23:15:27 +00:00
REPLACE INTO " translators " VALUES ( ' c0e6fda6-0ecd-e4f4-39ca-37a4de436e15 ' , ' 2006-10-02 17:00:00 ' , 1 , 100 , 4 , ' GEAC ' , ' Simon Kornblith ' , ' /(?:GeacQUERY|GeacFETCH[\:\?].*[&:]next=html/(?:record\.html|geacnffull\.html)) ' ,
2006-08-08 01:06:33 +00:00
' function detectWeb(doc, url) {
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
if ( doc . location . href . indexOf ( " /GeacQUERY " ) > 0 ) {
return " multiple " ;
} else {
2006-06-26 18:05:23 +00:00
return " book " ;
2006-06-07 17:44:55 +00:00
}
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
} ' ,
' function doWeb(doc, url) {
var uri = doc . location . href ;
2006-06-24 14:35:05 +00:00
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
var uris = new Array ( ) ;
2006-06-24 14:35:05 +00:00
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
if ( uri . indexOf ( " /GeacQUERY " ) > 0 ) {
2006-10-02 23:15:27 +00:00
var items = Zotero . Utilities . getItemArray ( doc , doc , " (?:Geac)?FETCH[\:\?].*[&:]next=html/(?:record\.html|geacnffull\.html) " ) ;
items = Zotero . selectItems ( items ) ;
2006-06-24 14:35:05 +00:00
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
if ( ! items ) {
return true ;
}
2006-06-24 14:35:05 +00:00
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
var uris = new Array ( ) ;
for ( var i in items ) {
var newUri = i . replace ( / ( [ : & ] ) next = html \ / geacnffull . html / , " $1next=html/marc.html " ) ;
newUri = newUri . replace ( / ( [ : & ] ) next = html \ / record . html / , " $1next=html/marc.html " ) ;
uris . push ( newUri ) ;
2006-06-24 14:35:05 +00:00
}
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
} else {
var newUri = uri . replace ( / ( [ : & ] ) next = html \ / geacnffull . html / , " $1next=html/marc.html " ) ;
newUri = newUri . replace ( / ( [ : & ] ) next = html \ / record . html / , " $1next=html/marc.html " ) ;
uris . push ( newUri ) ;
2006-06-07 17:44:55 +00:00
}
2006-10-02 23:15:27 +00:00
var translator = Zotero . loadTranslator ( " import " ) ;
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
translator . setTranslator ( " a6ee60df-1ddc-4aae-bb25-45e0537be973 " ) ;
var marc = translator . getTranslatorObject ( ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
2006-10-02 23:15:27 +00:00
Zotero . Utilities . processDocuments ( uris , function ( newDoc ) {
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
var uri = newDoc . location . href ;
var namespace = newDoc . documentElement . namespaceURI ;
var nsResolver = namespace ? function ( prefix ) {
if ( prefix = = ' ' x ' ' ) return namespace ; else return null ;
} : null ;
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
var record = new marc . record ( ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
var elmts = newDoc . evaluate ( ' ' / / pre / text ( ) ' ' , newDoc , nsResolver ,
XPathResult . ANY_TYPE , null ) ;
var elmt , tag , content ;
var ind = " " ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
while ( elmt = elmts . iterateNext ( ) ) {
var line = elmt . nodeValue ;
2006-06-24 14:35:05 +00:00
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
if ( line . substring ( 0 , 6 ) = = " " ) {
content + = " " + line . substring ( 6 ) ;
continue ;
} else {
2006-06-24 14:35:05 +00:00
if ( tag ) {
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
record . addField ( tag , ind , content ) ;
2006-06-24 14:35:05 +00:00
}
}
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
line = line . replace ( / [ _ \ t \ xA0 ] / g , " " ) ; / / nbsp
2006-06-24 14:35:05 +00:00
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
tag = line . substr ( 0 , 3 ) ;
if ( tag [ 0 ] ! = " 0 " | | tag [ 1 ] ! = " 0 " ) {
ind = line . substr ( 4 , 2 ) ;
content = line . substr ( 7 ) . replace ( / \ $ ( [ a - z ] ) ( ? : | $ ) / g , marc . subfieldDelimiter + " $1 " ) ;
2006-06-24 14:35:05 +00:00
} else {
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
if ( tag = = " 000 " ) {
tag = undefined ;
record . leader = " 00000 " + line . substr ( 4 ) ;
} else {
content = line . substr ( 4 ) ;
}
2006-06-07 17:44:55 +00:00
}
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
2006-06-07 17:44:55 +00:00
}
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
2006-10-02 23:15:27 +00:00
var newItem = new Zotero . Item ( ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
record . translate ( newItem ) ;
newItem . complete ( ) ;
2006-10-02 23:15:27 +00:00
} , function ( ) { Zotero . done ( ) ; } , null ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
2006-10-02 23:15:27 +00:00
Zotero . wait ( ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
} ' );
2006-06-07 18:44:27 +00:00
2006-10-02 23:15:27 +00:00
REPLACE INTO " translators " VALUES ( ' 5287d20c-8a13-6004-4dcb-5bb2b66a9cc9 ' , ' 2006-10-02 17:00:00 ' , 1 , 100 , 4 , ' SIRSI -2003 ' , ' Simon Kornblith ' , ' /uhtbin/cgisirsi ' ,
2006-08-08 01:06:33 +00:00
' function detectWeb(doc, url) {
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
var namespace = doc . documentElement . namespaceURI ;
var nsResolver = namespace ? function ( prefix ) {
if ( prefix = = ' ' x ' ' ) return namespace ; else return null ;
} : null ;
2006-06-24 15:38:53 +00:00
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
var elmts = doc . evaluate ( ' ' / html / body / form / p / text ( ) [ 1 ] ' ' , doc , nsResolver ,
XPathResult . ANY_TYPE , null ) ;
var elmt ;
while ( elmt = elmts . iterateNext ( ) ) {
2006-10-02 23:15:27 +00:00
if ( Zotero . Utilities . superCleanString ( elmt . nodeValue ) = = " Viewing record " ) {
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
return " book " ;
}
2006-06-24 15:38:53 +00:00
}
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
var xpath = ' ' / / form [ @ name = " hitlist " ] / table / tbody / tr ' ' ;
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
var elmts = doc . evaluate ( xpath , doc , nsResolver , XPathResult . ANY_TYPE , null ) ;
if ( elmts . iterateNext ( ) ) {
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
return " multiple " ;
2006-06-24 15:38:53 +00:00
}
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
} ' ,
' function doWeb(doc, url) {
var namespace = doc . documentElement . namespaceURI ;
2006-06-07 18:44:27 +00:00
var nsResolver = namespace ? function ( prefix ) {
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
if ( prefix = = ' ' x ' ' ) return namespace ; else return null ;
2006-06-07 18:44:27 +00:00
} : null ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
var uri = doc . location . href ;
var recNumbers = new Array ( ) ;
2006-06-07 18:44:27 +00:00
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
var xpath = ' ' / / form [ @ name = " hitlist " ] / table / tbody / tr ' ' ;
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
var elmts = doc . evaluate ( xpath , doc , nsResolver , XPathResult . ANY_TYPE , null ) ;
var elmt = elmts . iterateNext ( ) ;
if ( elmt ) { / / Search results page
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
var uriRegexp = / ^ http : \ / \ / [ ^ \ / ] + / ;
var m = uriRegexp . exec ( uri ) ;
var postAction = doc . forms . namedItem ( " hitlist " ) . getAttribute ( " action " ) ;
var newUri = m [ 0 ] + postAction . substr ( 0 , postAction . length - 1 ) + " 40 "
2006-06-07 18:44:27 +00:00
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
var titleRe = / < br > \ s * ( . * [ ^ \ s ] ) \ s * < br > / i ;
2006-06-07 18:44:27 +00:00
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
var items = new Array ( ) ;
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
do {
var checkbox = doc . evaluate ( ' ' . / / input [ @ type = " checkbox " ] ' ' , elmt , nsResolver ,
XPathResult . ANY_TYPE , null ) . iterateNext ( ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
/ / Collect title
2006-10-02 23:15:27 +00:00
var title = Zotero . Utilities . getNodeString ( doc , elmt , " ./td[2]/text() " , nsResolver ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
if ( checkbox & & title ) {
2006-10-02 23:15:27 +00:00
items [ checkbox . name ] = Zotero . Utilities . cleanString ( title ) ;
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
}
} while ( elmt = elmts . iterateNext ( ) ) ;
2006-06-07 18:44:27 +00:00
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
2006-10-02 23:15:27 +00:00
items = Zotero . selectItems ( items ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
if ( ! items ) {
return true ;
2006-06-07 18:44:27 +00:00
}
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
for ( var i in items ) {
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
recNumbers . push ( i ) ;
2006-06-07 18:44:27 +00:00
}
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
} else { / / Normal page
var uriRegexp = / ^ ( . * ) ( \ / [ 0 - 9 ] + ) $ / ;
var m = uriRegexp . exec ( uri ) ;
var newUri = m [ 1 ] + " /40 "
2006-06-07 18:44:27 +00:00
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
var elmts = doc . evaluate ( ' ' / html / body / form / p ' ' , doc , nsResolver ,
XPathResult . ANY_TYPE , null ) ;
while ( elmt = elmts . iterateNext ( ) ) {
2006-08-11 15:28:18 +00:00
var initialText = doc . evaluate ( ' ' . / text ( ) [ 1 ] ' ' , elmt , nsResolver , XPathResult . ANY_TYPE , null ) . iterateNext ( ) ;
2006-10-02 23:15:27 +00:00
if ( initialText & & initialText . nodeValue & & Zotero . Utilities . superCleanString ( initialText . nodeValue ) = = " Viewing record " ) {
2006-08-11 15:28:18 +00:00
recNumbers . push ( doc . evaluate ( ' ' . / b [ 1 ] / text ( ) [ 1 ] ' ' , elmt , nsResolver , XPathResult . ANY_TYPE , null ) . iterateNext ( ) . nodeValue ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
break ;
}
2006-06-25 21:12:14 +00:00
}
2006-06-07 21:26:55 +00:00
}
2006-06-25 21:12:14 +00:00
2006-10-02 23:15:27 +00:00
var translator = Zotero . loadTranslator ( " import " ) ;
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
translator . setTranslator ( " a6ee60df-1ddc-4aae-bb25-45e0537be973 " ) ;
var marc = translator . getTranslatorObject ( ) ;
2006-06-25 21:12:14 +00:00
2006-10-02 23:15:27 +00:00
Zotero . Utilities . loadDocument ( newUri + ' ' ? marks = ' ' + recNumbers . join ( " , " ) + ' ' & shadow = NO & format = FLAT + ASCII & sort = TITLE & vopt_elst = ALL & library = ALL & display_rule = ASCENDING & duedate_code = l & holdcount_code = t & DOWNLOAD_x = 22 & DOWNLOAD_y = 12 & address = & form_type = ' ' , function ( doc ) {
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
var pre = doc . getElementsByTagName ( " pre " ) ;
var text = pre [ 0 ] . textContent ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
var documents = text . split ( " *** DOCUMENT BOUNDARY *** " ) ;
for ( var j = 1 ; j < documents . length ; j + + ) {
var uri = newUri + " ?marks= " + recNumbers [ j ] + " &shadow=NO&format=FLAT+ASCII&sort=TITLE&vopt_elst=ALL&library=ALL&display_rule=ASCENDING&duedate_code=l&holdcount_code=t&DOWNLOAD_x=22&DOWNLOAD_y=12&address=&form_type= " ;
var lines = documents [ j ] . split ( " \n " ) ;
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
var record = new marc . record ( ) ;
var tag , content ;
var ind = " " ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
for ( var i = 0 ; i < lines . length ; i + + ) {
var line = lines [ i ] ;
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
if ( line [ 0 ] = = " . " & & line . substr ( 4 , 2 ) = = " . " ) {
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
if ( tag ) {
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
content = content . replace ( / \ | ( [ a - z ] ) / g , marc . subfieldDelimiter + " $1 " ) ;
record . addField ( tag , ind , content ) ;
2006-06-25 21:12:14 +00:00
}
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
} else {
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
content + = " " + line . substr ( 6 ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
continue ;
}
tag = line . substr ( 1 , 3 ) ;
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
if ( tag [ 0 ] ! = " 0 " | | tag [ 1 ] ! = " 0 " ) {
ind = line . substr ( 6 , 2 ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
content = line . substr ( 8 ) ;
} else {
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
content = line . substr ( 7 ) ;
if ( tag = = " 000 " ) {
tag = undefined ;
record . leader = " 00000 " + content ;
2006-10-02 23:15:27 +00:00
Zotero . Utilities . debug ( " the leader is: " + record . leader ) ;
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
}
2006-06-25 21:12:14 +00:00
}
}
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
2006-10-02 23:15:27 +00:00
var newItem = new Zotero . Item ( ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
record . translate ( newItem ) ;
newItem . complete ( ) ;
2006-06-25 21:12:14 +00:00
}
2006-10-02 23:15:27 +00:00
Zotero . done ( ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
} ) ;
2006-10-02 23:15:27 +00:00
Zotero . wait ( ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
} ' );
2006-10-02 23:15:27 +00:00
REPLACE INTO " translators " VALUES ( ' 0f9fc2fc-306e-5204-1117-25bca009dffc ' , ' 2006-10-02 17:00:00 ' , 1 , 100 , 4 , ' TLC/YouSeeMore ' , ' Simon Kornblith ' , ' TLCScripts/interpac\.dll\?(?:.*LabelDisplay.*RecordNumber=[0-9]|Search|ItemTitles) ' ,
2006-08-08 01:06:33 +00:00
' function detectWeb(doc, url) {
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
var detailRe = new RegExp ( " TLCScripts/interpac\.dll\?.*LabelDisplay.*RecordNumber=[0-9] " ) ;
if ( detailRe . test ( doc . location . href ) ) {
return " book " ;
} else {
return " multiple " ;
2006-06-25 21:12:14 +00:00
}
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
} ' ,
' function doWeb(doc, url) {
var namespace = doc . documentElement . namespaceURI ;
var nsResolver = namespace ? function ( prefix ) {
if ( prefix = = ' ' x ' ' ) return namespace ; else return null ;
} : null ;
2006-06-25 21:12:14 +00:00
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
var detailRe = new RegExp ( " TLCScripts/interpac\.dll\?.*LabelDisplay.*RecordNumber=[0-9] " ) ;
var uri = doc . location . href ;
var newUris = new Array ( ) ;
2006-06-25 21:12:14 +00:00
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
if ( detailRe . test ( uri ) ) {
newUris . push ( uri . replace ( " LabelDisplay " , " MARCDisplay " ) ) ;
} else {
2006-10-02 23:15:27 +00:00
var items = Zotero . Utilities . getItemArray ( doc , doc , ' ' TLCScripts / interpac \ . dll \ ? . * LabelDisplay . * RecordNumber = [ 0 - 9 ] ' ' ) ;
items = Zotero . selectItems ( items ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
if ( ! items ) {
return true ;
}
for ( var i in items ) {
newUris . push ( i . replace ( " LabelDisplay " , " MARCDisplay " ) ) ;
2006-06-25 21:12:14 +00:00
}
}
2006-10-02 23:15:27 +00:00
var translator = Zotero . loadTranslator ( " import " ) ;
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
translator . setTranslator ( " a6ee60df-1ddc-4aae-bb25-45e0537be973 " ) ;
var marc = translator . getTranslatorObject ( ) ;
2006-06-25 21:12:14 +00:00
2006-10-02 23:15:27 +00:00
Zotero . Utilities . processDocuments ( newUris , function ( newDoc ) {
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
var uri = newDoc . location . href ;
var namespace = newDoc . documentElement . namespaceURI ;
var nsResolver = namespace ? function ( prefix ) {
if ( prefix = = ' ' x ' ' ) return namespace ; else return null ;
} : null ;
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
var record = new marc . record ( ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
var elmts = newDoc . evaluate ( ' ' / html / body / table / tbody / tr [ td [ 4 ] ] ' ' , newDoc , nsResolver ,
XPathResult . ANY_TYPE , null ) ;
var tag , ind , content , elmt ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
while ( elmt = elmts . iterateNext ( ) ) {
2006-08-11 15:28:18 +00:00
tag = newDoc . evaluate ( ' ' . / td [ 2 ] / tt [ 1 ] / text ( ) [ 1 ] ' ' , elmt , nsResolver , XPathResult . ANY_TYPE , null ) . iterateNext ( ) . nodeValue ;
var inds = newDoc . evaluate ( ' ' . / td [ 3 ] / tt [ 1 ] / text ( ) [ 1 ] ' ' , elmt , nsResolver , XPathResult . ANY_TYPE , null ) . iterateNext ( ) . nodeValue ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
tag = tag . replace ( / [ \ r \ n ] / g , " " ) ;
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
inds = inds . replace ( / [ \ r \ n \ xA0 ] / g , " " ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
var children = newDoc . evaluate ( ' ' . / td [ 4 ] / tt [ 1 ] / / text ( ) ' ' , elmt , nsResolver ,
XPathResult . ANY_TYPE , null ) ;
var subfield = children . iterateNext ( ) ;
var fieldContent = children . iterateNext ( ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
if ( tag = = " LDR " ) {
record . leader = " 00000 " + subfield . nodeValue ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
} else {
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
content = " " ;
if ( ! fieldContent ) {
content = subfield . nodeValue ;
} else {
while ( subfield & & fieldContent ) {
content + = marc . subfieldDelimiter + subfield . nodeValue . substr ( 1 , 1 ) + fieldContent . nodeValue ;
var subfield = children . iterateNext ( ) ;
var fieldContent = children . iterateNext ( ) ;
}
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
}
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
record . addField ( tag , inds , content ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
}
2006-06-25 21:12:14 +00:00
}
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
2006-10-02 23:15:27 +00:00
var newItem = new Zotero . Item ( ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
record . translate ( newItem ) ;
newItem . complete ( ) ;
2006-10-02 23:15:27 +00:00
} , function ( ) { Zotero . done ( ) ; } , null ) ;
2006-06-25 21:12:14 +00:00
2006-10-02 23:15:27 +00:00
Zotero . wait ( ) ;
2006-06-25 21:12:14 +00:00
} ' );
2006-06-08 01:26:40 +00:00
2006-10-02 23:15:27 +00:00
REPLACE INTO " translators " VALUES ( ' c54d1932-73ce-dfd4-a943-109380e06574 ' , ' 2006-10-02 17:00:00 ' , 1 , 100 , 4 , ' Project MUSE ' , ' Simon Kornblith ' , ' ^http://muse\.jhu\.edu/(?:journals/[^/]+/[^/]+/[^/]+\.html|search/pia.cgi) ' ,
2006-08-08 01:06:33 +00:00
' function detectWeb(doc, url) {
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
var searchRe = new RegExp ( " ^http://[^/]+/search/pia\.cgi " ) ;
if ( searchRe . test ( url ) ) {
return " multiple " ;
} else {
return " journalArticle " ;
2006-06-08 01:26:40 +00:00
}
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
} ' ,
' function doWeb(doc, url) {
2006-06-24 17:33:35 +00:00
var namespace = doc . documentElement . namespaceURI ;
var nsResolver = namespace ? function ( prefix ) {
if ( prefix = = ' ' x ' ' ) return namespace ; else return null ;
} : null ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
var searchRe = new RegExp ( " ^http://[^/]+/search/pia\.cgi " ) ;
if ( searchRe . test ( doc . location . href ) ) {
var items = new Array ( ) ;
2006-08-17 07:56:01 +00:00
var attachments = new Array ( ) ;
var pdfRe = / \ . pdf $ / i ;
var htmlRe = / \ . html $ / i ;
var tableRows = doc . evaluate ( ' ' / html / body / table [ @ class = " navbar " ] / tbody / tr / td / form / table ' ' ,
doc , nsResolver , XPathResult . ANY_TYPE , null ) ;
var tableRow ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
/ / Go through table rows
2006-08-17 07:56:01 +00:00
while ( tableRow = tableRows . iterateNext ( ) ) {
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
/ / article_id is what we need to get it all as one file
2006-08-17 07:56:01 +00:00
var input = doc . evaluate ( ' ' . / tbody / tr / td / input [ @ name = " article_id " ] ' ' , tableRow , nsResolver , XPathResult . ANY_TYPE , null ) . iterateNext ( ) ;
var link = doc . evaluate ( ' ' . / / b / i / a / text ( ) ' ' , tableRow , nsResolver , XPathResult . ANY_TYPE , null ) . iterateNext ( ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
if ( input & & input . value & & link & & link . nodeValue ) {
items [ input . value ] = link . nodeValue ;
2006-08-17 07:56:01 +00:00
var aTags = tableRow . getElementsByTagName ( " a " ) ;
/ / get attachments
attachments [ input . value ] = new Array ( ) ;
for ( var i = 0 ; i < aTags . length ; i + + ) {
if ( pdfRe . test ( aTags [ i ] . href ) ) {
attachments [ input . value ] . push ( { url : aTags [ i ] . href ,
title : " Project MUSE Full Text (PDF) " ,
mimeType : " application/pdf " ,
downloadable : true } ) ;
} else if ( htmlRe . test ( aTags [ i ] . href ) ) {
attachments [ input . value ] . push ( { url : aTags [ i ] . href ,
title : " Project MUSE Full Text (HTML) " ,
mimeType : " text/html " ,
downloadable : true } ) ;
}
}
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
}
}
2006-10-02 23:15:27 +00:00
items = Zotero . selectItems ( items ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
if ( ! items ) {
return true ;
}
try {
var search_id = doc . forms . namedItem ( " results " ) . elements . namedItem ( " search_id " ) . value ;
} catch ( e ) {
var search_id = " " ;
}
var articleString = " " ;
2006-08-17 07:56:01 +00:00
var newAttachments = new Array ( ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
for ( var i in items ) {
articleString + = " &article_id= " + i ;
2006-08-17 07:56:01 +00:00
newAttachments . push ( attachments [ i ] ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
}
var savePostString = " actiontype=save&search_id= " + search_id + articleString ;
2006-10-02 23:15:27 +00:00
Zotero . Utilities . HTTP . doGet ( " http://muse.jhu.edu/search/save.cgi? " + savePostString , function ( ) {
Zotero . Utilities . HTTP . doGet ( " http://muse.jhu.edu/search/export.cgi?exporttype=endnote " + articleString , function ( text ) {
Zotero . Utilities . debug ( text ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
/ / load translator for RIS
2006-10-02 23:15:27 +00:00
var translator = Zotero . loadTranslator ( " import " ) ;
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
translator . setTranslator ( " 32d59d2d-b65a-4da4-b0a3-bdd3cfb979e7 " ) ;
translator . setString ( text ) ;
translator . setHandler ( " itemDone " , function ( obj , item ) {
if ( item . notes & & item . notes [ 0 ] ) {
2006-10-02 23:15:27 +00:00
Zotero . Utilities . debug ( item . notes ) ;
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
item . extra = item . notes [ 0 ] . note ;
delete item . notes ;
item . notes = undefined ;
}
item . attachments = newAttachments . shift ( ) ;
2006-10-02 23:15:27 +00:00
Zotero . Utilities . debug ( item . attachments ) ;
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
item . complete ( ) ;
} ) ;
translator . translate ( ) ;
2006-10-02 23:15:27 +00:00
Zotero . done ( ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
} , function ( ) { } ) ;
} , function ( ) { } ) ;
2006-10-02 23:15:27 +00:00
Zotero . wait ( ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
} else {
2006-10-02 23:15:27 +00:00
var newItem = new Zotero . Item ( " journalArticle " ) ;
2006-08-17 07:56:01 +00:00
newItem . url = url ;
newItem . attachments . push ( { title : " Project MUSE Full Text (HTML) " , mimeType : " text/html " ,
url : url , downloadable : true } ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
2006-08-17 07:56:01 +00:00
var getPDF = doc . evaluate ( ' ' / / a [ text ( ) = " [Access article in PDF] " ] ' ' , doc ,
nsResolver , XPathResult . ANY_TYPE , null ) . iterateNext ( ) ;
if ( getPDF ) {
newItem . attachments . push ( { title : " Project MUSE Full Text (PDF) " , mimeType : " application/pdf " ,
url : getPDF . href , downloadable : true } ) ;
}
var elmts = doc . evaluate ( ' ' / / comment ( ) ' ' , doc , nsResolver ,
XPathResult . ANY_TYPE , null ) ;
var headerRegexp = / HeaderData ( ( ? : . | \ n ) * ) \ #\#EndHeaders/i
while ( elmt = elmts . iterateNext ( ) ) {
if ( elmt . nodeValue . substr ( 0 , 10 ) = = " HeaderData " ) {
var m = headerRegexp . exec ( elmt . nodeValue ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
var headerData = m [ 1 ] ;
}
}
/ / Use E4X rather than DOM / XPath , because the Mozilla gods have decided not to
/ / expose DOM / XPath to sandboxed scripts
var newDOM = new XML ( headerData ) ;
2006-08-06 17:34:41 +00:00
newItem . publicationTitle = newDOM . journal . text ( ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
newItem . volume = newDOM . volume . text ( ) ;
2006-08-06 17:34:41 +00:00
newItem . issue = newDOM . issue . text ( ) ;
2006-08-14 05:12:28 +00:00
newItem . date = newDOM . pubdate . text ( ) . toString ( ) ;
if ( ! newItem . date ) {
newItem . date = newDOM . year . text ( ) ;
}
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
newItem . title = newDOM . doctitle . text ( ) ;
newItem . ISSN = newDOM . issn . text ( ) ;
/ / Do pages
var fpage = newDOM . fpage . text ( ) ;
var lpage = newDOM . lpage . text ( ) ;
if ( fpage ! = " " ) {
newItem . pages = fpage ;
if ( lpage ) {
newItem . pages + = " - " + lpage ;
}
}
/ / Do authors
var elmts = newDOM . docauthor ;
for ( var i in elmts ) {
var fname = elmts [ i ] . fname . text ( ) ;
var surname = elmts [ i ] . surname . text ( ) ;
newItem . creators . push ( { firstName : fname , lastName : surname , creatorType : " author " } ) ;
}
newItem . complete ( ) ;
2006-06-24 17:33:35 +00:00
}
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
} ' );
2006-10-02 23:15:27 +00:00
REPLACE INTO " translators " VALUES ( ' fcf41bed-0cbc-3704-85c7-8062a0068a7a ' , ' 2006-10-02 17:00:00 ' , 1 , 100 , 12 , ' PubMed ' , ' Simon Kornblith ' , ' ^http://www\.ncbi\.nlm\.nih\.gov/entrez/query\.fcgi\?(?:.*db=PubMed.*list_uids=[0-9]|.*list_uids=[0-9].*db=PubMed|.*db=PubMed.*CMD=search|.*CMD=search.*db=PubMed) ' ,
2006-08-08 01:06:33 +00:00
' function detectWeb(doc, url) {
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
if ( doc . location . href . indexOf ( " list_uids= " ) > = 0 ) {
return " journalArticle " ;
} else {
return " multiple " ;
2006-06-24 17:33:35 +00:00
}
2006-08-08 01:06:33 +00:00
}
function getPMID ( co ) {
var coParts = co . split ( " & " ) ;
for each ( part in coParts ) {
if ( part . substr ( 0 , 7 ) = = " rft_id= " ) {
var value = unescape ( part . substr ( 7 ) ) ;
if ( value . substr ( 0 , 10 ) = = " info:pmid/ " ) {
return value . substr ( 10 ) ;
}
2006-06-08 01:26:40 +00:00
}
2006-08-08 01:06:33 +00:00
}
}
function detectSearch ( item ) {
if ( item . contextObject ) {
if ( getPMID ( item . contextObject ) ) {
return " journalArticle " ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
}
}
2006-08-08 01:06:33 +00:00
return false ;
} ' ,
2006-08-17 07:56:01 +00:00
' function lookupPMIDs(ids, doc) {
2006-10-02 23:15:27 +00:00
Zotero . wait ( ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
var newUri = " http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=PubMed&retmode=xml&rettype=citation&id= " + ids . join ( " , " ) ;
2006-10-02 23:15:27 +00:00
Zotero . Utilities . HTTP . doGet ( newUri , function ( text ) {
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
/ / Remove xml parse instruction and doctype
text = text . replace ( / < ! DOCTYPE [ ^ > ] * > / , " " ) . replace ( / < \ ? xml [ ^ > ] * \ ? > / , " " ) ;
var xml = new XML ( text ) ;
for ( var i = 0 ; i < xml . PubmedArticle . length ( ) ; i + + ) {
2006-10-02 23:15:27 +00:00
var newItem = new Zotero . Item ( " journalArticle " ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
var citation = xml . PubmedArticle [ i ] . MedlineCitation ;
2006-08-17 07:56:01 +00:00
var PMID = citation . PMID . text ( ) . toString ( ) ;
newItem . accessionNumber = " PMID " + PMID ;
/ / add attachments
if ( doc ) {
newItem . attachments . push ( { document : doc , title : " PubMed Abstract " ,
downloadable : true } ) ;
} else {
var url = " http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=pubmed&cmd=Retrieve&dopt=AbstractPlus&list_uids= " + PMID ;
newItem . attachments . push ( { url : url , title : " PubMed Abstract (HTML) " ,
mimeType : " text/html " , downloadable : true } ) ;
}
2006-06-08 01:26:40 +00:00
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
var article = citation . Article ;
if ( article . ArticleTitle . length ( ) ) {
var title = article . ArticleTitle . text ( ) . toString ( ) ;
if ( title . substr ( - 1 ) = = " . " ) {
title = title . substring ( 0 , title . length - 1 ) ;
}
newItem . title = title ;
2006-06-08 01:26:40 +00:00
}
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
if ( article . Journal . length ( ) ) {
var issn = article . Journal . ISSN . text ( ) ;
if ( issn ) {
newItem . ISSN = issn . replace ( / [ ^ 0 - 9 ] / g , " " ) ;
}
2006-10-02 23:15:27 +00:00
newItem . journalAbbreviation = Zotero . Utilities . superCleanString ( citation . MedlineJournalInfo . MedlineTA . text ( ) . toString ( ) ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
if ( article . Journal . Title . length ( ) ) {
2006-10-02 23:15:27 +00:00
newItem . publicationTitle = Zotero . Utilities . superCleanString ( article . Journal . Title . text ( ) . toString ( ) ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
} else if ( citation . MedlineJournalInfo . MedlineTA . length ( ) ) {
2006-08-31 00:04:11 +00:00
newItem . publicationTitle = newItem . journalAbbreviation ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
}
if ( article . Journal . JournalIssue . length ( ) ) {
newItem . volume = article . Journal . JournalIssue . Volume . text ( ) ;
2006-08-06 17:34:41 +00:00
newItem . issue = article . Journal . JournalIssue . Issue . text ( ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
if ( article . Journal . JournalIssue . PubDate . length ( ) ) { / / try to get the date
if ( article . Journal . JournalIssue . PubDate . Day . text ( ) . toString ( ) ! = " " ) {
2006-08-31 00:04:11 +00:00
newItem . date = article . Journal . JournalIssue . PubDate . Month . text ( ) + " " + article . Journal . JournalIssue . PubDate . Day . text ( ) + " , " + article . Journal . JournalIssue . PubDate . Year . text ( ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
} else if ( article . Journal . JournalIssue . PubDate . Month . text ( ) . toString ( ) ! = " " ) {
2006-08-31 00:04:11 +00:00
newItem . date = article . Journal . JournalIssue . PubDate . Month . text ( ) + " " + article . Journal . JournalIssue . PubDate . Year . text ( ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
} else if ( article . Journal . JournalIssue . PubDate . Year . text ( ) . toString ( ) ! = " " ) {
2006-08-31 00:04:11 +00:00
newItem . date = article . Journal . JournalIssue . PubDate . Year . text ( ) ;
2006-06-25 05:03:01 +00:00
}
}
2006-06-08 01:26:40 +00:00
}
}
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
if ( article . AuthorList . length ( ) & & article . AuthorList . Author . length ( ) ) {
var authors = article . AuthorList . Author ;
for ( var j = 0 ; j < authors . length ( ) ; j + + ) {
var lastName = authors [ j ] . LastName . text ( ) . toString ( ) ;
var firstName = authors [ j ] . FirstName . text ( ) . toString ( ) ;
if ( firstName = = " " ) {
var firstName = authors [ j ] . ForeName . text ( ) . toString ( ) ;
}
if ( firstName | | lastName ) {
newItem . creators . push ( { lastName : lastName , firstName : firstName } ) ;
}
2006-06-08 01:26:40 +00:00
}
}
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
newItem . complete ( ) ;
2006-06-08 01:26:40 +00:00
}
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
2006-10-02 23:15:27 +00:00
Zotero . done ( ) ;
2006-08-08 01:06:33 +00:00
} ) ;
}
function doWeb ( doc , url ) {
var uri = doc . location . href ;
var ids = new Array ( ) ;
var idRegexp = / [ \ ? \ & ] list_uids = ( [ 0 - 9 \ , ] + ) / ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
2006-08-08 01:06:33 +00:00
var m = idRegexp . exec ( uri ) ;
if ( m ) {
ids . push ( m [ 1 ] ) ;
2006-08-17 07:56:01 +00:00
lookupPMIDs ( ids , doc ) ;
2006-08-08 01:06:33 +00:00
} else {
var namespace = doc . documentElement . namespaceURI ;
var nsResolver = namespace ? function ( prefix ) {
if ( prefix = = ' ' x ' ' ) return namespace ; else return null ;
} : null ;
var items = new Array ( ) ;
2006-08-17 07:56:01 +00:00
var tableRows = doc . evaluate ( ' ' / / div [ @ class = " ResultSet " ] / table / tbody ' ' , doc ,
nsResolver , XPathResult . ANY_TYPE , null ) ;
var tableRow ;
2006-08-08 01:06:33 +00:00
/ / Go through table rows
2006-08-17 07:56:01 +00:00
while ( tableRow = tableRows . iterateNext ( ) ) {
var link = doc . evaluate ( ' ' . / / a ' ' , tableRow , nsResolver , XPathResult . ANY_TYPE , null ) . iterateNext ( ) ;
var article = doc . evaluate ( ' ' . / tr [ 2 ] / td [ 2 ] / text ( ) [ 1 ] ' ' , tableRow , nsResolver , XPathResult . ANY_TYPE , null ) . iterateNext ( ) ;
2006-08-08 01:06:33 +00:00
items [ link . href ] = article . nodeValue ;
}
2006-10-02 23:15:27 +00:00
items = Zotero . selectItems ( items ) ;
2006-08-08 01:06:33 +00:00
if ( ! items ) {
return true ;
}
for ( var i in items ) {
var m = idRegexp . exec ( i ) ;
ids . push ( m [ 1 ] ) ;
}
2006-08-17 07:56:01 +00:00
lookupPMIDs ( ids ) ;
}
2006-08-08 01:06:33 +00:00
}
function doSearch ( item ) {
/ / pmid was defined earlier in detectSearch
lookupPMIDs ( [ getPMID ( item . contextObject ) ] ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
} ' );
2006-06-20 16:08:13 +00:00
2006-10-02 23:15:27 +00:00
REPLACE INTO " translators " VALUES ( ' 951c027d-74ac-47d4-a107-9c3069ab7b48 ' , ' 2006-10-02 17:00:00 ' , 1 , 100 , 4 , ' Embedded RDF ' , ' Simon Kornblith ' , NULL ,
2006-08-08 01:06:33 +00:00
' function detectWeb(doc, url) {
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
var metaTags = doc . getElementsByTagName ( " meta " ) ;
for ( var i = 0 ; i < metaTags . length ; i + + ) {
var tag = metaTags [ i ] . getAttribute ( " name " ) ;
if ( tag & & tag . substr ( 0 , 3 ) . toLowerCase ( ) = = " dc. " ) {
2006-10-02 00:00:50 +00:00
return " webpage " ;
2006-06-20 16:08:13 +00:00
}
}
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
return false ;
2006-06-26 18:05:23 +00:00
} ' ,
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
' function doWeb(doc, url) {
var dc = " http://purl.org/dc/elements/1.1/ " ;
2006-06-21 14:28:51 +00:00
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
/ / load RDF translator
2006-10-02 23:15:27 +00:00
var translator = Zotero . loadTranslator ( " import " ) ;
2006-08-31 00:04:11 +00:00
translator . setTranslator ( " 5e3ad958-ac79-463d-812b-a86a9235c28f " ) ;
2006-09-04 21:43:23 +00:00
translator . setHandler ( " itemDone " , function ( obj , newItem ) {
/ / use document title if none given in dublin core
if ( ! newItem . title ) {
newItem . title = doc . title ;
}
/ / add attachment
newItem . attachments . push ( { document : doc } ) ;
/ / add url
newItem . url = doc . location . href ;
newItem . complete ( ) ;
} ) ;
2006-08-31 00:04:11 +00:00
var rdf = translator . getTranslatorObject ( ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
var metaTags = doc . getElementsByTagName ( " meta " ) ;
var foundTitle = false ; / / We can use the page title if necessary
for ( var i = 0 ; i < metaTags . length ; i + + ) {
var tag = metaTags [ i ] . getAttribute ( " name " ) ;
var value = metaTags [ i ] . getAttribute ( " content " ) ;
if ( tag & & value & & tag . substr ( 0 , 3 ) . toLowerCase ( ) = = " dc. " ) {
if ( tag = = " dc.title " ) {
foundTitle = true ;
}
2006-10-02 23:15:27 +00:00
rdf . Zotero . RDF . addStatement ( url , dc + tag . substr ( 3 ) . toLowerCase ( ) , value , true ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
} else if ( tag & & value & & ( tag = = " author " | | tag = = " author-personal " ) ) {
2006-10-02 23:15:27 +00:00
rdf . Zotero . RDF . addStatement ( url , dc + " creator " , value , true ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
} else if ( tag & & value & & tag = = " author-corporate " ) {
2006-10-02 23:15:27 +00:00
rdf . Zotero . RDF . addStatement ( url , dc + " creator " , value , true ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
}
2006-06-24 17:33:35 +00:00
}
2006-08-31 00:04:11 +00:00
rdf . doImport ( ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
} ' );
2006-10-02 23:15:27 +00:00
REPLACE INTO " translators " VALUES ( ' 05d07af9-105a-4572-99f6-a8e231c0daef ' , ' 2006-10-02 17:00:00 ' , 1 , 100 , 4 , ' COinS ' , ' Simon Kornblith ' , NULL ,
2006-08-08 01:06:33 +00:00
' function detectWeb(doc, url) {
2006-08-07 00:30:36 +00:00
var spanTags = doc . getElementsByTagName ( " span " ) ;
var encounteredType = false ;
for ( var i = 0 ; i < spanTags . length ; i + + ) {
var spanClass = spanTags [ i ] . getAttribute ( " class " ) ;
if ( spanClass ) {
var spanClasses = spanClass . split ( " " ) ;
2006-10-02 23:15:27 +00:00
if ( Zotero . Utilities . inArray ( " Z3988 " , spanClasses ) ) {
2006-08-07 00:30:36 +00:00
var spanTitle = spanTags [ i ] . getAttribute ( " title " ) ;
2006-08-07 05:15:30 +00:00
/ / determine if it ' ' s a valid type
2006-10-02 23:15:27 +00:00
var item = new Zotero . Item ;
var success = Zotero . Utilities . parseContextObject ( spanTitle , item ) ;
2006-08-07 00:30:36 +00:00
2006-09-11 22:34:39 +00:00
if ( item . itemType ) {
2006-08-07 05:15:30 +00:00
if ( encounteredType ) {
return " multiple " ;
} else {
2006-09-11 22:34:39 +00:00
encounteredType = item . itemType ;
2006-08-07 05:15:30 +00:00
}
2006-08-07 00:30:36 +00:00
}
}
}
}
return encounteredType ;
} ' ,
2006-08-07 05:15:30 +00:00
' // used to retrieve next COinS object when asynchronously parsing COinS objects
/ / on a page
2006-09-04 17:37:07 +00:00
function retrieveNextCOinS ( needFullItems , newItems , couldUseFullItems , doc ) {
2006-08-07 05:15:30 +00:00
if ( needFullItems . length ) {
var item = needFullItems . shift ( ) ;
2006-10-02 23:15:27 +00:00
Zotero . Utilities . debug ( " looking up contextObject " ) ;
var search = Zotero . loadTranslator ( " search " ) ;
2006-08-08 01:06:33 +00:00
search . setHandler ( " itemDone " , function ( obj , item ) {
newItems . push ( item ) ;
} ) ;
search . setHandler ( " done " , function ( ) {
2006-09-04 17:37:07 +00:00
retrieveNextCOinS ( needFullItems , newItems , couldUseFullItems , doc ) ;
2006-08-07 05:15:30 +00:00
} ) ;
2006-08-14 20:34:13 +00:00
search . setSearch ( item ) ;
2006-08-08 01:06:33 +00:00
/ / look for translators
var translators = search . getTranslators ( ) ;
2006-09-04 17:37:07 +00:00
if ( translators . length ) {
2006-08-08 01:06:33 +00:00
search . setTranslator ( translators ) ;
search . translate ( ) ;
} else {
2006-09-04 17:37:07 +00:00
retrieveNextCOinS ( needFullItems , newItems , couldUseFullItems , doc ) ;
2006-08-08 01:06:33 +00:00
}
2006-08-07 00:30:36 +00:00
} else {
2006-09-04 17:37:07 +00:00
completeCOinS ( newItems , couldUseFullItems , doc ) ;
2006-10-02 23:15:27 +00:00
Zotero . done ( true ) ;
2006-08-07 00:30:36 +00:00
}
2006-08-07 05:15:30 +00:00
}
/ / saves all COinS objects
2006-09-04 17:37:07 +00:00
function completeCOinS ( newItems , couldUseFullItems , doc ) {
2006-08-07 05:15:30 +00:00
if ( newItems . length > 1 ) {
var selectArray = new Array ( ) ;
2006-08-07 00:30:36 +00:00
2006-08-07 05:15:30 +00:00
for ( var i in newItems ) {
2006-08-08 01:06:33 +00:00
selectArray [ i ] = newItems [ i ] . title ;
2006-08-07 05:15:30 +00:00
}
2006-10-02 23:15:27 +00:00
selectArray = Zotero . selectItems ( selectArray ) ;
2006-09-04 17:37:07 +00:00
var useIndices = new Array ( ) ;
2006-08-07 05:15:30 +00:00
for ( var i in selectArray ) {
2006-09-04 17:37:07 +00:00
useIndices . push ( i ) ;
}
completeItems ( newItems , useIndices , couldUseFullItems ) ;
} else if ( newItems . length ) {
completeItems ( newItems , [ 0 ] , couldUseFullItems ) ;
}
}
function completeItems ( newItems , useIndices , couldUseFullItems , doc ) {
if ( ! useIndices . length ) {
return ;
}
var i = useIndices . shift ( ) ;
/ / grab full item if requested
if ( couldUseFullItems [ i ] ) {
2006-10-02 23:15:27 +00:00
Zotero . Utilities . debug ( " looking up contextObject " ) ;
var search = Zotero . loadTranslator ( " search " ) ;
2006-09-04 17:37:07 +00:00
var firstItem = false ;
search . setHandler ( " itemDone " , function ( obj , newItem ) {
if ( ! firstItem ) {
/ / add doc as attachment
newItem . attachments . push ( { document : doc } ) ;
newItem . complete ( ) ;
firstItem = true ;
}
} ) ;
search . setHandler ( " done " , function ( obj ) {
/ / call next
completeItems ( newItems , useIndices , couldUseFullItems ) ;
} ) ;
search . setSearch ( newItems [ i ] ) ;
var translators = search . getTranslators ( ) ;
if ( translators . length ) {
search . setTranslator ( translators ) ;
search . translate ( ) ;
} else {
2006-08-17 07:56:01 +00:00
/ / add doc as attachment
newItems [ i ] . attachments . push ( { document : doc } ) ;
2006-08-08 01:06:33 +00:00
newItems [ i ] . complete ( ) ;
2006-09-04 17:37:07 +00:00
/ / call next
completeItems ( newItems , useIndices , couldUseFullItems ) ;
2006-08-07 05:15:30 +00:00
}
2006-09-04 17:37:07 +00:00
} else {
/ / add doc as attachment
newItems [ i ] . attachments . push ( { document : doc } ) ;
newItems [ i ] . complete ( ) ;
/ / call next
completeItems ( newItems , useIndices , couldUseFullItems ) ;
2006-08-07 05:15:30 +00:00
}
2006-08-07 00:30:36 +00:00
}
function doWeb ( doc , url ) {
var newItems = new Array ( ) ;
2006-08-07 05:15:30 +00:00
var needFullItems = new Array ( ) ;
2006-09-04 17:37:07 +00:00
var couldUseFullItems = new Array ( ) ;
2006-08-07 00:30:36 +00:00
var spanTags = doc . getElementsByTagName ( " span " ) ;
for ( var i = 0 ; i < spanTags . length ; i + + ) {
var spanClass = spanTags [ i ] . getAttribute ( " class " ) ;
if ( spanClass ) {
var spanClasses = spanClass . split ( " " ) ;
2006-10-02 23:15:27 +00:00
if ( Zotero . Utilities . inArray ( " Z3988 " , spanClasses ) ) {
2006-08-07 00:30:36 +00:00
var spanTitle = spanTags [ i ] . getAttribute ( " title " ) ;
2006-10-02 23:15:27 +00:00
var newItem = new Zotero . Item ( ) ;
if ( Zotero . Utilities . parseContextObject ( spanTitle , newItem ) ) {
2006-09-04 17:37:07 +00:00
if ( newItem . title ) {
if ( ! newItem . creators . length ) {
/ / if we have a title but little other identifying
/ / information , say we ' ' ll get full item later
couldUseFullItems [ newItems . length ] = true ;
}
2006-08-07 05:15:30 +00:00
/ / title and creators are minimum data to avoid looking up
newItems . push ( newItem ) ;
} else {
/ / retrieve full item
newItem . contextObject = spanTitle ;
needFullItems . push ( newItem ) ;
}
2006-08-07 00:30:36 +00:00
}
}
}
}
2006-10-02 23:15:27 +00:00
Zotero . Utilities . debug ( needFullItems ) ;
2006-08-07 05:15:30 +00:00
if ( needFullItems . length ) {
/ / retrieve full items asynchronously
2006-10-02 23:15:27 +00:00
Zotero . wait ( ) ;
2006-09-04 17:37:07 +00:00
retrieveNextCOinS ( needFullItems , newItems , couldUseFullItems , doc ) ;
2006-08-07 00:30:36 +00:00
} else {
2006-09-04 17:37:07 +00:00
completeCOinS ( newItems , couldUseFullItems , doc ) ;
2006-08-07 00:30:36 +00:00
}
} ' );
2006-08-07 05:15:30 +00:00
2006-10-02 23:15:27 +00:00
REPLACE INTO " translators " VALUES ( ' 3e684d82-73a3-9a34-095f-19b112d88bbf ' , ' 2006-10-02 17:00:00 ' , 1 , 100 , 4 , ' Google Books ' , ' Simon Kornblith ' , ' ^http://books\.google\.com/books\?(.*vid=.*\&id=.*|.*q=.*) ' ,
2006-08-08 01:06:33 +00:00
' function detectWeb(doc, url) {
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
var re = new RegExp ( ' ' ^ http : / / books \ \ . google \ \ . com / books \ \ ? vid = ( [ ^ & ] + ) . * \ \ & id = ( [ ^ & ] + ) ' ' , ' ' i ' ' ) ;
if ( re . test ( doc . location . href ) ) {
return " book " ;
} else {
return " multiple " ;
}
} ' ,
' function doWeb(doc, url) {
var uri = doc . location . href ;
var newUris = new Array ( ) ;
var re = new RegExp ( ' ' ^ http : / / books \ \ . google \ \ . com / books \ \ ? vid = ( [ ^ & ] + ) . * \ \ & id = ( [ ^ & ] + ) ' ' , ' ' i ' ' ) ;
var m = re . exec ( uri ) ;
if ( m ) {
newUris . push ( ' ' http : / / books . google . com / books ? vid = ' ' + m [ 1 ] + ' ' & id = ' ' + m [ 2 ] ) ;
} else {
2006-10-02 23:15:27 +00:00
var items = Zotero . Utilities . getItemArray ( doc , doc , ' ' http : / / books \ \ . google \ \ . com / books \ \ ? vid = ( [ ^ & ] + ) . * \ \ & id = ( [ ^ & ] + ) ' ' , ' ' ^ ( ? : All matching pages | About this Book | Table of Contents | Index ) ' ' ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
/ / Drop " - Page " thing
for ( var i in items ) {
items [ i ] = items [ i ] . replace ( / - Page [ 0 - 9 ] + \ s * $ / , " " ) ;
}
2006-10-02 23:15:27 +00:00
items = Zotero . selectItems ( items ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
if ( ! items ) {
return true ;
}
for ( var i in items ) {
var m = re . exec ( i ) ;
newUris . push ( ' ' http : / / books . google . com / books ? vid = ' ' + m [ 1 ] + ' ' & id = ' ' + m [ 2 ] ) ;
}
2006-06-24 17:33:35 +00:00
}
2006-06-21 14:28:51 +00:00
2006-10-02 23:15:27 +00:00
Zotero . Utilities . processDocuments ( newUris , function ( newDoc ) {
var newItem = new Zotero . Item ( " book " ) ;
2006-08-17 07:56:01 +00:00
newItem . extra = " " ;
newItem . attachments . push ( { title : " Google Books Information Page " , document : newDoc } ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
var namespace = newDoc . documentElement . namespaceURI ;
var nsResolver = namespace ? function ( prefix ) {
if ( prefix = = ' ' x ' ' ) return namespace ; else return null ;
} : null ;
var xpath = ' ' / / table [ @ id = " bib " ] / tbody / tr ' ' ;
2006-08-17 07:56:01 +00:00
var elmts = newDoc . evaluate ( xpath , newDoc , nsResolver ,
XPathResult . ANY_TYPE , null ) ;
var elmt ;
while ( elmt = elmts . iterateNext ( ) ) {
var field = newDoc . evaluate ( ' ' . / td [ 1 ] / / text ( ) ' ' , elmt , nsResolver , XPathResult . ANY_TYPE , null ) . iterateNext ( ) ;
var value = newDoc . evaluate ( ' ' . / td [ 2 ] / / text ( ) ' ' , elmt , nsResolver , XPathResult . ANY_TYPE , null ) . iterateNext ( ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
if ( field & & value ) {
2006-10-02 23:15:27 +00:00
field = Zotero . Utilities . superCleanString ( field . nodeValue ) ;
value = Zotero . Utilities . cleanString ( value . nodeValue ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
if ( field = = " Title " ) {
newItem . title = value ;
} else if ( field = = " Author(s) " ) {
var authors = value . split ( " , " ) ;
for ( j in authors ) {
2006-10-02 23:15:27 +00:00
newItem . creators . push ( Zotero . Utilities . cleanAuthor ( authors [ j ] , " author " ) ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
}
} else if ( field = = " Editor(s) " ) {
var authors = value . split ( " , " ) ;
for ( j in authors ) {
2006-10-02 23:15:27 +00:00
newItem . creators . push ( Zotero . Utilities . cleanAuthor ( authors [ j ] , " editor " ) ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
}
} else if ( field = = " Publisher " ) {
newItem . publisher = value ;
} else if ( field = = " Publication Date " ) {
2006-08-31 00:04:11 +00:00
newItem . date = value ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
} else if ( field = = " ISBN " ) {
newItem . ISBN = value ;
2006-08-17 07:56:01 +00:00
} else if ( field = = " Pages " ) {
newItem . pages = value ;
} else {
newItem . extra + = field + " : " + value + " \n " ;
2006-06-21 15:18:18 +00:00
}
2006-06-21 14:28:51 +00:00
}
}
2006-08-17 07:56:01 +00:00
if ( newItem . extra ) {
newItem . extra = newItem . extra . substr ( newItem . extra , newItem . extra . length - 1 ) ;
}
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
newItem . complete ( ) ;
2006-10-02 23:15:27 +00:00
} , function ( ) { Zotero . done ( ) ; } , null ) ;
closes #187, make berkeley's library work
closes #186, stop translators from hanging
when a document loads inside a frameset, we now check whether we can scrape each individual frame.
all functions involving tabs have been vastly simplified, because in the process of figuring this out, i discovered Firefox 2's new tab events.
if a translator throws an exception inside loadDocument(), doGet(), doPost(), or processDocuments(), a translate error message will appear, and the translator will not hang
2006-08-15 19:46:42 +00:00
2006-10-02 23:15:27 +00:00
Zotero . wait ( ) ;
closes #187, make berkeley's library work
closes #186, stop translators from hanging
when a document loads inside a frameset, we now check whether we can scrape each individual frame.
all functions involving tabs have been vastly simplified, because in the process of figuring this out, i discovered Firefox 2's new tab events.
if a translator throws an exception inside loadDocument(), doGet(), doPost(), or processDocuments(), a translate error message will appear, and the translator will not hang
2006-08-15 19:46:42 +00:00
} ' );
2006-10-02 23:15:27 +00:00
REPLACE INTO " translators " VALUES ( ' 57a00950-f0d1-4b41-b6ba-44ff0fc30289 ' , ' 2006-08-26 1:10:00 ' , 1 , 100 , 4 , ' Google Zotero ' , ' Simon Kornblith ' , ' ^http://scholar\.google\.com/scholar ' ,
2006-08-26 05:51:41 +00:00
' function detectWeb(doc, url) {
return " multiple " ;
} ' ,
' function getList(urls, each, done) {
var url = urls . shift ( ) ;
2006-10-02 23:15:27 +00:00
Zotero . Utilities . HTTP . doGet ( url , function ( text ) {
2006-08-26 05:51:41 +00:00
if ( each ) {
each ( text ) ;
}
if ( urls . length ) {
getList ( urls , each , done ) ;
} else if ( done ) {
done ( text ) ;
}
} ) ;
}
function doWeb ( doc , url ) {
var namespace = doc . documentElement . namespaceURI ;
var nsResolver = namespace ? function ( prefix ) {
if ( prefix = = ' ' x ' ' ) return namespace ; else return null ;
} : null ;
2006-08-26 06:04:29 +00:00
doc . cookie = " GSP=ID=deadbeefdeadbeef:IN=ebe89f7e83a8fe75+7e6cc990821af63:CF=2; domain=.scholar.google.com " ;
2006-08-26 05:51:41 +00:00
var items = new Array ( ) ;
var relatedLinks = new Array ( ) ;
var links = new Array ( ) ;
var types = new Array ( ) ;
var itemTypes = new Array ( ) ;
var attachments = new Array ( ) ;
var elmts = doc . evaluate ( ' ' / / p [ @ class = " g " ] ' ' , doc , nsResolver ,
XPathResult . ANY_TYPE , null ) ;
var elmt ;
var i = 0 ;
while ( elmt = elmts . iterateNext ( ) ) {
var isCitation = doc . evaluate ( " ./font[1]/b[1]/text()[1] " , elmt , nsResolver ,
XPathResult . ANY_TYPE , null ) . iterateNext ( ) ;
var relatedLink = doc . evaluate ( ' ' . / / a [ font / text ( ) = " Related Articles " ] ' ' ,
elmt , nsResolver , XPathResult . ANY_TYPE , null ) . iterateNext ( ) ;
if ( relatedLink ) {
relatedLinks [ i ] = relatedLink . href ;
if ( isCitation & & isCitation . nodeValue = = " [CITATION] " ) {
2006-10-02 23:15:27 +00:00
items [ i ] = Zotero . Utilities . getNodeString ( doc , elmt , ' ' . / text ( ) | . / b / text ( ) ' ' , nsResolver ) ;
2006-08-26 05:51:41 +00:00
} else if ( isCitation & & isCitation . nodeValue = = " [BOOK] " ) {
2006-10-02 23:15:27 +00:00
items [ i ] = Zotero . Utilities . getNodeString ( doc , elmt , ' ' . / text ( ) | . / b / text ( ) ' ' , nsResolver ) ;
2006-08-26 05:51:41 +00:00
types [ i ] = " book " ;
} else {
var link = doc . evaluate ( ' ' . / / span [ @ class = " w " ] / a ' ' , elmt , nsResolver ,
XPathResult . ANY_TYPE , null ) . iterateNext ( ) ;
if ( link ) {
items [ i ] = link . textContent ;
links [ i ] = link . href ;
}
}
if ( items [ i ] ) {
i + + ;
}
}
}
2006-10-02 23:15:27 +00:00
items = Zotero . selectItems ( items ) ;
2006-08-26 05:51:41 +00:00
if ( ! items ) {
return true ;
}
var relatedMatch = / [ & ? ] q = related : ( [ ^ & ] + ) / ;
var urls = new Array ( ) ;
for ( var i in items ) {
var m = relatedMatch . exec ( relatedLinks [ i ] ) ;
2006-08-31 07:45:03 +00:00
urls . push ( " http://scholar.google.com/scholar.ris?hl=en&lr=&q=info: " + m [ 1 ] + " &oe=UTF-8&output=citation&oi=citation " ) ;
2006-08-26 05:51:41 +00:00
if ( links [ i ] ) {
2006-10-02 23:15:27 +00:00
attachments . push ( [ { title : " Google Zotero Linked Page " , type : " text/html " ,
2006-08-26 05:51:41 +00:00
url : links [ i ] } ] ) ;
} else {
attachments . push ( [ ] ) ;
}
if ( types [ i ] ) { / / for books
itemTypes . push ( types [ i ] ) ;
} else {
itemTypes . push ( null ) ;
}
}
2006-10-02 23:15:27 +00:00
var translator = Zotero . loadTranslator ( " import " ) ;
2006-08-26 05:51:41 +00:00
translator . setTranslator ( " 32d59d2d-b65a-4da4-b0a3-bdd3cfb979e7 " ) ;
translator . setHandler ( " itemDone " , function ( obj , item ) {
var itemType = itemTypes . shift ( ) ;
if ( itemType ) {
item . itemType = itemType ;
}
item . attachments = attachments . shift ( ) ;
item . complete ( ) ;
} ) ;
getList ( urls , function ( text ) {
translator . setString ( text ) ;
translator . translate ( ) ;
2006-10-02 23:15:27 +00:00
} , function ( ) { Zotero . done ( ) } ) ;
2006-08-26 05:51:41 +00:00
2006-10-02 23:15:27 +00:00
Zotero . wait ( ) ;
2006-08-26 05:51:41 +00:00
} ' );
2006-10-02 23:15:27 +00:00
REPLACE INTO " translators " VALUES ( ' 9c335444-a562-4f88-b291-607e8f46a9bb ' , ' 2006-10-02 17:00:00 ' , 1 , 100 , 4 , ' Berkeley Library ' , ' Simon Kornblith ' , ' ^http://[^/]*berkeley.edu[^/]*/WebZ/(?:html/results.html|FETCH)\?.*sessionid= ' ,
closes #187, make berkeley's library work
closes #186, stop translators from hanging
when a document loads inside a frameset, we now check whether we can scrape each individual frame.
all functions involving tabs have been vastly simplified, because in the process of figuring this out, i discovered Firefox 2's new tab events.
if a translator throws an exception inside loadDocument(), doGet(), doPost(), or processDocuments(), a translate error message will appear, and the translator will not hang
2006-08-15 19:46:42 +00:00
' function detectWeb(doc, url) {
var resultsRegexp = / \ / WebZ \ / html \ / results . html / i
if ( resultsRegexp . test ( url ) ) {
return " multiple " ;
} else {
return " book " ;
}
} ' ,
' function reformURL(url) {
return url . replace ( / fmtclass = [ ^ & ] * / , " " ) + " :fmtclass=marc " ;
}
function doWeb ( doc , url ) {
var resultsRegexp = / \ / WebZ \ / html \ / results . html / i
if ( resultsRegexp . test ( url ) ) {
2006-10-02 23:15:27 +00:00
var items = Zotero . Utilities . getItemArray ( doc , doc , " /WebZ/FETCH " , " ^[0-9]*$ " ) ;
items = Zotero . selectItems ( items ) ;
closes #187, make berkeley's library work
closes #186, stop translators from hanging
when a document loads inside a frameset, we now check whether we can scrape each individual frame.
all functions involving tabs have been vastly simplified, because in the process of figuring this out, i discovered Firefox 2's new tab events.
if a translator throws an exception inside loadDocument(), doGet(), doPost(), or processDocuments(), a translate error message will appear, and the translator will not hang
2006-08-15 19:46:42 +00:00
if ( ! items ) {
return true ;
}
var urls = new Array ( ) ;
for ( var i in items ) {
urls . push ( reformURL ( i ) ) ;
}
} else {
var urls = [ reformURL ( url ) ] ;
}
2006-10-02 23:15:27 +00:00
var translator = Zotero . loadTranslator ( " import " ) ;
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
translator . setTranslator ( " a6ee60df-1ddc-4aae-bb25-45e0537be973 " ) ;
var marc = translator . getTranslatorObject ( ) ;
closes #187, make berkeley's library work
closes #186, stop translators from hanging
when a document loads inside a frameset, we now check whether we can scrape each individual frame.
all functions involving tabs have been vastly simplified, because in the process of figuring this out, i discovered Firefox 2's new tab events.
if a translator throws an exception inside loadDocument(), doGet(), doPost(), or processDocuments(), a translate error message will appear, and the translator will not hang
2006-08-15 19:46:42 +00:00
2006-10-02 23:15:27 +00:00
Zotero . Utilities . processDocuments ( urls , function ( newDoc ) {
closes #187, make berkeley's library work
closes #186, stop translators from hanging
when a document loads inside a frameset, we now check whether we can scrape each individual frame.
all functions involving tabs have been vastly simplified, because in the process of figuring this out, i discovered Firefox 2's new tab events.
if a translator throws an exception inside loadDocument(), doGet(), doPost(), or processDocuments(), a translate error message will appear, and the translator will not hang
2006-08-15 19:46:42 +00:00
var uri = newDoc . location . href ;
var namespace = newDoc . documentElement . namespaceURI ;
var nsResolver = namespace ? function ( prefix ) {
if ( prefix = = ' ' x ' ' ) return namespace ; else return null ;
} : null ;
var elmts = newDoc . evaluate ( ' ' / / table / tbody / tr [ @ valign = " top " ] ' ' ,
newDoc , nsResolver , XPathResult . ANY_TYPE , null ) ;
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
var record = new marc . record ( ) ;
closes #187, make berkeley's library work
closes #186, stop translators from hanging
when a document loads inside a frameset, we now check whether we can scrape each individual frame.
all functions involving tabs have been vastly simplified, because in the process of figuring this out, i discovered Firefox 2's new tab events.
if a translator throws an exception inside loadDocument(), doGet(), doPost(), or processDocuments(), a translate error message will appear, and the translator will not hang
2006-08-15 19:46:42 +00:00
while ( elmt = elmts . iterateNext ( ) ) {
2006-10-02 23:15:27 +00:00
var field = Zotero . Utilities . superCleanString ( doc . evaluate ( ' ' . / TD [ 1 ] / text ( ) [ 1 ] ' ' , elmt , nsResolver , XPathResult . ANY_TYPE , null ) . iterateNext ( ) . nodeValue ) ;
closes #187, make berkeley's library work
closes #186, stop translators from hanging
when a document loads inside a frameset, we now check whether we can scrape each individual frame.
all functions involving tabs have been vastly simplified, because in the process of figuring this out, i discovered Firefox 2's new tab events.
if a translator throws an exception inside loadDocument(), doGet(), doPost(), or processDocuments(), a translate error message will appear, and the translator will not hang
2006-08-15 19:46:42 +00:00
var value = doc . evaluate ( ' ' . / TD [ 2 ] / text ( ) [ 1 ] ' ' , elmt , nsResolver , XPathResult . ANY_TYPE , null ) . iterateNext ( ) . nodeValue ;
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
/ / remove spacing
value = value . replace ( / ^ \ s + / , " " ) ;
value = value . replace ( / \ s + $ / , " " ) ;
if ( field = = 0 ) {
record . leader = " 00000 " + value ;
} else {
var ind = value [ 3 ] + value [ 5 ] ;
2006-10-02 23:15:27 +00:00
value = Zotero . Utilities . cleanString ( value . substr ( 5 ) ) .
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
replace ( / \ $ ( [ a - z0 - 9 ] ) / g , marc . subfieldDelimiter + " $1 " ) ;
if ( value [ 0 ] ! = marc . subfieldDelimiter ) {
value = marc . subfieldDelimiter + " a " + value ;
}
record . addField ( field , ind , value ) ;
closes #187, make berkeley's library work
closes #186, stop translators from hanging
when a document loads inside a frameset, we now check whether we can scrape each individual frame.
all functions involving tabs have been vastly simplified, because in the process of figuring this out, i discovered Firefox 2's new tab events.
if a translator throws an exception inside loadDocument(), doGet(), doPost(), or processDocuments(), a translate error message will appear, and the translator will not hang
2006-08-15 19:46:42 +00:00
}
}
2006-10-02 23:15:27 +00:00
var newItem = new Zotero . Item ( ) ;
closes #187, make berkeley's library work
closes #186, stop translators from hanging
when a document loads inside a frameset, we now check whether we can scrape each individual frame.
all functions involving tabs have been vastly simplified, because in the process of figuring this out, i discovered Firefox 2's new tab events.
if a translator throws an exception inside loadDocument(), doGet(), doPost(), or processDocuments(), a translate error message will appear, and the translator will not hang
2006-08-15 19:46:42 +00:00
record . translate ( newItem ) ;
newItem . complete ( ) ;
2006-10-02 23:15:27 +00:00
} , function ( ) { Zotero . done ( ) ; } , null ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
2006-10-02 23:15:27 +00:00
Zotero . wait ( ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
} ' );
2006-06-29 00:56:50 +00:00
2006-10-02 23:15:27 +00:00
REPLACE INTO " translators " VALUES ( ' d0b1914a-11f1-4dd7-8557-b32fe8a3dd47 ' , ' 2006-10-02 17:00:00 ' , 1 , 100 , 4 , ' EBSCOhost ' , ' Simon Kornblith ' , ' ^http://[^/]+/ehost/(?:results|detail) ' ,
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
' function detectWeb(doc, url) {
var namespace = doc . documentElement . namespaceURI ;
var nsResolver = namespace ? function ( prefix ) {
if ( prefix = = ' ' x ' ' ) return namespace ; else return null ;
} : null ;
/ / See if this is a seach results page
2006-08-31 07:45:03 +00:00
var searchResult = doc . evaluate ( ' ' / / table [ @ class = " result-list-inner " ] ' ' , doc , nsResolver ,
XPathResult . ANY_TYPE , null ) . iterateNext ( ) ;
if ( searchResult ) {
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
return " multiple " ;
2006-08-31 07:45:03 +00:00
}
var persistentLink = doc . evaluate ( ' ' / / tr [ td [ @ class = " left-content-ft " ] / text ( ) = " Persistent link to this record: " ] / td [ @ class = " right-content-ft " ] ' ' ,
doc , nsResolver , XPathResult . ANY_TYPE , null ) . iterateNext ( ) ;
if ( persistentLink ) {
return " journalArticle " ;
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
}
} ' ,
' function fullEscape(text) {
return escape ( text ) . replace ( / \ / / g , " %2F " ) . replace ( / \ + / g , " %2B " ) ;
}
function doWeb ( doc , url ) {
var namespace = doc . documentElement . namespaceURI ;
var nsResolver = namespace ? function ( prefix ) {
if ( prefix = = ' ' x ' ' ) return namespace ; else return null ;
} : null ;
2006-08-31 07:45:03 +00:00
var hostRe = new RegExp ( " ^http://([^/]+)/ " ) ;
var m = hostRe . exec ( url ) ;
var host = m [ 1 ] ;
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
var queryRe = / \ ? ( . * ) $ / ;
var m = queryRe . exec ( url ) ;
var queryString = m [ 1 ] ;
var eventValidation = doc . evaluate ( ' ' / / input [ @ name = " __EVENTVALIDATION " ] ' ' , doc , nsResolver ,
XPathResult . ANY_TYPE , null ) . iterateNext ( ) ;
eventValidation = fullEscape ( eventValidation . value ) ;
var viewState = doc . evaluate ( ' ' / / input [ @ name = " __VIEWSTATE " ] ' ' , doc , nsResolver ,
XPathResult . ANY_TYPE , null ) . iterateNext ( ) ;
viewState = fullEscape ( viewState . value ) ;
2006-08-31 07:45:03 +00:00
var searchResult = doc . evaluate ( ' ' / / table [ @ class = " result-list-inner " ] ' ' , doc , nsResolver ,
XPathResult . ANY_TYPE , null ) . iterateNext ( ) ;
if ( searchResult ) {
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
var items = new Object ( ) ;
var tableRows = doc . evaluate ( ' ' / / table [ @ class = " cluster-result-record-table " ] / tbody / tr ' ' ,
doc , nsResolver , XPathResult . ANY_TYPE , null ) ;
var tableRow ;
/ / Go through table rows
while ( tableRow = tableRows . iterateNext ( ) ) {
var title = doc . evaluate ( ' ' . / / a [ @ class = " title-link " ] ' ' , tableRow , nsResolver ,
XPathResult . ANY_TYPE , null ) . iterateNext ( ) ;
var addLink = doc . evaluate ( ' ' . / / a [ substring ( @ id , 1 , 11 ) = " addToFolder " ] ' ' , tableRow , nsResolver ,
XPathResult . ANY_TYPE , null ) . iterateNext ( ) ;
if ( title & & addLink ) {
items [ addLink . href ] = title . textContent ;
}
}
2006-10-02 23:15:27 +00:00
var items = Zotero . selectItems ( items ) ;
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
if ( ! items ) {
return true ;
}
var citations = new Array ( ) ;
var argRe = / ' ' ( [ ^ ' ' ] + ) ' ' / ;
for ( var i in items ) {
var m = argRe . exec ( i ) ;
citations . push ( m [ 1 ] ) ;
}
var saveString = " __EVENTTARGET=FolderItem:AddItem&IsCallBack=true&SearchTerm1=test&listDatabaseGroupings=pdh&SortOptionDropDown=date&__EVENTVALIDATION= " + eventValidation + " &__EVENTARGUMENT= " + citations . join ( " , " ) + " & " ;
} else {
/ / If this is a view page , find the link to the citation
var xpath = ' ' / html / body / div [ @ class = " indent " ] / center / / a [ @ class = " nav " ] ' ' ;
var elmts = doc . evaluate ( xpath , doc , nsResolver , XPathResult . ANY_TYPE , null ) ;
var saveCitation = elmts . iterateNext ( ) ;
var viewSavedCitations = elmts . iterateNext ( ) ;
var saveString = " __EVENTTARGET=ctl00%24ctl00%24MainContentArea%24MainContentArea%24topAddToFolderControl%24lnkAddToFolder&__EVENTARGUMENT=&__VIEWSTATE= " + viewState + " &__EVENTVALIDATION= " + eventValidation ;
}
var folderString = " __EVENTTARGET=ctl00%24ctl00%24ToolbarArea%24toolbar%24folderControl%24lnkFolder&__EVENTARGUMENT=&__VIEWSTATE= " + viewState + " &__EVENTVALIDATION= " + eventValidation ;
var getString = " __EVENTTARGET=Tabs&IsCallBack=true&chkRemoveFromFolder=true&chkIncludeHTMLFT=true&chkIncludeHTMLLinks=true&CitationFormat=standard&lstFormatStandard=1&lstFormatIndustry=4&cfCommonAb=false&cfCommonAu=true&cfCommonTypDoc=true&cfCommonID=true&cfCommonISSN=true&cfCommonNote=false&cfCommonRevInfo=false&cfCommonSrc=true&cfCommonTi=true&__EVENTARGUMENT=1& "
var viewStateMatch = / < input type = " hidden " name = " __VIEWSTATE " id = " __VIEWSTATE " value = " ([^ " ] + ) " \/>/
var eventValidationMatch = / < input type = " hidden " name = " __EVENTVALIDATION " id = " __EVENTVALIDATION " value = " ([^ " ] + ) " \/>/
2006-10-02 23:15:27 +00:00
Zotero . Utilities . HTTP . doPost ( url , saveString , function ( ) { / / mark records
Zotero . Utilities . HTTP . doPost ( url , folderString , function ( text ) {
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
var postLocation = / < form name = " aspnetForm " method = " post " action = " ([^ " ] + ) " /
var m = postLocation . exec ( text ) ;
var folderURL = m [ 1 ] . replace ( / & amp ; / g , " & " ) ;
m = viewStateMatch . exec ( text ) ;
var folderViewState = m [ 1 ] ;
var folderBase = " __EVENTARGUMENT=&__VIEWSTATE= " + fullEscape ( folderViewState ) ;
m = eventValidationMatch . exec ( text ) ;
var folderEventValidation = m [ 1 ] ;
folderBase + = " &__EVENTVALIDATION= " + fullEscape ( folderEventValidation ) ;
var deliverString = " __EVENTTARGET=ctl00%24ctl00%24MainContentArea%24MainContentArea%24btnDelivery%24lnkSave& " + folderBase
2006-10-02 23:15:27 +00:00
Zotero . Utilities . HTTP . doPost ( " http:// " + host + " /ehost/ " + folderURL ,
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
deliverString , function ( text ) {
var postLocation = / < form name = " aspnetForm " method = " post " action = " ([^ " ] + ) " /
var m = postLocation . exec ( text ) ;
var deliveryURL = m [ 1 ] . replace ( / & amp ; / g , " & " ) ;
var m = viewStateMatch . exec ( text ) ;
var downloadString = " __EVENTTARGET=&__EVENTARGUMENT=&__VIEWSTATE= " + fullEscape ( m [ 1 ] ) + " &ctl00%24ctl00%24MainContentArea%24MainContentArea%24ctl01%24chkRemoveFromFolder=on&ctl00%24ctl00%24MainContentArea%24MainContentArea%24ctl01%24btnSubmit=Save&ctl00%24ctl00%24MainContentArea%24MainContentArea%24ctl01%24BibFormat=1 " ;
2006-10-02 23:15:27 +00:00
Zotero . Utilities . HTTP . doPost ( " http:// " + host + " /ehost/ " + deliveryURL ,
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
getString , function ( text ) {
2006-10-02 23:15:27 +00:00
Zotero . Utilities . HTTP . doPost ( " http:// " + host + " /ehost/ " + deliveryURL ,
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
downloadString , function ( text ) { / / get marked
var form = doc . createElement ( " form " ) ;
form . setAttribute ( " method " , " post " ) ;
2006-08-31 07:45:03 +00:00
form . setAttribute ( " action " , " http:// " + host + " /ehost/ " + folderURL ) ;
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
var args = [
[ " __EVENTARGUMENT " , " " ] ,
[ " __VIEWSTATE " , folderViewState ] ,
[ " __EVENTVALIDATION " , folderEventValidation ] ,
[ " __EVENTTARGET " , " ctl00$ctl00$MainContentArea$MainContentArea$btnBack$lnkBack " ]
] ;
for ( var i in args ) {
var input = doc . createElement ( " input " ) ;
input . setAttribute ( " type " , " hidden " ) ;
input . setAttribute ( " name " , args [ i ] [ 0 ] ) ;
input . setAttribute ( " value " , args [ i ] [ 1 ] ) ;
form . appendChild ( input ) ;
}
var body = doc . getElementsByTagName ( " body " ) ;
body [ 0 ] . appendChild ( form ) ;
form . submit ( ) ;
/ / load translator for RIS
2006-10-02 23:15:27 +00:00
var translator = Zotero . loadTranslator ( " import " ) ;
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
translator . setTranslator ( " 32d59d2d-b65a-4da4-b0a3-bdd3cfb979e7 " ) ;
translator . setString ( text ) ;
translator . setHandler ( " itemDone " , function ( obj , item ) {
if ( item . notes & & item . notes [ 0 ] ) {
item . extra = item . notes [ 0 ] . note ;
delete item . notes ;
item . notes = undefined ;
}
item . complete ( ) ;
} ) ;
translator . translate ( ) ;
2006-10-02 23:15:27 +00:00
Zotero . done ( ) ;
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
} ) ;
} ) ;
} ) ;
} ) ;
} ) ;
2006-10-02 23:15:27 +00:00
Zotero . wait ( ) ;
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
} ' );
2006-10-02 23:15:27 +00:00
-- New York Times web translator (type 4 = web).
-- detectWeb: search pages are recognized by title prefix; article pages by the
-- presence of both "hdl" (headline) and "byl" (byline) meta tags.
-- NOTE(review): this statement was reconstructed from a whitespace-mangled,
-- VCS-residue-corrupted copy; inner string/regex spacing should be confirmed
-- against the repository history.
REPLACE INTO "translators" VALUES ('ce7a3727-d184-407f-ac12-52837f3361ff', '2006-10-02 17:00:00', 1, 100, 4, 'New York Times', 'Simon Kornblith', '^http://(?:query\.nytimes\.com/search/query|www\.nytimes\.com/.+)',
'function detectWeb(doc, url) {
	if(doc.title.substr(0, 30) == "The New York Times: Search for") {
		var namespace = doc.documentElement.namespaceURI;
		var nsResolver = namespace ? function(prefix) {
			if (prefix == ''x'') return namespace; else return null;
		} : null;
		
		var result = doc.evaluate(''//div[@id="srchContent"]'', doc, nsResolver,
		             XPathResult.ANY_TYPE, null).iterateNext();
		if(result) {
			return "multiple";
		}
	} else {
		var metaTags = doc.getElementsByTagName("meta");
		if(metaTags.namedItem("hdl") && metaTags.namedItem("byl")) {
			return "newspaperArticle";
		}
	}
}',
'// fetch each URL in turn, calling each() per page and done() after the last
function getList(urls, each, done) {
	var url = urls.shift();
	Zotero.Utilities.HTTP.doGet(url, function(text) {
		if(each) {
			each(text, url);
		}
		
		if(urls.length) {
			getList(urls, each, done);
		} else if(done) {
			done(text);
		}
	});
}

// copy a scraped meta-tag value onto the item when present
function associateMeta(newItem, metaTags, field, zoteroField) {
	if(metaTags[field]) {
		newItem[zoteroField] = metaTags[field];
	}
}

// scrape one article; doc is a Document (url undefined) or raw HTML text (url given)
function scrape(doc, url) {
	var newItem = new Zotero.Item("newspaperArticle");
	newItem.publicationTitle = "The New York Times";
	newItem.ISSN = "0362-4331";
	
	var metaTags = new Object();
	if(url != undefined) {
		// doc is an HTML string fetched via HTTP; extract meta tags by regexp
		newItem.url = url;
		var metaTagRe = /<meta[^>]*>/gi;
		var nameRe = /name="([^"]+)"/i;
		var contentRe = /content="([^"]+)"/i;
		var m = doc.match(metaTagRe);
		
		if(!m) {
			return;
		}
		
		for(var i=0; i<m.length; i++) {
			var name = nameRe.exec(m[i]);
			var content = contentRe.exec(m[i]);
			if(name && content) {
				metaTags[name[1]] = content[1];
			}
		}
		
		// no headline meta tag means this is not an article page
		if(!metaTags["hdl"]) {
			return;
		}
		newItem.attachments.push({url:url, title:"Article (HTML)",
		                          mimeType:"text/html", downloadable:true});
	} else {
		// doc is a live DOM Document
		newItem.url = doc.location.href;
		var metaTagHTML = doc.getElementsByTagName("meta");
		for(var i=0; i<metaTagHTML.length; i++) {
			var key = metaTagHTML[i].getAttribute("name");
			var value = metaTagHTML[i].getAttribute("content");
			if(key && value) {
				metaTags[key] = value;
			}
		}
		newItem.attachments.push({document:doc, title:"Article (HTML)",
		                          downloadable:true});
	}
	
	associateMeta(newItem, metaTags, "dat", "date");
	associateMeta(newItem, metaTags, "hdl", "title");
	associateMeta(newItem, metaTags, "dsk", "section");
	associateMeta(newItem, metaTags, "articleid", "accessionNumber");
	
	if(metaTags["byl"]) {
		var author = Zotero.Utilities.cleanString(metaTags["byl"]);
		// strip a leading "by " from the byline
		if(author.substr(0, 3).toLowerCase() == "by ") {
			author = author.substr(3);
		}
		
		var authors = author.split(" and ");
		for each(var author in authors) {
			// fix capitalization (bylines are all-caps)
			var words = author.split(" ");
			for(var i in words) {
				words[i] = words[i][0].toUpperCase()+words[i].substr(1).toLowerCase();
			}
			author = words.join(" ");
			if(words[0] == "The") {
				// corporate author (e.g. "The Associated Press")
				newItem.creators.push({lastName:author, creatorType:"author", isInstitution:true});
			} else {
				newItem.creators.push(Zotero.Utilities.cleanAuthor(author, "author"));
			}
		}
	}
	
	if(metaTags["keywords"]) {
		var keywords = metaTags["keywords"];
		newItem.tags = keywords.split(",");
		for(var i in newItem.tags) {
			// NOTE(review): reconstructed from mangled source; original argument
			// spacing of this replace() could not be recovered exactly — confirm
			newItem.tags[i] = newItem.tags[i].replace(" ", ", ");
		}
	}
	
	newItem.complete();
}

function doWeb(doc, url) {
	if(doc.title.substr(0, 30) == "The New York Times: Search for") {
		// search-results page: let the user pick articles, then fetch each
		var namespace = doc.documentElement.namespaceURI;
		var nsResolver = namespace ? function(prefix) {
			if (prefix == ''x'') return namespace; else return null;
		} : null;
		
		var result = doc.evaluate(''//div[@id="srchContent"]'', doc, nsResolver,
		             XPathResult.ANY_TYPE, null).iterateNext();
		
		var items = Zotero.Utilities.getItemArray(doc, result, ''^http://www.nytimes.com/.*\.html$'');
		items = Zotero.selectItems(items);
		
		if(!items) {
			return true;
		}
		
		var urls = new Array();
		for(var i in items) {
			urls.push(i);
		}
		
		getList(urls, scrape, function() { Zotero.done(); }, null);
		Zotero.wait();
	} else {
		scrape(doc);
	}
}');
2006-10-02 23:15:27 +00:00
-- Chronicle of Higher Education web translator.
-- /weekly/ URLs are magazine articles (with volume/issue/pages); /daily/ URLs
-- are treated as plain web pages.
-- NOTE(review): reconstructed from a whitespace-mangled, VCS-residue-corrupted
-- copy; confirm inner literals against repository history.
REPLACE INTO "translators" VALUES ('1e6d1529-246f-4429-84e2-1f1b180b250d', '2006-10-02 17:00:00', 1, 100, 4, 'Chronicle of Higher Education', 'Simon Kornblith', '^http://chronicle\.com/',
'function detectWeb(doc, url) {
	var articleRegexp = /^http:\/\/chronicle\.com\/(?:daily|weekly)\/[^\/]+\//
	if(articleRegexp.test(url)) {
		if(doc.location.href.indexOf("weekly") != -1) {
			return "magazineArticle";
		} else {
			return "webpage";
		}
	} else {
		// listing page: offer "multiple" if any link targets an article
		var aTags = doc.getElementsByTagName("a");
		for(var i=0; i<aTags.length; i++) {
			if(articleRegexp.test(aTags[i].href)) {
				return "multiple";
			}
		}
	}
}',
'// copy a named <meta> tag''s content onto the item when present
function associateMeta(newItem, metaTags, field, zoteroField) {
	if(metaTags.namedItem(field)) {
		newItem[zoteroField] = Zotero.Utilities.cleanString(metaTags.namedItem(field).getAttribute("content"));
	}
}

function scrape(doc) {
	if(doc.location.href.indexOf("weekly") != -1) {
		var newItem = new Zotero.Item("magazineArticle");
		
		var namespace = doc.documentElement.namespaceURI;
		var nsResolver = namespace ? function(prefix) {
			if (prefix == ''x'') return namespace; else return null;
		} : null;
		
		// go in search of pages
		var content = doc.evaluate(''/html/body/table[@class="layout"]/tbody/tr[1]/td[@class="content"]'',
		                           doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
		if(content) {
			var pagesRegexp = /http:\/\/chronicle.com\nSection:[^\n]+\nVolume [0-9]+, Issue [0-9]+, Pages? ([A-Z0-9\-]+)/;
			var m = pagesRegexp.exec(content.textContent);
			if(m) {
				newItem.pages = m[1];
			}
		}
	} else {
		var newItem = new Zotero.Item("webpage");
	}
	newItem.publicationTitle = "The Chronicle of Higher Education";
	newItem.ISSN = "0009-5982";
	newItem.url = doc.location.href;
	
	var metaTags = doc.getElementsByTagName("meta");
	newItem.attachments.push({document:doc, title:"Article (HTML)",
	                          downloadable:true});
	
	associateMeta(newItem, metaTags, "published_date", "date");
	associateMeta(newItem, metaTags, "headline", "title");
	associateMeta(newItem, metaTags, "section", "section");
	associateMeta(newItem, metaTags, "volume", "volume");
	associateMeta(newItem, metaTags, "issue", "issue");
	
	if(metaTags.namedItem("byline")) {
		var author = Zotero.Utilities.cleanString(metaTags.namedItem("byline").getAttribute("content"));
		// strip a leading "by " from the byline
		if(author.substr(0, 3).toLowerCase() == "by ") {
			author = author.substr(3);
		}
		
		var authors = author.split(" and ");
		for each(var author in authors) {
			// fix capitalization
			var words = author.split(" ");
			for(var i in words) {
				words[i] = words[i][0].toUpperCase()+words[i].substr(1).toLowerCase();
			}
			author = words.join(" ");
			if(words[0] == "The") {
				// corporate author
				newItem.creators.push({lastName:author, creatorType:"author", isInstitution:true});
			} else {
				newItem.creators.push(Zotero.Utilities.cleanAuthor(author, "author"));
			}
		}
	}
	
	newItem.complete();
}

function doWeb(doc, url) {
	var articleRegexp = /^http:\/\/chronicle\.com\/(?:daily|weekly)\/[^\/]+\//;
	if(articleRegexp.test(url)) {
		scrape(doc);
	} else {
		// listing page: let the user choose, then process each article document
		var items = Zotero.Utilities.getItemArray(doc, doc, ''^http://chronicle\\.com/(?:daily|weekly)/[^/]+/'');
		items = Zotero.selectItems(items);
		
		if(!items) {
			return true;
		}
		
		var urls = new Array();
		for(var i in items) {
			urls.push(i);
		}
		
		Zotero.Utilities.processDocuments(urls, scrape, function() { Zotero.done(); });
		Zotero.wait();
	}
}');
2006-10-02 23:15:27 +00:00
-- New York Review of Books web translator.
-- Article pages (/articles/<number>) become journalArticle items; volume/issue
-- are parsed out of the dateline header, authors out of /authors/ links.
-- NOTE(review): reconstructed from a whitespace-mangled, VCS-residue-corrupted
-- copy; confirm inner literals against repository history.
REPLACE INTO "translators" VALUES ('4c164cc8-be7b-4d02-bfbf-37a5622dfd56', '2006-10-02 17:00:00', 1, 100, 4, 'New York Review of Books', 'Simon Kornblith', '^http://www\.nybooks\.com/',
'function detectWeb(doc, url) {
	var articleRegexp = /^http:\/\/www\.nybooks\.com\/articles\/[0-9]+/
	if(articleRegexp.test(url)) {
		return "journalArticle";
	} else {
		// listing page: offer "multiple" if any link targets an article
		var aTags = doc.getElementsByTagName("a");
		for(var i=0; i<aTags.length; i++) {
			if(articleRegexp.test(aTags[i].href)) {
				return "multiple";
			}
		}
	}
}',
'// copy a named <meta> tag''s content onto the item when present
function associateMeta(newItem, metaTags, field, zoteroField) {
	if(metaTags.namedItem(field)) {
		newItem[zoteroField] = Zotero.Utilities.cleanString(metaTags.namedItem(field).getAttribute("content"));
	}
}

function scrape(doc) {
	var newItem = new Zotero.Item("journalArticle");
	newItem.publicationTitle = "The New York Review of Books";
	newItem.ISSN = "0028-7504";
	
	var namespace = doc.documentElement.namespaceURI;
	var nsResolver = namespace ? function(prefix) {
		if (prefix == ''x'') return namespace; else return null;
	} : null;
	
	newItem.url = doc.location.href;
	
	var metaTags = doc.getElementsByTagName("meta");
	newItem.attachments.push({document:doc, title:"Review (HTML)",
	                          downloadable:true});
	
	associateMeta(newItem, metaTags, "dc.title", "title");
	
	var info = doc.evaluate(''//div[@id="center-content"]/h4[@class="date"]'',
	                        doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
	
	if(info) {
		// get date (which is in an a tag)
		newItem.date = doc.evaluate("./a", info, nsResolver, XPathResult.ANY_TYPE,
		                            null).iterateNext();
		if(newItem.date) {
			newItem.date = newItem.date.textContent;
		}
		
		info = Zotero.Utilities.cleanString(info.textContent);
		
		// get volume and issue
		var infoRe = /Volume ([0-9]+), Number ([0-9]+)/;
		var m = infoRe.exec(info);
		if(m) {
			newItem.volume = m[1];
			newItem.issue = m[2];
		}
	}
	
	// authors are the header links pointing into /authors/
	var authors = doc.evaluate(''//div[@id="center-content"]/h4/a[substring(@href, 1, 9) = "/authors/"]'',
	                           doc, nsResolver, XPathResult.ANY_TYPE, null);
	
	var author;
	while(author = authors.iterateNext()) {
		newItem.creators.push(Zotero.Utilities.cleanAuthor(author.textContent, "author", false));
	}
	
	newItem.complete();
}

function doWeb(doc, url) {
	var articleRegexp = /^http:\/\/www\.nybooks\.com\/articles\/[0-9]+/
	if(articleRegexp.test(url)) {
		scrape(doc);
	} else {
		// listing page: let the user choose, then process each article document
		var items = Zotero.Utilities.getItemArray(doc, doc, "^http://www\\.nybooks\\.com/articles/[0-9]+/");
		items = Zotero.selectItems(items);
		
		if(!items) {
			return true;
		}
		
		var urls = new Array();
		for(var i in items) {
			urls.push(i);
		}
		
		Zotero.Utilities.processDocuments(urls, scrape, function() { Zotero.done(); });
		Zotero.wait();
	}
}');
2006-10-02 23:15:27 +00:00
-- Washington Post web translator.
-- detectWeb refuses to match unless a "Sign out" link proves the user is
-- logged in; article metadata is scraped from the byline div and font tags.
-- NOTE(review): reconstructed from a whitespace-mangled, VCS-residue-corrupted
-- copy; the page/date regex character classes could not be recovered exactly —
-- confirm against repository history.
REPLACE INTO "translators" VALUES ('d1bf1c29-4432-4ada-8893-2e29fc88fd9e', '2006-10-02 17:00:00', 1, 100, 4, 'Washington Post', 'Simon Kornblith', '^http://www\.washingtonpost\.com/',
'function detectWeb(doc, url) {
	var namespace = doc.documentElement.namespaceURI;
	var nsResolver = namespace ? function(prefix) {
		if (prefix == ''x'') return namespace; else return null;
	} : null;
	
	// don''t say we can scrape when we can''t; make sure user is logged in
	var signedIn = doc.evaluate(''//a[text() = "Sign out" or text() = "Sign Out"]'',
	                            doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
	if(!signedIn) {
		return;
	}
	
	var articleRegexp = /http:\/\/www\.washingtonpost\.com\/wp-dyn\/content\/article\/[0-9]+\/[0-9]+\/[0-9]+\/[^\/]+\.html/
	if(articleRegexp.test(url)) {
		return "newspaperArticle";
	} else {
		// listing page: offer "multiple" if any link targets an article
		var aTags = doc.getElementsByTagName("a");
		for(var i=0; i<aTags.length; i++) {
			if(articleRegexp.test(aTags[i].href)) {
				return "multiple";
			}
		}
	}
}',
'function scrape(doc) {
	var namespace = doc.documentElement.namespaceURI;
	var nsResolver = namespace ? function(prefix) {
		if (prefix == ''x'') return namespace; else return null;
	} : null;
	
	var newItem = new Zotero.Item("newspaperArticle");
	newItem.publicationTitle = "The Washington Post";
	newItem.ISSN = "0740-5421";
	
	newItem.url = doc.location.href;
	var metaTags = doc.getElementsByTagName("meta");
	newItem.attachments.push({document:doc, title:"Article (HTML)",
	                          downloadable:true});
	
	// grab title from doc title
	newItem.title = doc.title;
	
	var byline = doc.evaluate(''//div[@id="byline"]'', doc, nsResolver,
	                          XPathResult.ANY_TYPE, null).iterateNext();
	// grab authors from byline
	if(byline) {
		var authors = byline.textContent.substr(3).split(" and ");
		for each(var author in authors) {
			newItem.creators.push(Zotero.Utilities.cleanAuthor(author, "author"));
		}
	}
	
	var fonts = doc.evaluate(''//div[@id="article"]/p/font/text()'', doc, nsResolver,
	                         XPathResult.ANY_TYPE, null);
	var font;
	while(font = fonts.iterateNext()) {
		// NOTE(review): \xA0 handles non-breaking spaces; exact class contents
		// are a best-effort reconstruction — confirm
		var pageRe = /([^;]+);(?:[\xA0 ]+Pages?[\xA0 ]+([A-Z0-9\-]+))?/
		// grab pages and date
		Zotero.Utilities.debug(Zotero.Utilities.cleanString(font.nodeValue));
		var m = pageRe.exec(font.nodeValue);
		if(m) {
			newItem.date = m[1];
			newItem.pages = m[2];
			break;
		}
	}
	
	// grab tags from meta tag
	var keywords = doc.getElementsByTagName("meta");
	if(keywords) {
		keywords = keywords.namedItem("keywords");
		if(keywords) {
			keywords = keywords.getAttribute("content");
			if(keywords) {
				newItem.tags = keywords.split(/, ?/);
			}
		}
	}
	
	newItem.complete();
}

function doWeb(doc, url) {
	var articleRegexp = /http:\/\/www\.washingtonpost\.com\/wp-dyn\/content\/article\/[0-9]+\/[0-9]+\/[0-9]+\/[^\/]+\.html/
	if(articleRegexp.test(url)) {
		scrape(doc);
	} else {
		// listing page: let the user choose, then process each article document
		var items = Zotero.Utilities.getItemArray(doc, doc, articleRegexp);
		items = Zotero.selectItems(items);
		
		if(!items) {
			return true;
		}
		
		var urls = new Array();
		for(var i in items) {
			urls.push(i);
		}
		
		Zotero.Utilities.processDocuments(urls, scrape, function() { Zotero.done(); });
		Zotero.wait();
	}
}');
2006-10-02 23:15:27 +00:00
-- ABC-CLIO Serials web translator.
-- Batch workflow: scrape the result tables for titles/checkboxes, let the user
-- select items, POST the selection, POST the RIS download form, fetch the RIS
-- link, and hand the text to the RIS import translator
-- (32d59d2d-b65a-4da4-b0a3-bdd3cfb979e7).
-- NOTE(review): reconstructed from a whitespace-mangled, VCS-residue-corrupted
-- copy; the thesis-extra regex spacing could not be recovered exactly — confirm.
REPLACE INTO "translators" VALUES ('a07bb62a-4d2d-4d43-ba08-d9679a0122f8', '2006-10-02 17:00:00', 1, 100, 4, 'ABC-CLIO', 'Simon Kornblith', '^http://serials\.abc-clio\.com/active/go/ABC-Clio-Serials_v4.1$',
'function detectWeb(doc, url) {
	var namespace = doc.documentElement.namespaceURI;
	var nsResolver = namespace ? function(prefix) {
		if (prefix == ''x'') return namespace; else return null;
	} : null;
	
	var result = doc.evaluate(''//table[@class="rc_main"]'', doc, nsResolver,
	                          XPathResult.ANY_TYPE, null).iterateNext();
	if(result) {
		return "multiple";
	}
}',
'function doWeb(doc, url) {
	var namespace = doc.documentElement.namespaceURI;
	var nsResolver = namespace ? function(prefix) {
		if (prefix == ''x'') return namespace; else return null;
	} : null;
	
	var availableItems = new Array();
	var availableAttachments = new Array();
	
	// each result is an rc_main table containing a title cell and a checkbox
	var elmts = doc.evaluate(''//table[@class="rc_main"]'', doc, nsResolver,
	                         XPathResult.ANY_TYPE, null);
	var elmt;
	while(elmt = elmts.iterateNext()) {
		var title = doc.evaluate(''./tbody/tr/td[b/text() = "Title:"]'',
		                         elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
		var checkbox = doc.evaluate(''.//input[@type="checkbox"]'',
		                            elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
		if(title, checkbox) {
			checkbox = checkbox.name;
			// substr(6) strips the "Title:" label
			availableItems[checkbox] = Zotero.Utilities.cleanString(title.textContent).substr(6);
			
			var links = doc.evaluate(''./tbody/tr/td[b/text() = "Fulltext: ["]/a'',
			                         elmt, nsResolver, XPathResult.ANY_TYPE, null);
			var link;
			
			var attach = new Array();
			while(link = links.iterateNext()) {
				attach.push({url:link.href, title:Zotero.Utilities.cleanString(link.textContent)+" Full Text",
				             mimeType:"text/html"});
			}
			availableAttachments[checkbox] = attach;
		}
	}
	
	var items = Zotero.selectItems(availableItems);
	
	if(!items) {
		return true;
	}
	
	// tag the selected checkboxes on the server side
	var postString = "_defaultoperation=Download+Options&research_field=&research_value=&jumpto=";
	var attachments = new Array();
	for(var i in availableItems) {
		postString += "&_checkboxname="+i+(items[i] ? "&"+i+"=1" : "");
		if(items[i]) {
			attachments.push(availableAttachments[i]);
		}
	}
	
	Zotero.Utilities.HTTP.doPost(url, postString, function(text) {
		// request RIS export of the tagged documents
		Zotero.Utilities.HTTP.doPost(url, "_appname=serials&_defaultoperation=Download+Documents&_formname=download&download_format=citation&download_which=tagged&download_where=ris&mailto=&mailreplyto=&mailsubject=&mailmessage=",
		                             function(text) {
			// get link
			var linkRe = /<a\s+class="button"\s+href="([^"]+)"\s+id="resource_link"/i;
			var m = linkRe.exec(text);
			if(!m) {
				throw("regular expression failed!");
			}
			
			Zotero.Utilities.HTTP.doGet(m[1], function(text) {
				// load translator for RIS
				var translator = Zotero.loadTranslator("import");
				translator.setTranslator("32d59d2d-b65a-4da4-b0a3-bdd3cfb979e7");
				translator.setString(text);
				translator.setHandler("itemDone", function(obj, item) {
					if(item.notes && item.notes[0]) {
						item.extra = item.notes[0].note;
						
						delete item.notes;
						item.notes = undefined;
					}
					
					// grab uni data from thesis
					if(item.itemType == "thesis") {
						// NOTE(review): spacing in this pattern is a best-effort
						// reconstruction — confirm against repository history
						var re = /^(.+?) ([0-9]{4})\. ([0-9]+) pp\.(.*)$/;
						var m = re.exec(item.extra);
						if(m) {
							item.publisher = m[1];
							item.date = m[2];
							item.pages = m[3];
							item.extra = m[4];
						}
					}
					
					// fix periods
					for(var i in item.creators) {
						var nameLength = item.creators[i].firstName.length;
						if(item.creators[i].firstName[nameLength-1] == ".") {
							item.creators[i].firstName = item.creators[i].firstName.substr(0, nameLength-1);
						}
					}
					for(var i in item.tags) {
						var tagLength = item.tags[i].length;
						if(item.tags[i][tagLength-1] == ".") {
							item.tags[i] = item.tags[i].substr(0, tagLength-1);
						}
					}
					
					// fix title
					item.title = Zotero.Utilities.superCleanString(item.title);
					
					// add attachments
					item.attachments = attachments.shift();
					
					item.complete();
				});
				translator.translate();
				
				Zotero.done();
			});
		});
	});
	
	Zotero.wait();
}');
2006-10-02 23:15:27 +00:00
REPLACE INTO " translators " VALUES ( ' fa396dd4-7d04-4f99-95e1-93d6f355441d ' , ' 2006-10-02 17:00:00 ' , 1 , 100 , 4 , ' CiteSeer ' , ' Simon Kornblith ' , ' ^http://(?:citeseer\.ist\.psu\.edu/|citeseer\.csail\.mit\.edu/|citeseer\.ifi\.unizh\.ch/|citeseer\.comp\.nus\.edu\.sg/) ' ,
2006-09-08 01:59:22 +00:00
' function detectWeb(doc, url) {
var searchRe = / http : \ / \ / [ ^ \ / ] + \ / ci ? s / ;
if ( searchRe . test ( url ) ) {
return " multiple " ;
} else {
var namespace = doc . documentElement . namespaceURI ;
var nsResolver = namespace ? function ( prefix ) {
if ( prefix = = ' ' x ' ' ) return namespace ; else return null ;
} : null ;
if ( doc . evaluate ( ' ' / html / body / span [ @ class = " m " ] / pre ' ' , doc , nsResolver ,
XPathResult . ANY_TYPE , null ) . iterateNext ( ) ) {
return " journalArticle " ;
}
}
} ' ,
' function scrape(doc) {
var namespace = doc . documentElement . namespaceURI ;
var nsResolver = namespace ? function ( prefix ) {
if ( prefix = = ' ' x ' ' ) return namespace ; else return null ;
} : null ;
/ / figure out what attachments to add
var attachments = new Array ( ) ;
var results = doc . evaluate ( ' ' / html / body / span [ @ class = " m " ] / table [ @ class = " h " ] / tbody / tr / td [ 4 ] / center / font / a ' ' ,
doc , nsResolver , XPathResult . ANY_TYPE , null ) ;
var elmt ;
var acceptableTypes = [ " PDF " , " PS " , " PS.gz " ] ;
var mimeTypes = [ " application/pdf " , " application/postscript " , " application/gzip " ] ;
while ( elmt = results . iterateNext ( ) ) {
var kind = elmt . textContent . toString ( ) ;
var index = acceptableTypes . indexOf ( kind ) ;
if ( index ! = - 1 ) {
var attachment = { url : elmt . href , mimeType : mimeTypes [ index ] ,
title : " Full Text " + kind } ;
if ( kind = = " PDF " ) {
attachment . downloadable = true ;
}
attachments . push ( attachment ) ;
}
}
var bibtex = doc . evaluate ( ' ' / html / body / span [ @ class = " m " ] / pre / text ( ) ' ' , doc , nsResolver ,
XPathResult . ANY_TYPE , null ) . iterateNext ( ) ;
if ( bibtex ) {
2006-10-02 23:15:27 +00:00
var translator = Zotero . loadTranslator ( " import " ) ;
2006-09-08 01:59:22 +00:00
translator . setTranslator ( " 9cb70025-a888-4a29-a210-93ec52da40d4 " ) ;
translator . setString ( bibtex . nodeValue . toString ( ) ) ;
translator . setHandler ( " itemDone " , function ( obj , item ) {
if ( item . url ) { / / add http to url
item . url = " http:// " + item . url ;
}
item . attachments = attachments ;
item . attachments . push ( { document : doc , downloadable : false ,
title : " CiteSeer Abstract " } ) ;
item . complete ( ) ;
} ) ;
translator . translate ( ) ;
} else {
throw " No BibTeX found! " ;
}
}
function doWeb ( doc , url ) {
var searchRe = / http : \ / \ / ( [ ^ \ / ] + ) \ / ci ? s / ;
var m = searchRe . exec ( doc . location . href ) ;
if ( m ) {
var namespace = doc . documentElement . namespaceURI ;
var nsResolver = namespace ? function ( prefix ) {
if ( prefix = = ' ' x ' ' ) return namespace ; else return null ;
} : null ;
2006-10-02 23:15:27 +00:00
var items = Zotero . Utilities . getItemArray ( doc , doc , " ^http:// " + m [ 1 ] + " /[^/]+.html " ) ;
items = Zotero . selectItems ( items ) ;
2006-09-08 01:59:22 +00:00
if ( ! items ) {
return true ;
}
var urls = new Array ( ) ;
for ( var i in items ) {
urls . push ( i ) ;
}
2006-10-02 23:15:27 +00:00
Zotero . Utilities . processDocuments ( urls , scrape , function ( ) { Zotero . done ( ) ; } ) ;
Zotero . wait ( ) ;
2006-09-08 01:59:22 +00:00
} else {
scrape ( doc ) ;
}
} ' );
2006-10-02 23:15:27 +00:00
REPLACE INTO " translators " VALUES ( ' e07e9b8c-0e98-4915-bb5a-32a08cb2f365 ' , ' 2006-10-02 17:00:00 ' , 1 , 100 , 8 , ' Open WorldCat ' , ' Simon Kornblith ' , ' http://partneraccess.oclc.org/ ' ,
2006-08-08 01:06:33 +00:00
' function detectSearch(item) {
if ( item . itemType = = " book " | | item . itemType = = " bookSection " ) {
return true ;
}
return false ;
} ' ,
' // creates an item from an Open WorldCat document
function processOWC ( doc ) {
var spanTags = doc . getElementsByTagName ( " span " ) ;
for ( var i = 0 ; i < spanTags . length ; i + + ) {
var spanClass = spanTags [ i ] . getAttribute ( " class " ) ;
if ( spanClass ) {
var spanClasses = spanClass . split ( " " ) ;
2006-10-02 23:15:27 +00:00
if ( Zotero . Utilities . inArray ( " Z3988 " , spanClasses ) ) {
2006-08-08 01:06:33 +00:00
var spanTitle = spanTags [ i ] . getAttribute ( " title " ) ;
2006-10-02 23:15:27 +00:00
var item = new Zotero . Item ( ) ;
if ( Zotero . Utilities . parseContextObject ( spanTitle , item ) ) {
item . title = Zotero . Utilities . capitalizeTitle ( item . title ) ;
2006-08-08 01:06:33 +00:00
item . complete ( ) ;
return true ;
} else {
return false ;
}
}
}
}
return false ;
}
function doSearch ( item ) {
if ( item . contextObject ) {
var co = item . contextObject ;
} else {
2006-10-02 23:15:27 +00:00
var co = Zotero . Utilities . createContextObject ( item ) ;
2006-08-08 01:06:33 +00:00
}
2006-10-02 23:15:27 +00:00
Zotero . Utilities . loadDocument ( " http://partneraccess.oclc.org/wcpa/servlet/OpenUrl? " + co , function ( doc ) {
2006-08-08 01:06:33 +00:00
/ / find new COinS in the Open WorldCat page
if ( processOWC ( doc ) ) { / / we got a single item page
2006-10-02 23:15:27 +00:00
Zotero . done ( ) ;
2006-08-08 01:06:33 +00:00
} else { / / assume we have a search results page
var items = new Array ( ) ;
var namespace = doc . documentElement . namespaceURI ;
var nsResolver = namespace ? function ( prefix ) {
if ( prefix = = ' ' x ' ' ) return namespace ; else return null ;
} : null ;
/ / first try to get only books
var elmts = doc . evaluate ( ' ' / / table [ @ class = " tableLayout " ] / tbody / tr / td [ @ class = " content " ] / table [ @ class = " tableResults " ] / tbody / tr [ td / img [ @ alt = " Book " ] ] / td / div [ @ class = " title " ] / a ' ' , doc , nsResolver , Components . interfaces . nsIDOMXPathResult . ANY_TYPE , null ) ;
var elmt = elmts . iterateNext ( ) ;
if ( ! elmt ) { / / if that fails , look for other options
var elmts = doc . evaluate ( ' ' / / table [ @ class = " tableLayout " ] / tbody / tr / td [ @ class = " content " ] / table [ @ class = " tableResults " ] / tbody / tr [ td / img [ @ alt = " Book " ] ] / td / div [ @ class = " title " ] / a ' ' , doc , nsResolver , Components . interfaces . nsIDOMXPathResult . ANY_TYPE , null ) ;
elmt = elmts . iterateNext ( )
}
var urlsToProcess = new Array ( ) ;
do {
urlsToProcess . push ( elmt . href ) ;
} while ( elmt = elmts . iterateNext ( ) ) ;
2006-10-02 23:15:27 +00:00
Zotero . Utilities . processDocuments ( urlsToProcess , function ( doc ) {
2006-08-08 01:06:33 +00:00
/ / per URL
2006-08-11 15:28:18 +00:00
processOWC ( doc ) ;
2006-08-08 01:06:33 +00:00
} , function ( ) { / / done
2006-10-02 23:15:27 +00:00
Zotero . done ( ) ;
2006-08-08 01:06:33 +00:00
} ) ;
}
closes #187, make berkeley's library work
closes #186, stop translators from hanging
when a document loads inside a frameset, we now check whether we can scrape each individual frame.
all functions involving tabs have been vastly simplified, because in the process of figuring this out, i discovered Firefox 2's new tab events.
if a translator throws an exception inside loadDocument(), doGet(), doPost(), or processDocuments(), a translate error message will appear, and the translator will not hang
2006-08-15 19:46:42 +00:00
} , null ) ;
2006-08-08 01:06:33 +00:00
2006-10-02 23:15:27 +00:00
Zotero . wait ( ) ;
2006-08-08 01:06:33 +00:00
} ' );
2006-10-02 23:15:27 +00:00
REPLACE INTO " translators " VALUES ( ' 11645bd1-0420-45c1-badb-53fb41eeb753 ' , ' 2006-10-02 17:00:00 ' , 1 , 100 , 8 , ' CrossRef ' , ' Simon Kornblith ' , ' http://partneraccess.oclc.org/ ' ,
2006-08-08 01:06:33 +00:00
' function detectSearch(item) {
if ( item . itemType = = " journal " ) {
return true ;
}
return false ;
} ' ,
' function processCrossRef(xmlOutput) {
xmlOutput = xmlOutput . replace ( / < \ ? xml [ ^ > ] * \ ? > / , " " ) ;
/ / parse XML with E4X
var qr = new Namespace ( " http://www.crossref.org/qrschema/2.0 " ) ;
try {
var xml = new XML ( xmlOutput ) ;
} catch ( e ) {
return false ;
}
/ / ensure status is valid
var status = xml . qr : : query_result . qr : : body . qr : : query . @ status . toString ( ) ;
if ( status ! = " resolved " & & status ! = " multiresolved " ) {
return false ;
}
var query = xml . qr : : query_result . qr : : body . qr : : query ;
2006-10-02 23:15:27 +00:00
var item = new Zotero . Item ( " journalArticle " ) ;
2006-08-08 01:06:33 +00:00
/ / try to get a DOI
item . DOI = query . qr : : doi . ( @ type = = " journal_article " ) . text ( ) . toString ( ) ;
if ( ! item . DOI ) {
item . DOI = query . qr : : doi . ( @ type = = " book_title " ) . text ( ) . toString ( ) ;
}
if ( ! item . DOI ) {
item . DOI = query . qr : : doi . ( @ type = = " book_content " ) . text ( ) . toString ( ) ;
}
/ / try to get an ISSN ( no print / electronic preferences )
item . ISSN = query . qr : : issn [ 0 ] . text ( ) . toString ( ) ;
/ / get title
item . title = query . qr : : article_title . text ( ) . toString ( ) ;
/ / get publicationTitle
item . publicationTitle = query . qr : : journal_title . text ( ) . toString ( ) ;
/ / get author
2006-10-02 23:15:27 +00:00
item . creators . push ( Zotero . Utilities . cleanAuthor ( query . qr : : author . text ( ) . toString ( ) , " author " , true ) ) ;
2006-08-08 01:06:33 +00:00
/ / get volume
item . volume = query . qr : : volume . text ( ) . toString ( ) ;
/ / get issue
item . issue = query . qr : : issue . text ( ) . toString ( ) ;
/ / get year
item . date = query . qr : : year . text ( ) . toString ( ) ;
/ / get edition
item . edition = query . qr : : edition_number . text ( ) . toString ( ) ;
/ / get first page
item . pages = query . qr : : first_page . text ( ) . toString ( ) ;
item . complete ( ) ;
return true ;
}
function doSearch ( item ) {
if ( item . contextObject ) {
var co = item . contextObject ;
if ( co . indexOf ( " url_ver= " ) = = - 1 ) {
co = " url_ver=Z39.88-2004 " + co ;
}
} else {
2006-10-02 23:15:27 +00:00
var co = Zotero . Utilities . createContextObject ( item ) ;
2006-08-08 01:06:33 +00:00
}
2006-10-02 23:15:27 +00:00
Zotero . Utilities . HTTP . doGet ( " http://www.crossref.org/openurl/? " + co + " &noredirect=true " , function ( responseText ) {
2006-08-08 01:06:33 +00:00
processCrossRef ( responseText ) ;
2006-10-02 23:15:27 +00:00
Zotero . done ( ) ;
2006-08-08 01:06:33 +00:00
} ) ;
2006-10-02 23:15:27 +00:00
Zotero . wait ( ) ;
2006-08-08 01:06:33 +00:00
} ' );
2006-10-02 23:15:27 +00:00
REPLACE INTO " translators " VALUES ( ' 0e2235e7-babf-413c-9acf-f27cce5f059c ' , ' 2006-10-02 17:00:00 ' , 1 , 50 , 3 , ' MODS ' , ' Simon Kornblith ' , ' xml ' ,
' Zotero.addOption("exportNotes", true);
2006-08-08 02:46:52 +00:00
function detectImport ( ) {
2006-10-02 23:15:27 +00:00
var read = Zotero . read ( 512 ) ;
2006-08-08 02:46:52 +00:00
var modsTagRegexp = / < mods [ ^ > ] + > /
if ( modsTagRegexp . test ( read ) ) {
return true ;
}
} ' ,
2006-06-29 00:56:50 +00:00
' var partialItemTypes = ["bookSection", "journalArticle", "magazineArticle", "newspaperArticle"];
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
function doExport ( ) {
2006-10-02 23:15:27 +00:00
Zotero . setCharacterSet ( " utf-8 " ) ;
2006-08-06 09:34:51 +00:00
var modsCollection = < modsCollection xmlns = " http://www.loc.gov/mods/v3 " xmlns : xsi = " http://www.w3.org/2001/XMLSchema-instance " xsi : schemaLocation = " http://www.loc.gov/mods/v3 http://www.loc.gov/standards/mods/v3/mods-3-2.xsd " / > ;
2006-06-29 00:56:50 +00:00
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
var item ;
2006-10-02 23:15:27 +00:00
while ( item = Zotero . nextItem ( ) ) {
var isPartialItem = Zotero . Utilities . inArray ( item . itemType , partialItemTypes ) ;
2006-06-29 00:56:50 +00:00
var mods = < mods / > ;
/* * CORE FIELDS * */
/ / XML tag titleInfo ; object field title
2006-07-06 03:39:32 +00:00
if ( item . title ) {
mods . titleInfo . title = item . title ;
}
2006-06-29 00:56:50 +00:00
/ / XML tag typeOfResource / genre ; object field type
var modsType , marcGenre ;
if ( item . itemType = = " book " | | item . itemType = = " bookSection " ) {
modsType = " text " ;
marcGenre = " book " ;
} else if ( item . itemType = = " journalArticle " | | item . itemType = = " magazineArticle " ) {
modsType = " text " ;
marcGenre = " periodical " ;
} else if ( item . itemType = = " newspaperArticle " ) {
modsType = " text " ;
marcGenre = " newspaper " ;
} else if ( item . itemType = = " thesis " ) {
modsType = " text " ;
marcGenre = " theses " ;
} else if ( item . itemType = = " letter " ) {
modsType = " text " ;
marcGenre = " letter " ;
} else if ( item . itemType = = " manuscript " ) {
modsType = " text " ;
modsType . @ manuscript = " yes " ;
} else if ( item . itemType = = " interview " ) {
modsType = " text " ;
2006-08-06 09:34:51 +00:00
marcGenre = " interview " ;
2006-06-29 00:56:50 +00:00
} else if ( item . itemType = = " film " ) {
modsType = " moving image " ;
marcGenre = " motion picture " ;
} else if ( item . itemType = = " artwork " ) {
modsType = " still image " ;
marcGenre = " art original " ;
2006-10-02 00:00:50 +00:00
} else if ( item . itemType = = " webpage " ) {
2006-06-29 00:56:50 +00:00
modsType = " multimedia " ;
marcGenre = " web site " ;
2006-09-09 22:00:04 +00:00
} else if ( item . itemType = = " note " | | item . itemType = = " attachment " ) {
2006-08-06 09:34:51 +00:00
continue ;
2006-06-29 00:56:50 +00:00
}
mods . typeOfResource = modsType ;
mods . genre + = < genre authority = " local " > { item . itemType } < / genre > ;
2006-07-06 03:39:32 +00:00
if ( marcGenre ) {
mods . genre + = < genre authority = " marcgt " > { marcGenre } < / genre > ;
}
2006-06-29 00:56:50 +00:00
/ / XML tag genre ; object field thesisType , type
if ( item . thesisType ) {
mods . genre + = < genre > { item . thesisType } < / genre > ;
}
if ( item . type ) {
mods . genre + = < genre > { item . type } < / genre > ;
}
/ / XML tag name ; object field creators
for ( var j in item . creators ) {
var roleTerm = " " ;
if ( item . creators [ j ] . creatorType = = " author " ) {
roleTerm = " aut " ;
} else if ( item . creators [ j ] . creatorType = = " editor " ) {
roleTerm = " edt " ;
} else if ( item . creators [ j ] . creatorType = = " creator " ) {
roleTerm = " ctb " ;
}
/ / FIXME - currently all names are personal
mods . name + = < name type = " personal " >
< namePart type = " family " > { item . creators [ j ] . lastName } < / namePart >
< namePart type = " given " > { item . creators [ j ] . firstName } < / namePart >
< role > < roleTerm type = " code " authority = " marcrelator " > { roleTerm } < / roleTerm > < / role >
< / name > ;
}
/ / XML tag recordInfo . recordOrigin ; used to store our generator note
2006-10-02 23:15:27 +00:00
/ / mods . recordInfo . recordOrigin = " Zotero for Firefox " + Zotero . Utilities . getVersion ( ) ;
2006-06-29 00:56:50 +00:00
/* * FIELDS ON NEARLY EVERYTHING BUT NOT A PART OF THE CORE * */
/ / XML tag recordInfo . recordContentSource ; object field source
if ( item . source ) {
mods . recordInfo . recordContentSource = item . source ;
}
/ / XML tag recordInfo . recordIdentifier ; object field accessionNumber
if ( item . accessionNumber ) {
mods . recordInfo . recordIdentifier = item . accessionNumber ;
}
/ / XML tag accessCondition ; object field rights
if ( item . rights ) {
mods . accessCondition = item . rights ;
}
/* * SUPPLEMENTAL FIELDS * */
/ / XML tag relatedItem . titleInfo ; object field series
2006-08-06 17:34:41 +00:00
if ( item . seriesTitle ) {
2006-06-29 00:56:50 +00:00
var series = < relatedItem type = " series " >
2006-08-06 17:34:41 +00:00
< titleInfo > < title > { item . seriesTitle } < / title > < / titleInfo >
2006-06-29 00:56:50 +00:00
< / relatedItem > ;
if ( item . itemType = = " bookSection " ) {
/ / For a book section , series info must go inside host tag
mods . relatedItem . relatedItem = series ;
} else {
mods . relatedItem + = series ;
}
}
/ / Make part its own tag so we can figure out where it goes later
var part = new XML ( ) ;
/ / XML tag detail ; object field volume
if ( item . volume ) {
2006-10-02 23:15:27 +00:00
if ( Zotero . Utilities . isInt ( item . volume ) ) {
2006-06-29 00:56:50 +00:00
part + = < detail type = " volume " > < number > { item . volume } < / number > < / detail > ;
} else {
part + = < detail type = " volume " > < text > { item . volume } < / text > < / detail > ;
}
}
/ / XML tag detail ; object field number
2006-08-06 17:34:41 +00:00
if ( item . issue ) {
2006-10-02 23:15:27 +00:00
if ( Zotero . Utilities . isInt ( item . issue ) ) {
2006-08-06 17:34:41 +00:00
part + = < detail type = " issue " > < number > { item . issue } < / number > < / detail > ;
2006-06-29 00:56:50 +00:00
} else {
2006-08-06 17:34:41 +00:00
part + = < detail type = " issue " > < text > { item . issue } < / text > < / detail > ;
2006-06-29 00:56:50 +00:00
}
}
/ / XML tag detail ; object field section
if ( item . section ) {
2006-10-02 23:15:27 +00:00
if ( Zotero . Utilities . isInt ( item . section ) ) {
2006-06-29 00:56:50 +00:00
part + = < detail type = " section " > < number > { item . section } < / number > < / detail > ;
} else {
part + = < detail type = " section " > < text > { item . section } < / text > < / detail > ;
}
}
/ / XML tag detail ; object field pages
if ( item . pages ) {
2006-10-02 23:15:27 +00:00
var range = Zotero . Utilities . getPageRange ( item . pages ) ;
2006-07-05 21:44:01 +00:00
part + = < extent unit = " pages " > < start > { range [ 0 ] } < / start > < end > { range [ 1 ] } < / end > < / extent > ;
2006-06-29 00:56:50 +00:00
}
/ / Assign part if something was assigned
if ( part . length ( ) ! = 1 ) {
if ( isPartialItem ) {
/ / For a journal article , bookSection , etc . , the part is the host
mods . relatedItem . part + = < part > { part } < / part > ;
} else {
mods . part + = < part > { part } < / part > ;
}
}
/ / XML tag originInfo ; object fields edition , place , publisher , year , date
var originInfo = new XML ( ) ;
if ( item . edition ) {
originInfo + = < edition > { item . edition } < / edition > ;
}
if ( item . place ) {
originInfo + = < place > < placeTerm type = " text " > { item . place } < / placeTerm > < / place > ;
}
if ( item . publisher ) {
2006-08-06 09:34:51 +00:00
originInfo + = < publisher > { item . publisher } < / publisher > ;
2006-06-29 00:56:50 +00:00
} else if ( item . distributor ) {
2006-08-06 09:34:51 +00:00
originInfo + = < publisher > { item . distributor } < / publisher > ;
2006-06-29 00:56:50 +00:00
}
if ( item . date ) {
2006-10-02 23:15:27 +00:00
if ( Zotero . Utilities . inArray ( item . itemType , [ " book " , " bookSection " ] ) ) {
2006-08-14 05:12:28 +00:00
/ / Assume year is copyright date
2006-08-14 05:15:52 +00:00
var dateType = " copyrightDate " ;
2006-10-02 23:15:27 +00:00
} else if ( Zotero . Utilities . inArray ( item . itemType , [ " journalArticle " , " magazineArticle " , " newspaperArticle " ] ) ) {
2006-06-29 00:56:50 +00:00
/ / Assume date is date issued
var dateType = " dateIssued " ;
} else {
/ / Assume date is date created
var dateType = " dateCreated " ;
}
2006-08-14 05:23:39 +00:00
var tag = < { dateType } > { item . date } < / { dateType } > ;
originInfo + = tag ;
2006-08-06 17:34:41 +00:00
}
if ( item . accessDate ) {
2006-08-31 00:04:11 +00:00
originInfo + = < dateCaptured > { item . accessDate } < / dateCaptured > ;
2006-08-06 17:34:41 +00:00
}
2006-06-29 00:56:50 +00:00
if ( originInfo . length ( ) ! = 1 ) {
if ( isPartialItem ) {
/ / For a journal article , bookSection , etc . , this goes under the host
mods . relatedItem . originInfo + = < originInfo > { originInfo } < / originInfo > ;
} else {
mods . originInfo + = < originInfo > { originInfo } < / originInfo > ;
}
}
/ / XML tag identifier ; object fields ISBN , ISSN
2006-08-06 17:34:41 +00:00
if ( isPartialItem ) {
var identifier = mods . relatedItem ;
} else {
var identifier = mods ;
}
2006-06-29 00:56:50 +00:00
if ( item . ISBN ) {
2006-08-06 17:34:41 +00:00
identifier . identifier + = < identifier type = " isbn " > { item . ISBN } < / identifier > ;
2006-06-29 00:56:50 +00:00
}
2006-08-06 17:34:41 +00:00
if ( item . ISSN ) {
identifier . identifier + = < identifier type = " issn " > { item . ISSN } < / identifier > ;
}
if ( item . DOI ) {
identifier . identifier + = < identifier type = " doi " > { item . DOI } < / identifier > ;
2006-06-29 00:56:50 +00:00
}
/ / XML tag relatedItem . titleInfo ; object field publication
2006-08-06 17:34:41 +00:00
if ( item . publicationTitle ) {
mods . relatedItem . titleInfo + = < titleInfo > < title > { item . publicationTitle } < / title > < / titleInfo > ;
2006-06-29 00:56:50 +00:00
}
/ / XML tag classification ; object field callNumber
if ( item . callNumber ) {
mods . classification = item . callNumber ;
}
/ / XML tag location . physicalLocation ; object field archiveLocation
if ( item . archiveLocation ) {
mods . location . physicalLocation = item . archiveLocation ;
}
/ / XML tag location . url ; object field archiveLocation
if ( item . url ) {
mods . location . url = item . url ;
}
2006-08-06 17:34:41 +00:00
/ / XML tag title . titleInfo ; object field journalAbbreviation
if ( item . journalAbbreviation ) {
mods . relatedItem . titleInfo + = < titleInfo type = " abbreviated " > < title > { item . journalAbbreviation } < / title > < / titleInfo > ;
}
2006-06-29 00:56:50 +00:00
if ( mods . relatedItem . length ( ) = = 1 & & isPartialItem ) {
mods . relatedItem . @ type = " host " ;
}
/* * NOTES * */
2006-10-02 23:15:27 +00:00
if ( Zotero . getOption ( " exportNotes " ) ) {
2006-08-08 23:00:33 +00:00
for ( var j in item . notes ) {
/ / Add note tag
var note = < note type = " content " > { item . notes [ j ] . note } < / note > ;
mods . note + = note ;
}
2006-07-06 03:39:32 +00:00
}
/* * TAGS * */
for ( var j in item . tags ) {
mods . subject + = < subject > { item . tags [ j ] } < / subject > ;
}
2006-06-29 00:56:50 +00:00
modsCollection . mods + = mods ;
}
2006-10-02 23:15:27 +00:00
Zotero . write ( ' ' < ? xml version = " 1.0 " ? > ' ' + " \n " ) ;
Zotero . write ( modsCollection . toXMLString ( ) ) ;
2006-08-06 09:34:51 +00:00
}
function doImport ( ) {
var text = " " ;
var read ;
2006-09-05 07:51:55 +00:00
/ / read until we see if the file begins with a parse instruction
read = " " ;
while ( read = = " " | | read = = " \n " | | read = = " \r " ) {
2006-10-02 23:15:27 +00:00
read = Zotero . read ( 1 ) ;
2006-09-05 07:51:55 +00:00
}
2006-10-02 23:15:27 +00:00
var firstPart = read + Zotero . read ( 4 ) ;
2006-09-05 07:51:55 +00:00
if ( firstPart = = " <?xml " ) {
/ / got a parse instruction , read until it ends
read = true ;
while ( ( read ! = = false ) & & ( read ! = = " > " ) ) {
2006-10-02 23:15:27 +00:00
read = Zotero . read ( 1 ) ;
2006-09-05 07:51:55 +00:00
firstPart + = read ;
}
var encodingRe = / encoding = [ ' ' " ]([^'' " ] + ) [ ' ' " ]/;
var m = encodingRe . exec ( firstPart ) ;
/ / set character set
try {
2006-10-02 23:15:27 +00:00
Zotero . setCharacterSet ( m [ 1 ] ) ;
2006-09-05 07:51:55 +00:00
} catch ( e ) {
2006-10-02 23:15:27 +00:00
Zotero . setCharacterSet ( " utf-8 " ) ;
2006-09-05 07:51:55 +00:00
}
} else {
2006-10-02 23:15:27 +00:00
Zotero . setCharacterSet ( " utf-8 " ) ;
2006-09-05 07:51:55 +00:00
text + = firstPart ;
}
2006-08-06 09:34:51 +00:00
/ / read in 16384 byte increments
2006-10-02 23:15:27 +00:00
while ( read = Zotero . read ( 16384 ) ) {
2006-08-06 09:34:51 +00:00
text + = read ;
}
2006-10-02 23:15:27 +00:00
Zotero . Utilities . debug ( " read in " ) ;
2006-08-06 09:34:51 +00:00
/ / parse with E4X
var m = new Namespace ( " http://www.loc.gov/mods/v3 " ) ;
/ / why does this default namespace declaration not work ! ?
default xml namespace = m ;
var xml = new XML ( text ) ;
for each ( var mods in xml . m : : mods ) {
2006-10-02 23:15:27 +00:00
Zotero . Utilities . debug ( " item is: " ) ;
2006-08-06 09:34:51 +00:00
for ( var i in mods ) {
2006-10-02 23:15:27 +00:00
Zotero . Utilities . debug ( i + " = " + mods [ i ] . toString ( ) ) ;
2006-08-06 09:34:51 +00:00
}
2006-10-02 23:15:27 +00:00
var newItem = new Zotero . Item ( ) ;
2006-08-06 09:34:51 +00:00
/ / title
2006-08-06 17:34:41 +00:00
newItem . title = mods . m : : titleInfo . ( m : : title . @ type ! = " abbreviated " ) . m : : title ;
2006-08-06 09:34:51 +00:00
/ / try to get genre from local genre
var localGenre = mods . m : : genre . ( @ authority = = " local " ) . text ( ) . toString ( ) ;
2006-10-02 23:15:27 +00:00
if ( localGenre & & Zotero . Utilities . itemTypeExists ( localGenre ) ) {
2006-08-06 09:34:51 +00:00
newItem . itemType = localGenre ;
} else {
/ / otherwise , look at the marc genre
var marcGenre = mods . m : : genre . ( @ authority = = " marcgt " ) . text ( ) . toString ( ) ;
if ( marcGenre ) {
if ( marcGenre = = " book " ) {
newItem . itemType = " book " ;
} else if ( marcGenre = = " periodical " ) {
newItem . itemType = " magazineArticle " ;
} else if ( marcGenre = = " newspaper " ) {
newItem . itemType = " newspaperArticle " ;
} else if ( marcGenre = = " theses " ) {
newItem . itemType = " thesis " ;
} else if ( marcGenre = = " letter " ) {
newItem . itemType = " letter " ;
} else if ( marcGenre = = " interview " ) {
newItem . itemType = " interview " ;
} else if ( marcGenre = = " motion picture " ) {
newItem . itemType = " film " ;
} else if ( marcGenre = = " art original " ) {
newItem . itemType = " artwork " ;
} else if ( marcGenre = = " web site " ) {
2006-10-02 00:00:50 +00:00
newItem . itemType = " webpage " ;
2006-08-06 09:34:51 +00:00
}
}
if ( ! newItem . itemType ) {
newItem . itemType = " book " ;
}
}
2006-10-02 23:15:27 +00:00
var isPartialItem = Zotero . Utilities . inArray ( newItem . itemType , partialItemTypes ) ;
2006-08-06 09:34:51 +00:00
/ / TODO : thesisType , type
for each ( var name in mods . m : : name ) {
/ / TODO : institutional authors
var creator = new Array ( ) ;
creator . firstName = name . m : : namePart . ( @ type = = " given " ) . text ( ) . toString ( ) ;
creator . lastName = name . m : : namePart . ( @ type = = " family " ) . text ( ) . toString ( ) ;
/ / look for roles
var role = name . m : : role . m : : roleTerm . ( @ type = = " code " ) . ( @ authority = = " marcrelator " ) . text ( ) . toString ( ) ;
if ( role = = " edt " ) {
creator . creatorType = " editor " ;
} else if ( role = = " ctb " ) {
creator . creatorType = " contributor " ;
} else {
creator . creatorType = " author " ;
}
newItem . creators . push ( creator ) ;
}
/ / source
newItem . source = mods . m : : recordInfo . m : : recordContentSource . text ( ) . toString ( ) ;
/ / accessionNumber
newItem . accessionNumber = mods . m : : recordInfo . m : : recordIdentifier . text ( ) . toString ( ) ;
/ / rights
newItem . rights = mods . m : : accessCondition . text ( ) . toString ( ) ;
/* * SUPPLEMENTAL FIELDS * */
/ / series
if ( newItem . itemType = = " bookSection " ) {
2006-08-06 17:34:41 +00:00
newItem . seriesTitle = mods . m : : relatedItem . ( @ type = = " host " ) . m : : relatedItem . ( @ type = = " series " ) . m : : titleInfo . m : : title . text ( ) . toString ( ) ;
2006-08-06 09:34:51 +00:00
} else {
2006-08-06 17:34:41 +00:00
newItem . seriesTitle = mods . m : : relatedItem . ( @ type = = " series " ) . m : : titleInfo . m : : title . text ( ) . toString ( ) ;
2006-08-06 09:34:51 +00:00
}
/ / get part
if ( isPartialItem ) {
var part = mods . m : : relatedItem . m : : part ;
var originInfo = mods . m : : relatedItem . m : : originInfo ;
var identifier = mods . m : : relatedItem . m : : identifier ;
} else {
var part = mods . m : : part ;
var originInfo = mods . m : : originInfo ;
var identifier = mods . m : : identifier ;
}
/ / volume
newItem . volume = part . m : : detail . ( @ type = = " volume " ) . m : : number . text ( ) . toString ( ) ;
if ( ! newItem . volume ) {
newItem . volume = part . m : : detail . ( @ type = = " volume " ) . m : : text . text ( ) . toString ( ) ;
}
/ / number
2006-08-06 17:34:41 +00:00
newItem . issue = part . m : : detail . ( @ type = = " issue " ) . m : : number . text ( ) . toString ( ) ;
if ( ! newItem . issue ) {
newItem . issue = part . m : : detail . ( @ type = = " issue " ) . m : : text . text ( ) . toString ( ) ;
2006-08-06 09:34:51 +00:00
}
/ / section
newItem . section = part . m : : detail . ( @ type = = " section " ) . m : : number . text ( ) . toString ( ) ;
if ( ! newItem . section ) {
newItem . section = part . m : : detail . ( @ type = = " section " ) . m : : text . text ( ) . toString ( ) ;
}
/ / pages
var pagesStart = part . m : : extent . ( @ unit = = " pages " ) . m : : start . text ( ) . toString ( ) ;
var pagesEnd = part . m : : extent . ( @ unit = = " pages " ) . m : : end . text ( ) . toString ( ) ;
if ( pagesStart | | pagesEnd ) {
if ( pagesStart & & pagesEnd & & pagesStart ! = pagesEnd ) {
newItem . pages = pagesStart + " - " + pagesEnd ;
} else {
newItem . pages = pagesStart + pagesEnd ;
}
}
/ / edition
newItem . edition = originInfo . m : : edition . text ( ) . toString ( ) ;
/ / place
newItem . place = originInfo . m : : place . m : : placeTerm . text ( ) . toString ( ) ;
/ / publisher / distributor
newItem . publisher = newItem . distributor = originInfo . m : : publisher . text ( ) . toString ( ) ;
/ / date
newItem . date = originInfo . m : : copyrightDate . text ( ) . toString ( ) ;
if ( ! newItem . date ) {
newItem . date = originInfo . m : : dateIssued . text ( ) . toString ( ) ;
if ( ! newItem . date ) {
newItem . date = originInfo . dateCreated . text ( ) . toString ( ) ;
}
}
2006-08-06 17:34:41 +00:00
/ / lastModified
newItem . lastModified = originInfo . m : : dateModified . text ( ) . toString ( ) ;
/ / accessDate
newItem . accessDate = originInfo . m : : dateCaptured . text ( ) . toString ( ) ;
2006-08-06 09:34:51 +00:00
/ / ISBN
2006-08-06 17:34:41 +00:00
newItem . ISBN = identifier . ( @ type = = " isbn " ) . text ( ) . toString ( )
2006-08-06 09:34:51 +00:00
/ / ISSN
2006-08-06 17:34:41 +00:00
newItem . ISSN = identifier . ( @ type = = " issn " ) . text ( ) . toString ( )
/ / DOI
newItem . DOI = identifier . ( @ type = = " doi " ) . text ( ) . toString ( )
2006-08-06 09:34:51 +00:00
/ / publication
2006-08-06 17:34:41 +00:00
newItem . publicationTitle = mods . m : : relatedItem . m : : publication . text ( ) . toString ( ) ;
2006-08-06 09:34:51 +00:00
/ / call number
newItem . callNumber = mods . m : : classification . text ( ) . toString ( ) ;
/ / archiveLocation
newItem . archiveLocation = mods . m : : location . m : : physicalLocation . text ( ) . toString ( ) ;
/ / url
newItem . url = mods . m : : location . m : : url . text ( ) . toString ( ) ;
2006-08-06 17:34:41 +00:00
/ / journalAbbreviation
newItem . journalAbbreviation = mods . m : : relatedItem . ( m : : titleInfo . @ type = = " abbreviated " ) . m : : titleInfo . m : : title . text ( ) . toString ( ) ;
2006-08-06 09:34:51 +00:00
/* * NOTES * */
for each ( var note in mods . m : : note ) {
newItem . notes . push ( { note : note . text ( ) . toString ( ) } ) ;
}
/* * TAGS * */
for each ( var subject in mods . m : : subject ) {
newItem . tags . push ( subject . text ( ) . toString ( ) ) ;
}
newItem . complete ( ) ;
}
2006-06-30 19:21:36 +00:00
} ' );
2006-10-02 23:15:27 +00:00
-- Zotero RDF export translator (translatorType 2 = export, priority 25).
-- Column order: translatorID, lastUpdated, inRepository, priority,
-- translatorType, label, creator, target (file extension), detectCode
-- (translator configuration, run at load time), code (the translator body).
REPLACE INTO " translators " VALUES ( ' 14763d24-8ba0-45df-8f52-b8d1108e7ac9 ' , ' 2006-10-02 17:00:00 ' , 1 , 25 , 2 , ' Zotero RDF ' , ' Simon Kornblith ' , ' rdf ' ,
' Zotero.configure("getCollections", true);
Zotero . configure ( " dataMode " , " rdf " ) ;
Zotero . addOption ( " exportNotes " , true ) ;
Zotero . addOption ( " exportFileData " , false ) ; ' ,
2006-07-07 18:41:21 +00:00
' function generateSeeAlso(resource, seeAlso) {
for ( var i in seeAlso ) {
2006-08-20 04:35:04 +00:00
if ( itemResources [ seeAlso [ i ] ] ) {
2006-10-02 23:15:27 +00:00
Zotero . RDF . addStatement ( resource , n . dc + " relation " , itemResources [ seeAlso [ i ] ] , false ) ;
2006-08-20 04:35:04 +00:00
}
}
}
function generateTags(resource, tags) {
	// Each tag becomes a literal dc:subject statement on the resource.
	for(var tagKey in tags) {
		Zotero.RDF.addStatement(resource, n.dc+"subject", tags[tagKey], true);
	}
}
function generateCollection ( collection ) {
var collectionResource = " #collection: " + collection . id ;
2006-10-02 23:15:27 +00:00
Zotero . RDF . addStatement ( collectionResource , rdf + " type " , n . bib + " Collection " , false ) ;
Zotero . RDF . addStatement ( collectionResource , n . dc + " title " , collection . name , true ) ;
2006-07-07 18:41:21 +00:00
2006-08-05 20:58:45 +00:00
for each ( var child in collection . children ) {
2006-07-07 18:41:21 +00:00
/ / add child list items
if ( child . type = = " collection " ) {
2006-10-02 23:15:27 +00:00
Zotero . RDF . addStatement ( collectionResource , n . dcterms + " hasPart " , " #collection: " + child . id , false ) ;
2006-07-07 18:41:21 +00:00
/ / do recursive processing of collections
generateCollection ( child ) ;
2006-08-18 05:58:14 +00:00
} else if ( itemResources [ child . id ] ) {
2006-10-02 23:15:27 +00:00
Zotero . RDF . addStatement ( collectionResource , n . dcterms + " hasPart " , itemResources [ child . id ] , false ) ;
2006-07-07 18:41:21 +00:00
}
}
}
2006-08-18 05:58:14 +00:00
function handleAttachment(attachmentResource, attachment) {
	// Serialize one attachment: type it, then record path, URL, MIME type,
	// charset, title, and finally its see-also links and tags.
	Zotero.RDF.addStatement(attachmentResource, rdf+"type", n.fs+"Attachment", false);
	
	// local file path, when the attachment is stored on disk
	if(attachment.path) {
		Zotero.RDF.addStatement(attachmentResource, rdf+"resource", attachment.path, false);
	}
	
	// URL, wrapped in a dcterms:URI node attached as a dc:identifier
	if(attachment.url) {
		var uriNode = Zotero.RDF.newResource();
		// type the node, give it the URL as its value, then link it in
		Zotero.RDF.addStatement(uriNode, rdf+"type", n.dcterms+"URI", false);
		Zotero.RDF.addStatement(uriNode, rdf+"value", attachment.url, true);
		Zotero.RDF.addStatement(attachmentResource, n.dc+"identifier", uriNode, false);
	}
	
	// MIME type; character set only when known
	Zotero.RDF.addStatement(attachmentResource, n.link+"type", attachment.mimeType, true);
	if(attachment.charset) {
		Zotero.RDF.addStatement(attachmentResource, n.link+"charset", attachment.charset, true);
	}
	
	// title
	Zotero.RDF.addStatement(attachmentResource, n.dc+"title", attachment.title, true);
	
	// see-also relations and tags
	generateSeeAlso(attachmentResource, attachment.seeAlso);
	generateTags(attachmentResource, attachment.tags);
}
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
function doExport ( ) {
2006-07-07 18:41:21 +00:00
rdf = " http://www.w3.org/1999/02/22-rdf-syntax-ns# " ;
n = {
bib : " http://purl.org/net/biblio# " ,
dc : " http://purl.org/dc/elements/1.1/ " ,
dcterms : " http://purl.org/dc/terms/ " ,
prism : " http://prismstandard.org/namespaces/1.2/basic/ " ,
foaf : " http://xmlns.com/foaf/0.1/ " ,
2006-08-18 05:58:14 +00:00
vcard : " http://nwalsh.com/rdf/vCard# " ,
2006-08-20 04:35:04 +00:00
link : " http://purl.org/rss/1.0/modules/link/ " ,
2006-08-30 19:57:23 +00:00
fs : " http://www.zotero.org/namespaces/export# "
2006-07-07 18:41:21 +00:00
} ;
/ / add namespaces
for ( var i in n ) {
2006-10-02 23:15:27 +00:00
Zotero . RDF . addNamespace ( i , n [ i ] ) ;
2006-07-07 18:41:21 +00:00
}
/ / leave as global
itemResources = new Array ( ) ;
2006-08-05 20:58:45 +00:00
/ / keep track of resources already assigned ( in case two book items have the
/ / same ISBN , or something like that )
var usedResources = new Array ( ) ;
var items = new Array ( ) ;
2006-07-07 18:41:21 +00:00
/ / first , map each ID to a resource
2006-10-02 23:15:27 +00:00
while ( item = Zotero . nextItem ( ) ) {
2006-08-05 20:58:45 +00:00
items . push ( item ) ;
2006-07-07 18:41:21 +00:00
2006-08-20 04:35:04 +00:00
if ( item . ISBN & & ! usedResources [ " urn:isbn: " + item . ISBN ] ) {
2006-07-07 18:41:21 +00:00
itemResources [ item . itemID ] = " urn:isbn: " + item . ISBN ;
2006-08-05 20:58:45 +00:00
usedResources [ itemResources [ item . itemID ] ] = true ;
2006-08-20 04:35:04 +00:00
} else if ( item . itemType ! = " attachment " & & item . url & & ! usedResources [ item . url ] ) {
2006-07-07 18:41:21 +00:00
itemResources [ item . itemID ] = item . url ;
2006-08-05 20:58:45 +00:00
usedResources [ itemResources [ item . itemID ] ] = true ;
2006-07-07 18:41:21 +00:00
} else {
/ / just specify a node ID
itemResources [ item . itemID ] = " #item: " + item . itemID ;
}
for ( var j in item . notes ) {
itemResources [ item . notes [ j ] . itemID ] = " #item: " + item . notes [ j ] . itemID ;
}
2006-08-18 05:58:14 +00:00
for each ( var attachment in item . attachments ) {
2006-08-20 04:35:04 +00:00
/ / just specify a node ID
itemResources [ attachment . itemID ] = " #item: " + attachment . itemID ;
2006-08-18 05:58:14 +00:00
}
2006-07-07 18:41:21 +00:00
}
2006-08-05 20:58:45 +00:00
for each ( item in items ) {
2006-07-07 18:41:21 +00:00
/ / these items are global
resource = itemResources [ item . itemID ] ;
container = null ;
containerElement = null ;
section = null ;
/* * CORE FIELDS * */
/ / title
if ( item . title ) {
2006-10-02 23:15:27 +00:00
Zotero . RDF . addStatement ( resource , n . dc + " title " , item . title , true ) ;
2006-07-07 18:41:21 +00:00
}
/ / type
var type = null ;
if ( item . itemType = = " book " ) {
type = " Book " ;
} else if ( item . itemType = = " bookSection " ) {
type = " BookSection " ;
container = " Book " ;
} else if ( item . itemType = = " journalArticle " ) {
type = " Article " ;
container = " Journal " ;
} else if ( item . itemType = = " magazineArticle " ) {
type = " Article " ;
container = " Periodical " ;
} else if ( item . itemType = = " newspaperArticle " ) {
type = " Article " ;
container = " Newspaper " ;
} else if ( item . itemType = = " thesis " ) {
type = " Thesis " ;
} else if ( item . itemType = = " letter " ) {
type = " Letter " ;
} else if ( item . itemType = = " manuscript " ) {
type = " Manuscript " ;
} else if ( item . itemType = = " interview " ) {
type = " Interview " ;
} else if ( item . itemType = = " film " ) {
type = " MotionPicture " ;
} else if ( item . itemType = = " artwork " ) {
type = " Illustration " ;
2006-10-02 00:00:50 +00:00
} else if ( item . itemType = = " webpage " ) {
2006-07-07 18:41:21 +00:00
type = " Document " ;
} else if ( item . itemType = = " note " ) {
type = " Memo " ;
2006-10-02 23:15:27 +00:00
if ( ! Zotero . getOption ( " exportNotes " ) ) {
2006-08-08 23:00:33 +00:00
continue ;
}
2006-08-18 05:58:14 +00:00
} else if ( item . itemType = = " attachment " ) {
handleAttachment ( resource , item ) ;
continue ;
2006-07-07 18:41:21 +00:00
}
if ( type ) {
2006-10-02 23:15:27 +00:00
Zotero . RDF . addStatement ( resource , rdf + " type " , n . bib + type , false ) ;
2006-07-07 18:41:21 +00:00
}
/ / authors / editors / contributors
var creatorContainers = new Object ( ) ;
for ( var j in item . creators ) {
2006-10-02 23:15:27 +00:00
var creator = Zotero . RDF . newResource ( ) ;
Zotero . RDF . addStatement ( creator , rdf + " type " , n . foaf + " Person " , false ) ;
2006-07-07 18:41:21 +00:00
/ / gee . an entire vocabulary for describing people , and these aren ' ' t even
/ / standardized in it . oh well . using them anyway .
2006-10-02 23:15:27 +00:00
Zotero . RDF . addStatement ( creator , n . foaf + " surname " , item . creators [ j ] . lastName , true ) ;
Zotero . RDF . addStatement ( creator , n . foaf + " givenname " , item . creators [ j ] . firstName , true ) ;
2006-07-07 18:41:21 +00:00
/ / in addition , these tags are not yet in Biblio , but Bruce D ' ' Arcus
/ / says they will be .
if ( item . creators [ j ] . creatorType = = " author " ) {
var cTag = " authors " ;
} else if ( item . creators [ j ] . creatorType = = " editor " ) {
var cTag = " editors " ;
} else {
var cTag = " contributors " ;
}
if ( ! creatorContainers [ cTag ] ) {
2006-10-02 23:15:27 +00:00
var creatorResource = Zotero . RDF . newResource ( ) ;
2006-07-07 18:41:21 +00:00
/ / create new seq for author type
2006-10-02 23:15:27 +00:00
creatorContainers [ cTag ] = Zotero . RDF . newContainer ( " seq " , creatorResource ) ;
2006-07-07 18:41:21 +00:00
/ / attach container to resource
2006-10-02 23:15:27 +00:00
Zotero . RDF . addStatement ( resource , n . bib + cTag , creatorResource , false ) ;
2006-07-07 18:41:21 +00:00
}
2006-10-02 23:15:27 +00:00
Zotero . RDF . addContainerElement ( creatorContainers [ cTag ] , creator , false ) ;
2006-07-07 18:41:21 +00:00
}
/* * FIELDS ON NEARLY EVERYTHING BUT NOT A PART OF THE CORE * */
/ / source
if ( item . source ) {
2006-10-02 23:15:27 +00:00
Zotero . RDF . addStatement ( resource , n . dc + " source " , item . source , true ) ;
2006-07-07 18:41:21 +00:00
}
2006-08-18 05:58:14 +00:00
/ / url
if ( item . url ) {
/ / add url as identifier
2006-10-02 23:15:27 +00:00
var term = Zotero . RDF . newResource ( ) ;
2006-08-18 05:58:14 +00:00
/ / set term type
2006-10-02 23:15:27 +00:00
Zotero . RDF . addStatement ( term , rdf + " type " , n . dcterms + " URI " , false ) ;
2006-08-18 05:58:14 +00:00
/ / set url value
2006-10-02 23:15:27 +00:00
Zotero . RDF . addStatement ( term , rdf + " value " , attachment . url , true ) ;
2006-08-18 05:58:14 +00:00
/ / add relationship to resource
2006-10-02 23:15:27 +00:00
Zotero . RDF . addStatement ( resource , n . dc + " identifier " , term , false ) ;
2006-08-18 05:58:14 +00:00
}
2006-07-07 18:41:21 +00:00
/ / accessionNumber as generic ID
if ( item . accessionNumber ) {
2006-10-02 23:15:27 +00:00
Zotero . RDF . addStatement ( resource , n . dc + " identifier " , item . accessionNumber , true ) ;
2006-07-07 18:41:21 +00:00
}
/ / rights
if ( item . rights ) {
2006-10-02 23:15:27 +00:00
Zotero . RDF . addStatement ( resource , n . dc + " rights " , item . rights , true ) ;
2006-07-07 18:41:21 +00:00
}
/* * SUPPLEMENTAL FIELDS * */
/ / use section to set up another container element
if ( item . section ) {
2006-10-02 23:15:27 +00:00
section = Zotero . RDF . newResource ( ) ; / / leave as global
2006-07-07 18:41:21 +00:00
/ / set section type
2006-10-02 23:15:27 +00:00
Zotero . RDF . addStatement ( section , rdf + " type " , n . bib + " Part " , false ) ;
2006-07-07 18:41:21 +00:00
/ / set section title
2006-10-02 23:15:27 +00:00
Zotero . RDF . addStatement ( section , n . dc + " title " , item . section , true ) ;
2006-07-07 18:41:21 +00:00
/ / add relationship to resource
2006-10-02 23:15:27 +00:00
Zotero . RDF . addStatement ( resource , n . dc + " isPartOf " , section , false ) ;
2006-07-07 18:41:21 +00:00
}
2006-08-05 20:58:45 +00:00
/ / generate container
if ( container ) {
2006-10-02 23:15:27 +00:00
if ( item . ISSN & & ! Zotero . RDF . getArcsIn ( " urn:issn: " + item . ISSN ) ) {
2006-08-05 20:58:45 +00:00
/ / use ISSN as container URI if no other item is
containerElement = " urn:issn: " + item . ISSN
} else {
2006-10-02 23:15:27 +00:00
containerElement = Zotero . RDF . newResource ( ) ;
2006-08-05 20:58:45 +00:00
}
2006-07-07 18:41:21 +00:00
/ / attach container to section ( if exists ) or resource
2006-10-02 23:15:27 +00:00
Zotero . RDF . addStatement ( ( section ? section : resource ) , n . dcterms + " isPartOf " , containerElement , false ) ;
2006-08-05 20:58:45 +00:00
/ / add container type
2006-10-02 23:15:27 +00:00
Zotero . RDF . addStatement ( containerElement , rdf + " type " , n . bib + container , false ) ;
2006-08-05 20:58:45 +00:00
}
/ / ISSN
if ( item . ISSN ) {
2006-10-02 23:15:27 +00:00
Zotero . RDF . addStatement ( ( containerElement ? containerElement : resource ) , n . dc + " identifier " , " ISSN " + item . ISSN , true ) ;
2006-08-05 20:58:45 +00:00
}
/ / ISBN
if ( item . ISBN ) {
2006-10-02 23:15:27 +00:00
Zotero . RDF . addStatement ( ( containerElement ? containerElement : resource ) , n . dc + " identifier " , " ISBN " + item . ISBN , true ) ;
2006-07-07 18:41:21 +00:00
}
2006-08-06 17:34:41 +00:00
/ / DOI
if ( item . DOI ) {
2006-10-02 23:15:27 +00:00
Zotero . RDF . addStatement ( ( containerElement ? containerElement : resource ) , n . dc + " identifier " , " DOI " + item . DOI , true ) ;
2006-08-06 17:34:41 +00:00
}
2006-07-07 18:41:21 +00:00
/ / publication gets linked to container via isPartOf
2006-08-18 05:58:14 +00:00
if ( item . publicationTitle ) {
2006-10-02 23:15:27 +00:00
Zotero . RDF . addStatement ( ( containerElement ? containerElement : resource ) , n . dc + " title " , item . publicationTitle , true ) ;
2006-07-07 18:41:21 +00:00
}
/ / series also linked in
2006-08-06 17:34:41 +00:00
if ( item . seriesTitle ) {
2006-10-02 23:15:27 +00:00
var series = Zotero . RDF . newResource ( ) ;
2006-07-07 18:41:21 +00:00
/ / set series type
2006-10-02 23:15:27 +00:00
Zotero . RDF . addStatement ( series , rdf + " type " , n . bib + " Series " , false ) ;
2006-07-07 18:41:21 +00:00
/ / set series title
2006-10-02 23:15:27 +00:00
Zotero . RDF . addStatement ( series , n . dc + " title " , item . seriesTitle , true ) ;
2006-07-07 18:41:21 +00:00
/ / add relationship to resource
2006-10-02 23:15:27 +00:00
Zotero . RDF . addStatement ( ( containerElement ? containerElement : resource ) , n . dcterms + " isPartOf " , series , false ) ;
2006-07-07 18:41:21 +00:00
}
/ / volume
if ( item . volume ) {
2006-10-02 23:15:27 +00:00
Zotero . RDF . addStatement ( ( containerElement ? containerElement : resource ) , n . prism + " volume " , item . volume , true ) ;
2006-07-07 18:41:21 +00:00
}
/ / number
2006-08-06 17:34:41 +00:00
if ( item . issue ) {
2006-10-02 23:15:27 +00:00
Zotero . RDF . addStatement ( ( containerElement ? containerElement : resource ) , n . prism + " number " , item . issue , true ) ;
2006-07-07 18:41:21 +00:00
}
/ / edition
if ( item . edition ) {
2006-10-02 23:15:27 +00:00
Zotero . RDF . addStatement ( resource , n . prism + " edition " , item . edition , true ) ;
2006-07-07 18:41:21 +00:00
}
/ / publisher / distributor and place
if ( item . publisher | | item . distributor | | item . place ) {
2006-10-02 23:15:27 +00:00
var organization = Zotero . RDF . newResource ( ) ;
2006-07-07 18:41:21 +00:00
/ / set organization type
2006-10-02 23:15:27 +00:00
Zotero . RDF . addStatement ( organization , rdf + " type " , n . foaf + " Organization " , false ) ;
2006-07-07 18:41:21 +00:00
/ / add relationship to resource
2006-10-02 23:15:27 +00:00
Zotero . RDF . addStatement ( resource , n . dc + " publisher " , organization , false ) ;
2006-07-07 18:41:21 +00:00
/ / add publisher / distributor
if ( item . publisher ) {
2006-10-02 23:15:27 +00:00
Zotero . RDF . addStatement ( organization , n . foaf + " name " , item . publisher , true ) ;
2006-07-07 18:41:21 +00:00
} else if ( item . distributor ) {
2006-10-02 23:15:27 +00:00
Zotero . RDF . addStatement ( organization , n . foaf + " name " , item . distributor , true ) ;
2006-07-07 18:41:21 +00:00
}
/ / add place
if ( item . place ) {
2006-10-02 23:15:27 +00:00
var address = Zotero . RDF . newResource ( ) ;
2006-07-07 18:41:21 +00:00
/ / set address type
2006-10-02 23:15:27 +00:00
Zotero . RDF . addStatement ( address , rdf + " type " , n . vcard + " Address " , false ) ;
2006-07-07 18:41:21 +00:00
/ / set address locality
2006-10-02 23:15:27 +00:00
Zotero . RDF . addStatement ( address , n . vcard + " locality " , item . place , true ) ;
2006-07-07 18:41:21 +00:00
/ / add relationship to organization
2006-10-02 23:15:27 +00:00
Zotero . RDF . addStatement ( organization , n . vcard + " adr " , address , false ) ;
2006-07-07 18:41:21 +00:00
}
}
/ / date / year
if ( item . date ) {
2006-10-02 23:15:27 +00:00
Zotero . RDF . addStatement ( resource , n . dc + " date " , item . date , true ) ;
2006-08-06 17:34:41 +00:00
}
if ( item . accessDate ) { / / use date submitted for access date ?
2006-10-02 23:15:27 +00:00
Zotero . RDF . addStatement ( resource , n . dcterms + " dateSubmitted " , item . accessDate , true ) ;
2006-08-06 17:34:41 +00:00
}
2006-07-07 18:41:21 +00:00
/ / callNumber
if ( item . callNumber ) {
2006-10-02 23:15:27 +00:00
var term = Zotero . RDF . newResource ( ) ;
2006-07-07 18:41:21 +00:00
/ / set term type
2006-10-02 23:15:27 +00:00
Zotero . RDF . addStatement ( term , rdf + " type " , n . dcterms + " LCC " , false ) ;
2006-07-07 18:41:21 +00:00
/ / set callNumber value
2006-10-02 23:15:27 +00:00
Zotero . RDF . addStatement ( term , rdf + " value " , item . callNumber , true ) ;
2006-07-07 18:41:21 +00:00
/ / add relationship to resource
2006-10-02 23:15:27 +00:00
Zotero . RDF . addStatement ( resource , n . dc + " subject " , term , false ) ;
2006-07-07 18:41:21 +00:00
}
/ / archiveLocation
if ( item . archiveLocation ) {
2006-10-02 23:15:27 +00:00
Zotero . RDF . addStatement ( resource , n . dc + " coverage " , item . archiveLocation , true ) ;
2006-07-07 18:41:21 +00:00
}
/ / type ( not itemType )
if ( item . type ) {
2006-10-02 23:15:27 +00:00
Zotero . RDF . addStatement ( resource , n . dc + " type " , item . type , true ) ;
2006-07-07 18:41:21 +00:00
} else if ( item . thesisType ) {
2006-10-02 23:15:27 +00:00
Zotero . RDF . addStatement ( resource , n . dc + " type " , item . thesisType , true ) ;
2006-07-07 18:41:21 +00:00
}
/ / THIS IS NOT YET IN THE BIBLIO NAMESPACE , BUT BRUCE D ' ' ARCUS HAS SAID
/ / IT WILL BE SOON
if ( item . pages ) {
2006-10-02 23:15:27 +00:00
Zotero . RDF . addStatement ( resource , n . bib + " pages " , item . pages , true ) ;
2006-07-07 18:41:21 +00:00
}
2006-08-06 17:34:41 +00:00
/ / journalAbbreviation
if ( item . journalAbbreviation ) {
2006-10-02 23:15:27 +00:00
Zotero . RDF . addStatement ( ( containerElement ? containerElement : resource ) , n . dcterms + " alternative " , item . journalAbbreviation , true ) ;
2006-08-06 17:34:41 +00:00
}
2006-08-31 00:04:11 +00:00
/ / extra
if ( item . extra ) {
2006-10-02 23:15:27 +00:00
Zotero . RDF . addStatement ( resource , n . dc + " description " , item . extra , true ) ;
2006-08-31 00:04:11 +00:00
}
2006-07-07 18:41:21 +00:00
/* * NOTES * */
2006-10-02 23:15:27 +00:00
if ( Zotero . getOption ( " exportNotes " ) ) {
2006-08-08 23:00:33 +00:00
for ( var j in item . notes ) {
var noteResource = itemResources [ item . notes [ j ] . itemID ] ;
/ / add note tag
2006-10-02 23:15:27 +00:00
Zotero . RDF . addStatement ( noteResource , rdf + " type " , n . bib + " Memo " , false ) ;
2006-08-08 23:00:33 +00:00
/ / add note value
2006-10-02 23:15:27 +00:00
Zotero . RDF . addStatement ( noteResource , rdf + " value " , item . notes [ j ] . note , true ) ;
2006-08-08 23:00:33 +00:00
/ / add relationship between resource and note
2006-10-02 23:15:27 +00:00
Zotero . RDF . addStatement ( resource , n . dcterms + " isReferencedBy " , noteResource , false ) ;
2006-08-08 23:00:33 +00:00
/ / Add see also info to RDF
2006-08-20 04:35:04 +00:00
generateSeeAlso ( noteResource , item . notes [ j ] . seeAlso ) ;
generateTags ( noteResource , item . notes [ j ] . tags ) ;
2006-08-08 23:00:33 +00:00
}
2006-07-07 18:41:21 +00:00
2006-08-08 23:00:33 +00:00
if ( item . note ) {
2006-10-02 23:15:27 +00:00
Zotero . RDF . addStatement ( resource , rdf + " value " , item . note , true ) ;
2006-08-08 23:00:33 +00:00
}
2006-07-07 18:41:21 +00:00
}
2006-08-18 05:58:14 +00:00
/* * FILES * */
for each ( var attachment in item . attachments ) {
var attachmentResource = itemResources [ attachment . itemID ] ;
2006-10-02 23:15:27 +00:00
Zotero . RDF . addStatement ( resource , n . link + " link " , attachmentResource , false ) ;
2006-08-18 05:58:14 +00:00
handleAttachment ( attachmentResource , attachment ) ;
}
2006-08-20 04:35:04 +00:00
/* * SEE ALSO AND TAGS * */
2006-07-07 18:41:21 +00:00
2006-08-05 20:58:45 +00:00
generateSeeAlso ( resource , item . seeAlso ) ;
2006-08-20 04:35:04 +00:00
generateTags ( resource , item . tags ) ;
2006-07-07 18:41:21 +00:00
}
/* * RDF COLLECTION STRUCTURE * */
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
var collection ;
2006-10-02 23:15:27 +00:00
while ( collection = Zotero . nextCollection ( ) ) {
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
generateCollection ( collection ) ;
2006-07-07 18:41:21 +00:00
}
} ' );
2006-10-02 23:15:27 +00:00
-- Unqualified Dublin Core RDF export translator (translatorType 2 = export,
-- priority 100); detectCode only configures RDF data mode.
REPLACE INTO " translators " VALUES ( ' 6e372642-ed9d-4934-b5d1-c11ac758ebb7 ' , ' 2006-10-02 17:00:00 ' , 1 , 100 , 2 , ' Unqualified Dublin Core RDF ' , ' Simon Kornblith ' , ' rdf ' ,
' Zotero.configure("dataMode", "rdf"); ' ,
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
' function doExport() {
2006-07-06 21:55:46 +00:00
var dc = " http://purl.org/dc/elements/1.1/ " ;
2006-10-02 23:15:27 +00:00
Zotero . RDF . addNamespace ( " dc " , dc ) ;
2006-07-05 21:44:01 +00:00
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
var item ;
2006-10-02 23:15:27 +00:00
while ( item = Zotero . nextItem ( ) ) {
2006-09-09 22:00:04 +00:00
if ( item . itemType = = " note " | | item . itemType = = " attachment " ) {
2006-07-05 21:44:01 +00:00
continue ;
}
2006-07-06 21:55:46 +00:00
var resource ;
2006-07-05 21:44:01 +00:00
if ( item . ISBN ) {
2006-07-06 21:55:46 +00:00
resource = " urn:isbn: " + item . ISBN ;
2006-07-05 21:44:01 +00:00
} else if ( item . url ) {
2006-07-06 21:55:46 +00:00
resource = item . url ;
2006-07-05 21:44:01 +00:00
} else {
2006-07-06 03:39:32 +00:00
/ / just specify a node ID
2006-10-02 23:15:27 +00:00
resource = Zotero . RDF . newResource ( ) ;
2006-07-05 21:44:01 +00:00
}
/* * CORE FIELDS * */
2006-07-07 18:41:21 +00:00
/ / title
if ( item . title ) {
2006-10-02 23:15:27 +00:00
Zotero . RDF . addStatement ( resource , dc + " title " , item . title , true ) ;
2006-07-05 21:44:01 +00:00
}
2006-07-07 18:41:21 +00:00
/ / type
2006-10-02 23:15:27 +00:00
Zotero . RDF . addStatement ( resource , dc + " type " , item . itemType , true ) ;
2006-07-07 18:41:21 +00:00
/ / creators
2006-07-05 21:44:01 +00:00
for ( var j in item . creators ) {
/ / put creators in lastName , firstName format ( although DC doesn ' ' t specify )
var creator = item . creators [ j ] . lastName ;
if ( item . creators [ j ] . firstName ) {
creator + = " , " + item . creators [ j ] . firstName ;
}
if ( item . creators [ j ] . creatorType = = " author " ) {
2006-10-02 23:15:27 +00:00
Zotero . RDF . addStatement ( resource , dc + " creator " , creator , true ) ;
2006-07-05 21:44:01 +00:00
} else {
2006-10-02 23:15:27 +00:00
Zotero . RDF . addStatement ( resource , dc + " contributor " , creator , true ) ;
2006-07-05 21:44:01 +00:00
}
}
/* * FIELDS ON NEARLY EVERYTHING BUT NOT A PART OF THE CORE * */
/ / source
if ( item . source ) {
2006-10-02 23:15:27 +00:00
Zotero . RDF . addStatement ( resource , dc + " source " , item . source , true ) ;
2006-07-05 21:44:01 +00:00
}
/ / accessionNumber as generic ID
if ( item . accessionNumber ) {
2006-10-02 23:15:27 +00:00
Zotero . RDF . addStatement ( resource , dc + " identifier " , item . accessionNumber , true ) ;
2006-07-05 21:44:01 +00:00
}
/ / rights
if ( item . rights ) {
2006-10-02 23:15:27 +00:00
Zotero . RDF . addStatement ( resource , dc + " rights " , item . rights , true ) ;
2006-07-05 21:44:01 +00:00
}
/* * SUPPLEMENTAL FIELDS * */
/ / TODO - create text citation and OpenURL citation to handle volume , number , pages , issue , place
/ / publisher / distributor
if ( item . publisher ) {
2006-10-02 23:15:27 +00:00
Zotero . RDF . addStatement ( resource , dc + " publisher " , item . publisher , true ) ;
2006-07-05 21:44:01 +00:00
} else if ( item . distributor ) {
2006-10-02 23:15:27 +00:00
Zotero . RDF . addStatement ( resource , dc + " publisher " , item . distributor , true ) ;
2006-07-05 21:44:01 +00:00
}
/ / date / year
if ( item . date ) {
2006-10-02 23:15:27 +00:00
Zotero . RDF . addStatement ( resource , dc + " date " , item . date , true ) ;
2006-07-05 21:44:01 +00:00
}
2006-08-06 17:34:41 +00:00
/ / ISBN / ISSN / DOI
2006-07-05 21:44:01 +00:00
if ( item . ISBN ) {
2006-10-02 23:15:27 +00:00
Zotero . RDF . addStatement ( resource , dc + " identifier " , " ISBN " + item . ISBN , true ) ;
2006-08-06 17:34:41 +00:00
}
if ( item . ISSN ) {
2006-10-02 23:15:27 +00:00
Zotero . RDF . addStatement ( resource , dc + " identifier " , " ISSN " + item . ISSN , true ) ;
2006-07-05 21:44:01 +00:00
}
2006-08-06 17:34:41 +00:00
if ( item . DOI ) {
2006-10-02 23:15:27 +00:00
Zotero . RDF . addStatement ( resource , dc + " identifier " , " DOI " + item . DOI , true ) ;
2006-08-06 17:34:41 +00:00
}
2006-07-05 21:44:01 +00:00
/ / callNumber
if ( item . callNumber ) {
2006-10-02 23:15:27 +00:00
Zotero . RDF . addStatement ( resource , dc + " identifier " , item . callNumber , true ) ;
2006-07-05 21:44:01 +00:00
}
/ / archiveLocation
if ( item . archiveLocation ) {
2006-10-02 23:15:27 +00:00
Zotero . RDF . addStatement ( resource , dc + " coverage " , item . archiveLocation , true ) ;
2006-07-05 21:44:01 +00:00
}
}
} ' );
2006-10-02 23:15:27 +00:00
REPLACE INTO " translators " VALUES ( ' 5e3ad958-ac79-463d-812b-a86a9235c28f ' , ' 2006-10-02 17:00:00 ' , 1 , 100 , 1 , ' RDF ' , ' Simon Kornblith ' , ' rdf ' ,
' Zotero.configure("dataMode", "rdf");
2006-08-08 02:46:52 +00:00
function detectImport ( ) {
/ / unfortunately , Mozilla will let you create a data source from any type
/ / of XML , so we need to make sure there are actually nodes
2006-10-02 23:15:27 +00:00
var nodes = Zotero . RDF . getAllResources ( ) ;
2006-08-08 02:46:52 +00:00
if ( nodes ) {
return true ;
}
} ' ,
2006-08-05 20:58:45 +00:00
' // gets the first result set for a property that can be encoded in multiple
/ / ontologies
function getFirstResults ( node , properties , onlyOneString ) {
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
for ( var i = 0 ; i < properties . length ; i + + ) {
2006-10-02 23:15:27 +00:00
var result = Zotero . RDF . getTargets ( node , properties [ i ] ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
if ( result ) {
if ( onlyOneString ) {
/ / onlyOneString means we won ' ' t return nsIRDFResources , only
/ / actual literals
2006-08-05 20:58:45 +00:00
if ( typeof ( result [ 0 ] ) ! = " object " ) {
return result [ 0 ] ;
}
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
} else {
return result ;
}
}
2006-07-05 21:44:01 +00:00
}
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
return ; / / return undefined on failure
2006-07-05 21:44:01 +00:00
}
2006-08-05 20:58:45 +00:00
/ / adds creators to an item given a list of creator nodes
function handleCreators ( newItem , creators , creatorType ) {
if ( ! creators ) {
return ;
}
if ( typeof ( creators [ 0 ] ) ! = " string " ) { / / see if creators are in a container
try {
2006-10-02 23:15:27 +00:00
var creators = Zotero . RDF . getContainerElements ( creators [ 0 ] ) ;
2006-08-05 20:58:45 +00:00
} catch ( e ) { }
}
if ( typeof ( creators [ 0 ] ) = = " string " ) { / / support creators encoded as strings
for ( var i in creators ) {
if ( typeof ( creators [ i ] ) ! = " object " ) {
2006-10-02 23:15:27 +00:00
newItem . creators . push ( Zotero . Utilities . cleanAuthor ( creators [ i ] , creatorType , true ) ) ;
2006-08-05 20:58:45 +00:00
}
}
} else { / / also support foaf
for ( var i in creators ) {
2006-10-02 23:15:27 +00:00
var type = Zotero . RDF . getTargets ( creators [ i ] , rdf + " type " ) ;
2006-08-05 20:58:45 +00:00
if ( type ) {
2006-10-02 23:15:27 +00:00
type = Zotero . RDF . getResourceURI ( type [ 0 ] ) ;
2006-08-05 20:58:45 +00:00
if ( type = = n . foaf + " Person " ) { / / author is FOAF type person
var creator = new Array ( ) ;
creator . lastName = getFirstResults ( creators [ i ] ,
[ n . foaf + " surname " , n . foaf + " family_name " ] , true ) ;
creator . firstName = getFirstResults ( creators [ i ] ,
[ n . foaf + " givenname " , n . foaf + " firstName " ] , true ) ;
creator . creatorType = creatorType ;
newItem . creators . push ( creator ) ;
}
}
}
}
}
2006-08-18 05:58:14 +00:00
/ / gets attachment info
function handleAttachment ( node , attachment ) {
if ( ! attachment ) {
attachment = new Array ( ) ;
}
attachment . title = getFirstResults ( node , [ n . dc + " title " ] , true ) ;
2006-08-20 04:35:04 +00:00
var path = getFirstResults ( node , [ rdf + " resource " ] ) ;
if ( path ) {
2006-10-02 23:15:27 +00:00
attachment . path = Zotero . RDF . getResourceURI ( path [ 0 ] ) ;
2006-08-20 04:35:04 +00:00
}
attachment . charset = getFirstResults ( node , [ n . link + " charset " ] , true ) ;
attachment . mimeType = getFirstResults ( node , [ n . link + " type " ] , true ) ;
2006-08-18 05:58:14 +00:00
var identifiers = getFirstResults ( node , [ n . dc + " identifier " ] ) ;
for each ( var identifier in identifiers ) {
if ( typeof ( identifier ) ! = " string " ) {
2006-10-02 23:15:27 +00:00
var identifierType = Zotero . RDF . getTargets ( identifier , rdf + " type " ) ;
2006-08-18 05:58:14 +00:00
if ( identifierType ) {
2006-10-02 23:15:27 +00:00
identifierType = Zotero . RDF . getResourceURI ( identifierType [ 0 ] ) ;
2006-08-18 05:58:14 +00:00
if ( identifierType = = n . dcterms + " URI " ) { / / uri is url
attachment . url = getFirstResults ( identifier , [ rdf + " value " ] , true ) ;
}
}
}
}
2006-08-20 04:35:04 +00:00
/ / get seeAlso and tags
processSeeAlso ( node , attachment ) ;
processTags ( node , attachment ) ;
2006-08-18 05:58:14 +00:00
return attachment ;
}
2006-08-05 20:58:45 +00:00
/ / processes collections recursively
function processCollection ( node , collection ) {
if ( ! collection ) {
collection = new Array ( ) ;
}
collection . type = " collection " ;
collection . name = getFirstResults ( node , [ n . dc + " title " ] , true ) ;
collection . children = new Array ( ) ;
/ / check for children
var children = getFirstResults ( node , [ n . dcterms + " hasPart " ] ) ;
for each ( var child in children ) {
2006-10-02 23:15:27 +00:00
var type = Zotero . RDF . getTargets ( child , rdf + " type " ) ;
2006-08-05 20:58:45 +00:00
if ( type ) {
2006-10-02 23:15:27 +00:00
type = Zotero . RDF . getResourceURI ( type [ 0 ] ) ;
2006-08-05 20:58:45 +00:00
}
if ( type = = n . bib + " Collection " ) {
/ / for collections , process recursively
collection . children . push ( processCollection ( child ) ) ;
} else {
/ / all other items are added by ID
2006-10-02 23:15:27 +00:00
collection . children . push ( { id : Zotero . RDF . getResourceURI ( child ) , type : " item " } ) ;
2006-08-05 20:58:45 +00:00
}
}
return collection ;
}
2006-08-20 04:35:04 +00:00
function processSeeAlso ( node , newItem ) {
var relations ;
2006-10-02 23:15:27 +00:00
newItem . itemID = Zotero . RDF . getResourceURI ( node ) ;
2006-08-20 04:35:04 +00:00
newItem . seeAlso = new Array ( ) ;
if ( relations = getFirstResults ( node , [ n . dc + " relation " ] ) ) {
for each ( var relation in relations ) {
2006-10-02 23:15:27 +00:00
newItem . seeAlso . push ( Zotero . RDF . getResourceURI ( relation ) ) ;
2006-08-20 04:35:04 +00:00
}
}
}
function processTags ( node , newItem ) {
var subjects ;
newItem . tags = new Array ( ) ;
if ( subjects = getFirstResults ( node , [ n . dc + " subject " ] ) ) {
for each ( var subject in subjects ) {
if ( typeof ( subject ) = = " string " ) { / / a regular tag
newItem . tags . push ( subject ) ;
}
}
}
}
2006-08-05 20:58:45 +00:00
/ / gets the node with a given type from an array
function getNodeByType ( nodes , type ) {
if ( ! nodes ) {
return false ;
}
for each ( node in nodes ) {
2006-10-02 23:15:27 +00:00
var nodeType = Zotero . RDF . getTargets ( node , rdf + " type " ) ;
2006-08-05 20:58:45 +00:00
if ( nodeType ) {
2006-10-02 23:15:27 +00:00
nodeType = Zotero . RDF . getResourceURI ( nodeType [ 0 ] ) ;
2006-08-05 20:58:45 +00:00
if ( nodeType = = type ) { / / we have a node of the correct type
return node ;
}
}
}
return false ;
}
2006-08-20 04:35:04 +00:00
/ / returns true if this resource is part of another ( related by any arc besides
/ / dc : relation or dcterms : hasPart )
/ /
/ / used to differentiate independent notes and files
function isPart ( node ) {
2006-10-02 23:15:27 +00:00
var arcs = Zotero . RDF . getArcsIn ( node ) ;
2006-08-20 04:35:04 +00:00
var skip = false ;
for each ( var arc in arcs ) {
2006-10-02 23:15:27 +00:00
arc = Zotero . RDF . getResourceURI ( arc ) ;
2006-08-20 04:35:04 +00:00
if ( arc ! = n . dc + " relation " & & arc ! = n . dcterms + " hasPart " ) {
/ / related to another item by some arc besides see also
skip = true ;
}
}
return skip ;
}
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
function doImport ( ) {
2006-08-05 20:58:45 +00:00
rdf = " http://www.w3.org/1999/02/22-rdf-syntax-ns# " ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
n = {
bib : " http://purl.org/net/biblio# " ,
dc : " http://purl.org/dc/elements/1.1/ " ,
dcterms : " http://purl.org/dc/terms/ " ,
prism : " http://prismstandard.org/namespaces/1.2/basic/ " ,
foaf : " http://xmlns.com/foaf/0.1/ " ,
2006-08-18 05:58:14 +00:00
vcard : " http://nwalsh.com/rdf/vCard# " ,
2006-08-20 04:35:04 +00:00
link : " http://purl.org/rss/1.0/modules/link/ " ,
2006-08-30 19:57:23 +00:00
fs : " http://www.zotero.org/namespaces/export# "
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
} ;
2006-08-05 20:58:45 +00:00
callNumberTypes = [
n . dcterms + " LCC " , n . dcterms + " DDC " , n . dcterms + " UDC "
] ;
2006-10-02 23:15:27 +00:00
var nodes = Zotero . RDF . getAllResources ( ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
if ( ! nodes ) {
return false ;
}
2006-08-05 20:58:45 +00:00
/ / keep track of collections while we ' ' re looping through
var collections = new Array ( ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
2006-08-05 20:58:45 +00:00
for each ( var node in nodes ) {
2006-10-02 23:15:27 +00:00
var newItem = new Zotero . Item ( ) ;
newItem . itemID = Zotero . RDF . getResourceURI ( node ) ;
2006-08-05 20:58:45 +00:00
var container = undefined ;
2006-08-20 04:35:04 +00:00
/ / figure out if this is a part of another resource , or a linked
/ / attachment
2006-10-02 23:15:27 +00:00
if ( Zotero . RDF . getSources ( node , n . dcterms + " isPartOf " ) | |
Zotero . RDF . getSources ( node , n . link + " link " ) ) {
2006-08-20 04:35:04 +00:00
continue ;
}
2006-08-05 20:58:45 +00:00
/ / type
2006-10-02 23:15:27 +00:00
var type = Zotero . RDF . getTargets ( node , rdf + " type " ) ;
2006-08-05 20:58:45 +00:00
/ / also deal with type detection based on parts , so we can differentiate
/ / magazine and journal articles , and find container elements
var isPartOf = getFirstResults ( node , [ n . dcterms + " isPartOf " ] ) ;
if ( type ) {
2006-10-02 23:15:27 +00:00
type = Zotero . RDF . getResourceURI ( type [ 0 ] ) ;
2006-08-05 20:58:45 +00:00
if ( type = = n . bib + " Book " ) {
newItem . itemType = " book " ;
} else if ( type = = n . bib + " BookSection " ) {
newItem . itemType = " bookSection " ;
container = getNodeByType ( isPartOf , n . bib + " Book " ) ;
} else if ( type = = n . bib + " Article " ) { / / choose between journal ,
/ / newspaper , and magazine
/ / articles
if ( container = getNodeByType ( isPartOf , n . bib + " Journal " ) ) {
newItem . itemType = " journalArticle " ;
} else if ( container = getNodeByType ( isPartOf , n . bib + " Periodical " ) ) {
newItem . itemType = " magazineArticle " ;
} else if ( container = getNodeByType ( isPartOf , n . bib + " Newspaper " ) ) {
newItem . itemType = " newspaperArticle " ;
}
} else if ( type = = n . bib + " Thesis " ) {
newItem . itemType = " thesis " ;
} else if ( type = = n . bib + " Letter " ) {
newItem . itemType = " letter " ;
} else if ( type = = n . bib + " Manuscript " ) {
newItem . itemType = " manuscript " ;
} else if ( type = = n . bib + " Interview " ) {
newItem . itemType = " interview " ;
} else if ( type = = n . bib + " MotionPicture " ) {
newItem . itemType = " film " ;
} else if ( type = = n . bib + " Illustration " ) {
newItem . itemType = " illustration " ;
} else if ( type = = n . bib + " Document " ) {
2006-10-02 00:00:50 +00:00
newItem . itemType = " webpage " ;
2006-08-05 20:58:45 +00:00
} else if ( type = = n . bib + " Memo " ) {
/ / check to see if this note is independent
2006-08-20 04:35:04 +00:00
if ( isPart ( node ) ) {
2006-08-05 20:58:45 +00:00
continue ;
}
newItem . itemType = " note " ;
} else if ( type = = n . bib + " Collection " ) {
/ / skip collections until all the items are done
collections . push ( node ) ;
continue ;
2006-08-20 04:35:04 +00:00
} else if ( type = = n . fs + " Attachment " ) {
2006-08-18 05:58:14 +00:00
/ / check to see if file is independent
2006-08-20 04:35:04 +00:00
if ( isPart ( node ) ) {
2006-08-18 05:58:14 +00:00
continue ;
}
/ / process as file
newItem . itemType = " attachment " ;
handleAttachment ( node , newItem ) ;
2006-10-02 23:15:27 +00:00
Zotero . Utilities . debug ( newItem ) ;
2006-08-18 05:58:14 +00:00
newItem . complete ( ) ;
continue ;
2006-08-05 20:58:45 +00:00
} else { / / default to book
newItem . itemType = " book " ;
}
}
2006-07-05 21:44:01 +00:00
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
/ / title
newItem . title = getFirstResults ( node , [ n . dc + " title " ] , true ) ;
2006-08-05 20:58:45 +00:00
if ( newItem . itemType ! = " note " & & ! newItem . title ) { / / require the title
/ / ( if not a note )
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
continue ;
}
2006-08-05 20:58:45 +00:00
/ / regular author - type creators
var creators = getFirstResults ( node , [ n . bib + " authors " , n . dc + " creator " ] ) ;
handleCreators ( newItem , creators , " author " ) ;
/ / editors
var creators = getFirstResults ( node , [ n . bib + " editors " ] ) ;
handleCreators ( newItem , creators , " editor " ) ;
/ / contributors
var creators = getFirstResults ( node , [ n . bib + " contributors " ] ) ;
handleCreators ( newItem , creators , " contributor " ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
/ / source
newItem . source = getFirstResults ( node , [ n . dc + " source " ] , true ) ;
/ / rights
newItem . rights = getFirstResults ( node , [ n . dc + " rights " ] , true ) ;
2006-08-05 20:58:45 +00:00
/ / section
var section = getNodeByType ( isPartOf , n . bib + " Part " ) ;
if ( section ) {
newItem . section = getFirstResults ( section , [ n . dc + " title " ] , true ) ;
}
/ / publication
if ( container ) {
2006-08-06 17:34:41 +00:00
newItem . publicationTitle = getFirstResults ( container , [ n . dc + " title " ] , true ) ;
2006-08-05 20:58:45 +00:00
}
/ / series
var series = getNodeByType ( isPartOf , n . bib + " Series " ) ;
if ( series ) {
2006-08-06 17:34:41 +00:00
newItem . seriesTitle = getFirstResults ( container , [ n . dc + " title " ] , true ) ;
2006-08-05 20:58:45 +00:00
}
/ / volume
newItem . volume = getFirstResults ( ( container ? container : node ) , [ n . prism + " volume " ] , true ) ;
/ / number
2006-08-06 17:34:41 +00:00
newItem . issue = getFirstResults ( ( container ? container : node ) , [ n . prism + " number " ] , true ) ;
2006-08-05 20:58:45 +00:00
/ / edition
newItem . edition = getFirstResults ( node , [ n . prism + " edition " ] , true ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
/ / publisher
2006-08-05 20:58:45 +00:00
var publisher = getFirstResults ( node , [ n . dc + " publisher " ] ) ;
if ( publisher ) {
if ( typeof ( publisher [ 0 ] ) = = " string " ) {
newItem . publisher = publisher [ 0 ] ;
} else {
2006-10-02 23:15:27 +00:00
var type = Zotero . RDF . getTargets ( publisher [ 0 ] , rdf + " type " ) ;
2006-08-05 20:58:45 +00:00
if ( type ) {
2006-10-02 23:15:27 +00:00
type = Zotero . RDF . getResourceURI ( type [ 0 ] ) ;
2006-08-05 20:58:45 +00:00
if ( type = = n . foaf + " Organization " ) { / / handle foaf organizational publishers
newItem . publisher = getFirstResults ( publisher [ 0 ] , [ n . foaf + " name " ] , true ) ;
var place = getFirstResults ( publisher [ 0 ] , [ n . vcard + " adr " ] ) ;
if ( place ) {
newItem . place = getFirstResults ( place [ 0 ] , [ n . vcard + " locality " ] ) ;
}
}
}
}
}
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
/ / ( this will get ignored except for films , where we encode distributor as publisher )
2006-08-05 20:58:45 +00:00
newItem . distributor = newItem . publisher ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
/ / date
newItem . date = getFirstResults ( node , [ n . dc + " date " ] , true ) ;
2006-08-06 17:34:41 +00:00
/ / accessDate
newItem . accessDate = getFirstResults ( node , [ n . dcterms + " dateSubmitted " ] , true ) ;
/ / lastModified
newItem . lastModified = getFirstResults ( node , [ n . dcterms + " modified " ] , true ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
/ / identifier
var identifiers = getFirstResults ( node , [ n . dc + " identifier " ] ) ;
2006-08-05 20:58:45 +00:00
if ( container ) {
var containerIdentifiers = getFirstResults ( container , [ n . dc + " identifier " ] ) ;
/ / concatenate sets of identifiers
if ( containerIdentifiers ) {
if ( identifiers ) {
identifiers = identifiers . concat ( containerIdentifiers ) ;
} else {
identifiers = containerIdentifiers ;
}
}
}
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
if ( identifiers ) {
for ( var i in identifiers ) {
2006-08-06 17:34:41 +00:00
var beforeSpace = identifiers [ i ] . substr ( 0 , identifiers [ i ] . indexOf ( " " ) ) . toUpperCase ( ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
2006-08-06 17:34:41 +00:00
if ( beforeSpace = = " ISBN " ) {
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
newItem . ISBN = identifiers [ i ] . substr ( 5 ) . toUpperCase ( ) ;
2006-08-06 17:34:41 +00:00
} else if ( beforeSpace = = " ISSN " ) {
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
newItem . ISSN = identifiers [ i ] . substr ( 5 ) . toUpperCase ( ) ;
2006-08-06 17:34:41 +00:00
} else if ( beforeSpace = = " DOI " ) {
newItem . DOI = identifiers [ i ] . substr ( 4 ) ;
2006-08-05 20:58:45 +00:00
} else if ( ! newItem . accessionNumber ) {
newItem . accessionNumber = identifiers [ i ] ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
}
}
}
2006-08-06 17:34:41 +00:00
/ / archiveLocation
2006-08-05 20:58:45 +00:00
newItem . archiveLocation = getFirstResults ( node , [ n . dc + " coverage " ] , true ) ;
2006-08-06 17:34:41 +00:00
/ / type
newItem . type = newItem . thesisType = getFirstResults ( node , [ n . dc + " type " ] , true ) ;
/ / journalAbbreviation
newItem . journalAbbreviation = getFirstResults ( ( container ? container : node ) , [ n . dcterms + " alternative " ] , true ) ;
2006-08-05 20:58:45 +00:00
/ / see also
2006-08-20 04:35:04 +00:00
processSeeAlso ( node , newItem ) ;
2006-08-31 00:04:11 +00:00
/ / description
newItem . extra = getFirstResults ( node , [ n . dc + " description " ] , true ) ;
2006-08-05 20:58:45 +00:00
/* * NOTES * */
2006-10-02 23:15:27 +00:00
var referencedBy = Zotero . RDF . getTargets ( node , n . dcterms + " isReferencedBy " ) ;
2006-08-05 20:58:45 +00:00
for each ( var referentNode in referencedBy ) {
2006-10-02 23:15:27 +00:00
var type = Zotero . RDF . getTargets ( referentNode , rdf + " type " ) ;
if ( type & & Zotero . RDF . getResourceURI ( type [ 0 ] ) = = n . bib + " Memo " ) {
2006-08-05 20:58:45 +00:00
/ / if this is a memo
var note = new Array ( ) ;
note . note = getFirstResults ( referentNode , [ rdf + " value " , n . dc + " description " ] , true ) ;
if ( note . note ! = undefined ) {
/ / handle see also
2006-08-20 04:35:04 +00:00
processSeeAlso ( referentNode , note ) ;
processTags ( referentNode , note ) ;
2006-08-05 20:58:45 +00:00
/ / add note
newItem . notes . push ( note ) ;
}
}
}
if ( newItem . itemType = = " note " ) {
/ / add note for standalone
newItem . note = getFirstResults ( node , [ rdf + " value " , n . dc + " description " ] , true ) ;
}
/* * TAGS * */
var subjects = getFirstResults ( node , [ n . dc + " subject " ] ) ;
for each ( var subject in subjects ) {
if ( typeof ( subject ) = = " string " ) { / / a regular tag
newItem . tags . push ( subject ) ;
} else { / / a call number
2006-10-02 23:15:27 +00:00
var type = Zotero . RDF . getTargets ( subject , rdf + " type " ) ;
2006-08-05 20:58:45 +00:00
if ( type ) {
2006-10-02 23:15:27 +00:00
type = Zotero . RDF . getResourceURI ( type [ 0 ] ) ;
if ( Zotero . Utilities . inArray ( type , callNumberTypes ) ) {
2006-08-05 20:58:45 +00:00
newItem . callNumber = getFirstResults ( subject , [ rdf + " value " ] , true ) ;
}
}
}
}
2006-08-18 05:58:14 +00:00
2006-08-20 04:35:04 +00:00
/* * ATTACHMENTS * */
var relations = getFirstResults ( node , [ n . link + " link " ] ) ;
for each ( var relation in relations ) {
2006-10-02 23:15:27 +00:00
var type = Zotero . RDF . getTargets ( relation , rdf + " type " ) ;
if ( Zotero . RDF . getResourceURI ( type [ 0 ] ) = = n . fs + " Attachment " ) {
2006-08-20 04:35:04 +00:00
newItem . attachments . push ( handleAttachment ( relation ) ) ;
2006-08-18 05:58:14 +00:00
}
}
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
2006-10-02 23:15:27 +00:00
Zotero . Utilities . debug ( newItem ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
newItem . complete ( ) ;
}
2006-08-05 20:58:45 +00:00
/* COLLECTIONS */
2006-08-18 05:58:14 +00:00
for each ( var collection in collections ) {
2006-10-02 23:15:27 +00:00
if ( ! Zotero . RDF . getArcsIn ( collection ) ) {
var newCollection = new Zotero . Collection ( ) ;
2006-08-05 20:58:45 +00:00
processCollection ( collection , newCollection ) ;
newCollection . complete ( ) ;
}
}
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
} ' );
2006-10-02 23:15:27 +00:00
REPLACE INTO " translators " VALUES ( ' 32d59d2d-b65a-4da4-b0a3-bdd3cfb979e7 ' , ' 2006-10-02 17:00:00 ' , 1 , 100 , 3 , ' RIS ' , ' Simon Kornblith ' , ' ris ' ,
' Zotero.configure("dataMode", "line");
Zotero . addOption ( " exportNotes " , true ) ;
2006-08-08 02:46:52 +00:00
function detectImport ( ) {
2006-08-08 21:17:07 +00:00
var line ;
2006-10-02 23:15:27 +00:00
while ( ( line = Zotero . read ( ) ) ! = = " false " ) {
2006-08-26 03:50:15 +00:00
line = line . replace ( / ^ \ s + / , " " ) ;
if ( line ! = " " ) {
2006-08-08 02:46:52 +00:00
if ( line . substr ( 0 , 6 ) = = " TY - " ) {
return true ;
} else {
return false ;
}
}
}
} ' ,
2006-09-09 22:00:04 +00:00
' var fieldMap = {
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
ID : " itemID " ,
T1 : " title " ,
2006-08-06 17:34:41 +00:00
T3 : " seriesTitle " ,
JF : " publicationTitle " ,
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
VL : " volume " ,
2006-08-06 17:34:41 +00:00
IS : " issue " ,
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
CP : " place " ,
2006-09-06 04:45:19 +00:00
PB : " publisher " ,
JA : " journalAbbreviation "
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
} ;
var inputFieldMap = {
TI : " title " ,
CT : " title " ,
2006-08-06 17:34:41 +00:00
JO : " publicationTitle " ,
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
CY : " place "
} ;
2006-10-02 00:00:50 +00:00
/ / TODO : figure out if these are the best types for letter , interview , webpage
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
var typeMap = {
book : " BOOK " ,
bookSection : " CHAP " ,
journalArticle : " JOUR " ,
magazineArticle : " MGZN " ,
newspaperArticle : " NEWS " ,
thesis : " THES " ,
letter : " PCOMM " ,
2006-09-07 22:10:26 +00:00
manuscript : " PAMP " ,
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
interview : " PCOMM " ,
film : " MPCT " ,
artwork : " ART " ,
2006-10-02 00:00:50 +00:00
webpage : " ELEC "
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
} ;
/ / supplements outputTypeMap for importing
/ / TODO : BILL , CASE , COMP , CONF , DATA , HEAR , MUSIC , PAT , SOUND , STAT
var inputTypeMap = {
ABST : " journalArticle " ,
ADVS : " film " ,
CTLG : " magazineArticle " ,
GEN : " book " ,
INPR : " manuscript " ,
JFULL : " journalArticle " ,
MAP : " artwork " ,
2006-09-07 01:23:13 +00:00
PAMP : " manuscript " ,
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
RPRT : " book " ,
SER : " book " ,
SLIDE : " artwork " ,
UNBILL : " manuscript " ,
VIDEO : " film "
} ;
function processTag ( item , tag , value ) {
if ( fieldMap [ tag ] ) {
item [ fieldMap [ tag ] ] = value ;
2006-08-08 21:17:07 +00:00
} else if ( inputFieldMap [ tag ] ) {
item [ inputFieldMap [ tag ] ] = value ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
} else if ( tag = = " TY " ) {
/ / look for type
/ / first check typeMap
for ( var i in typeMap ) {
if ( value = = typeMap [ i ] ) {
item . itemType = i ;
}
}
/ / then check inputTypeMap
if ( ! item . itemType ) {
if ( inputTypeMap [ value ] ) {
item . itemType = inputTypeMap [ value ] ;
} else {
/ / default to generic from inputTypeMap
item . itemType = inputTypeMap [ " GEN " ] ;
}
}
} else if ( tag = = " BT " ) {
/ / ignore , unless this is a book or unpublished work , as per spec
if ( item . itemType = = " book " | | item . itemType = = " manuscript " ) {
item . title = value ;
2006-09-06 04:45:19 +00:00
} else {
item . backupPublicationTitle = value ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
}
2006-09-06 04:45:19 +00:00
} else if ( tag = = " T2 " ) {
item . backupPublicationTitle = value ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
} else if ( tag = = " A1 " | | tag = = " AU " ) {
/ / primary author
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
var names = value . split ( / , ? / ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
item . creators . push ( { lastName : names [ 0 ] , firstName : names [ 1 ] , creatorType : " author " } ) ;
} else if ( tag = = " A2 " | | tag = = " ED " ) {
/ / contributing author
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
var names = value . split ( / , ? / ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
item . creators . push ( { lastName : names [ 0 ] , firstName : names [ 1 ] , creatorType : " contributor " } ) ;
} else if ( tag = = " Y1 " | | tag = = " PY " ) {
/ / year or date
var dateParts = value . split ( " / " ) ;
if ( dateParts . length = = 1 ) {
/ / technically , if there ' ' s only one date part , the file isn ' ' t valid
2006-09-06 04:45:19 +00:00
/ / RIS , but EndNote writes this , so we have to too
2006-08-31 00:04:11 +00:00
item . date = value ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
} else {
2006-08-31 00:04:11 +00:00
/ / in the case that we have a year and other data , format that way
var month = parseInt ( dateParts [ 1 ] ) ;
if ( month ) {
month - - ;
2006-09-07 22:10:26 +00:00
} else {
month = undefined ;
2006-08-31 00:04:11 +00:00
}
2006-10-02 23:15:27 +00:00
item . date = Zotero . Utilities . formatDate ( { year : dateParts [ 0 ] ,
2006-08-31 00:04:11 +00:00
month : month ,
day : dateParts [ 2 ] ,
part : dateParts [ 3 ] } ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
}
2006-09-06 04:45:19 +00:00
} else if ( tag = = " Y2 " ) {
/ / the secondary date field can mean two things , a secondary date , or an
/ / invalid EndNote - style date . let ' ' s see which one this is .
var dateParts = value . split ( " / " ) ;
if ( dateParts . length ! = 4 ) {
/ / an invalid date . it ' ' s from EndNote .
if ( item . date & & value . indexOf ( item . date ) = = - 1 ) {
/ / append existing year
value + = " " + item . date ;
}
item . date = value ;
}
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
} else if ( tag = = " N1 " | | tag = = " AB " ) {
/ / notes
2006-09-06 04:45:19 +00:00
if ( value ! = item . title ) { / / why does EndNote do this ! ?
item . notes . push ( { note : value } ) ;
}
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
} else if ( tag = = " KW " ) {
/ / keywords / tags
item . tags . push ( value ) ;
} else if ( tag = = " SP " ) {
/ / start page
if ( ! item . pages ) {
item . pages = value ;
} else if ( item . pages [ 0 ] = = " - " ) { / / already have ending page
item . pages = value + item . pages ;
} else { / / multiple ranges ? hey , it ' ' s a possibility
item . pages + = " , " + value ;
}
} else if ( tag = = " EP " ) {
/ / end page
if ( value ) {
2006-09-06 04:45:19 +00:00
if ( ! item . pages ) {
item . pages = value ;
} else if ( value ! = item . pages ) {
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
item . pages + = " - " + value ;
}
}
} else if ( tag = = " SN " ) {
/ / ISSN / ISBN - just add both
if ( ! item . ISBN ) {
item . ISBN = value ;
}
if ( ! item . ISSN ) {
item . ISSN = value ;
}
2006-08-26 03:50:15 +00:00
} else if ( tag = = " UR " | | tag = = " L1 " | | tag = = " L2 " | | tag = = " L4 " ) {
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
/ / URL
2006-08-26 03:50:15 +00:00
if ( ! item . url ) {
item . url = value ;
}
if ( tag = = " UR " ) {
item . attachments . push ( { url : value } ) ;
} else if ( tag = = " L1 " ) {
item . attachments . push ( { url : value , mimeType : " application/pdf " ,
title : " Full Text (PDF) " , downloadable : true } ) ;
} else if ( tag = = " L2 " ) {
item . attachments . push ( { url : value , mimeType : " text/html " ,
title : " Full Text (HTML) " , downloadable : true } ) ;
} else if ( tag = = " L4 " ) {
item . attachments . push ( { url : value ,
title : " Image " , downloadable : true } ) ;
}
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
}
}
2006-09-09 22:00:04 +00:00
function completeItem(item) {
	// Finalize a scraped/imported item and hand it off to the translator
	// architecture via item.complete().
	//
	// If a backup publication title exists but no proper one does, promote
	// the backup (hack to recover newspaper titles from EndNote exports,
	// which put them in a secondary field).
	if(item.backupPublicationTitle) {
		if(!item.publicationTitle) {
			item.publicationTitle = item.backupPublicationTitle;
		}
		// Clear the temporary field so it is not saved on the item.
		item.backupPublicationTitle = undefined;
	}
	item.complete();
}
2006-08-17 07:56:01 +00:00
function doImport(attachments) {
	// Import RIS-formatted records from the translator's input stream,
	// creating one Zotero.Item per reference (a reference runs from a
	// "TY  - " line to an "ER  - " line).
	//
	// attachments: optional array, parallel to the references in the file,
	// of attachment lists to associate with each imported item.

	// This is apparently the proper character set for RIS, although it is
	// unclear how many exporters actually follow it.
	Zotero.setCharacterSet("IBM850");

	var line = true;
	// NOTE(review): the original `var tag = data = false` leaked `data` as
	// an implicit global; declare both locally instead.
	var tag = false;
	var data = false;

	// Skip leading garbage: the first valid line is the type ("TY") tag.
	// RIS lines are "XX  - value": 2-char tag, two spaces, hyphen, space.
	do {
		Zotero.Utilities.debug("ignoring " + line);
		line = Zotero.read();
		if(line !== false) {
			// strip leading whitespace; the guard avoids calling replace()
			// on the boolean false that Zotero.read() returns at EOF
			line = line.replace(/^\s+/, "");
		}
	} while(line !== false && line.substr(0, 6) != "TY  - ");

	var item = new Zotero.Item();

	var i = 0;
	if(attachments && attachments[i]) {
		item.attachments = attachments[i];
	}

	tag = "TY";
	data = line.substr(6);

	while((line = Zotero.read()) !== false) {	// until EOF
		line = line.replace(/^\s+/, "");
		if(line.substr(2, 4) == "  - ") {
			// This line starts a new tag, so process the previous one first.
			if(tag) {
				processTag(item, tag, data);
			}

			// Then fetch the tag and data from this line.
			tag = line.substr(0, 2);
			data = line.substr(6);

			Zotero.Utilities.debug("tag: '" + tag + "'; data: '" + data + "'");

			if(tag == "ER") {	// ER signals end of reference
				// unset info
				tag = data = false;
				// finish this item and start a new one
				completeItem(item);
				item = new Zotero.Item();
				i++;
				if(attachments && attachments[i]) {
					item.attachments = attachments[i];
				}
			}
		} else {
			// Otherwise, assume this is data from the previous line,
			// continued across a line break.
			if(tag) {
				// Only insert a joining space when the stored data does not
				// already end with one.
				if(data[data.length - 1] == " ") {
					data += line;
				} else {
					data += " " + line;
				}
			}
		}
	}

	if(tag) {	// save any unprocessed tags (file ended without "ER")
		processTag(item, tag, data);
		completeItem(item);
	}
}
function addTag(tag, value) {
	// Write a single RIS field line ("XX  - value\r\n") to the export
	// stream, skipping empty/undefined values. The separator is two
	// spaces, a hyphen, and a space, matching the "  - " check used by
	// the import side.
	if(value) {
		Zotero.write(tag + "  - " + value + "\r\n");
	}
}
function doExport ( ) {
2006-09-05 07:51:55 +00:00
/ / this is apparently the proper character set for RIS , although i ' ' m not
/ / sure how many people follow this
2006-10-02 23:15:27 +00:00
Zotero . setCharacterSet ( " IBM850 " ) ;
2006-09-05 07:51:55 +00:00
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
var item ;
2006-10-02 23:15:27 +00:00
while ( item = Zotero . nextItem ( ) ) {
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
/ / can ' ' t store independent notes in RIS
2006-09-09 22:00:04 +00:00
if ( item . itemType = = " note " | | item . itemType = = " attachment " ) {
2006-07-05 21:44:01 +00:00
continue ;
}
/ / type
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
addTag ( " TY " , typeMap [ item . itemType ] ) ;
/ / use field map
for ( var j in fieldMap ) {
addTag ( j , item [ fieldMap [ j ] ] ) ;
}
2006-07-05 21:44:01 +00:00
/ / creators
for ( var j in item . creators ) {
/ / only two types , primary and secondary
var risTag = " A1 "
if ( item . creators [ j ] . creatorType ! = " author " ) {
risTag = " A2 " ;
}
addTag ( risTag , item . creators [ j ] . lastName + " , " + item . creators [ j ] . firstName ) ;
}
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
2006-07-05 21:44:01 +00:00
/ / date
if ( item . date ) {
2006-10-02 23:15:27 +00:00
var date = Zotero . Utilities . strToDate ( item . date ) ;
2006-09-06 04:45:19 +00:00
var string = date . year + " / " ;
if ( date . month ! = undefined ) {
/ / deal with javascript months
date . month + + ;
if ( date . month < 10 ) string + = " 0 " ;
string + = date . month ;
}
string + = " / " ;
if ( date . day ! = undefined ) {
if ( date . day < 10 ) string + = " 0 " ;
string + = date . day ;
2006-07-05 21:44:01 +00:00
}
2006-09-06 04:45:19 +00:00
string + = " / " ;
if ( date . part ! = undefined ) {
string + = date . part ;
}
addTag ( " PY " , string ) ;
2006-07-05 21:44:01 +00:00
}
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
2006-07-05 21:44:01 +00:00
/ / notes
2006-10-02 23:15:27 +00:00
if ( Zotero . getOption ( " exportNotes " ) ) {
2006-08-08 23:00:33 +00:00
for ( var j in item . notes ) {
addTag ( " N1 " , item . notes [ j ] . note . replace ( / [ \ r \ n ] / g , " " ) ) ;
}
2006-07-05 21:44:01 +00:00
}
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
/ / tags
for ( var j in item . tags ) {
addTag ( " KY " , item . tags [ j ] ) ;
}
2006-07-05 21:44:01 +00:00
/ / pages
if ( item . pages ) {
2006-09-06 04:45:19 +00:00
if ( item . itemType = = " book " ) {
addTag ( " EP " , item . pages ) ;
} else {
2006-10-02 23:15:27 +00:00
var range = Zotero . Utilities . getPageRange ( item . pages ) ;
2006-09-06 04:45:19 +00:00
addTag ( " SP " , range [ 0 ] ) ;
addTag ( " EP " , range [ 1 ] ) ;
}
2006-07-05 21:44:01 +00:00
}
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
2006-07-05 21:44:01 +00:00
/ / ISBN / ISSN
addTag ( " SN " , item . ISBN ) ;
addTag ( " SN " , item . ISSN ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
2006-07-05 21:44:01 +00:00
/ / URL
if ( item . url ) {
addTag ( " UR " , item . url ) ;
} else if ( item . source & & item . source . substr ( 0 , 7 ) = = " http:// " ) {
addTag ( " UR " , item . source ) ;
}
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
2006-10-02 23:15:27 +00:00
Zotero . write ( " ER - \r\n\r\n " ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
}
} ' );
2006-10-02 23:15:27 +00:00
REPLACE INTO " translators " VALUES ( ' 881f60f2-0802-411a-9228-ce5f47b64c7d ' , ' 2006-10-02 17:00:00 ' , 1 , 100 , 3 , ' Refer/BibIX ' , ' Simon Kornblith ' , ' txt ' ,
' Zotero.configure("dataMode", "line");
2006-09-09 22:00:04 +00:00
function detectImport ( ) {
var lineRe = / % [ A - Z0 - 9 \ * \ $ ] . + / ;
var line ;
var matched = 0 ;
2006-10-02 23:15:27 +00:00
while ( ( line = Zotero . read ( ) ) ! = = " false " ) {
2006-09-09 22:00:04 +00:00
line = line . replace ( / ^ \ s + / , " " ) ;
if ( line ! = " " ) {
if ( lineRe . test ( line ) ) {
matched + + ;
if ( matched = = 2 ) {
/ / threshold is two lines
return true ;
}
} else {
return false ;
}
}
}
} ' ,
' var fieldMap = {
T : " title " ,
S : " seriesTitle " ,
V : " volume " ,
N : " issue " ,
C : " place " ,
I : " publisher " ,
R : " type " ,
P : " pages " ,
W : " archiveLocation " ,
" * " : " rights " ,
" @ " : " ISBN " ,
L : " callNumber " ,
M : " accessionNumber " ,
U : " url " ,
7 : " edition "
} ;
var inputFieldMap = {
J : " publicationTitle " ,
B : " publicationTitle " ,
9 : " type "
} ;
/ / TODO : figure out if these are the best types for personal communication
var typeMap = {
book : " Book " ,
bookSection : " Book Section " ,
journalArticle : " Journal Article " ,
magazineArticle : " Magazine Article " ,
newspaperArticle : " Newspaper Article " ,
thesis : " Thesis " ,
letter : " Personal Communication " ,
manuscript : " Unpublished Work " ,
interview : " Personal Communication " ,
film : " Audiovisual Material " ,
artwork : " Artwork " ,
2006-10-02 00:00:50 +00:00
webpage : " Electronic Source "
2006-09-09 22:00:04 +00:00
} ;
/ / supplements outputTypeMap for importing
/ / TODO : BILL , CASE , COMP , CONF , DATA , HEAR , MUSIC , PAT , SOUND , STAT
var inputTypeMap = {
" Generic " : " book "
} ;
var isEndNote = false ;
function processTag ( item , tag , value ) {
if ( fieldMap [ tag ] ) {
item [ fieldMap [ tag ] ] = value ;
} else if ( inputFieldMap [ tag ] ) {
item [ inputFieldMap [ tag ] ] = value ;
} else if ( tag = = " 0 " ) {
/ / EndNote type
isEndNote = true ;
/ / first check typeMap
for ( var i in typeMap ) {
if ( value = = typeMap [ i ] ) {
item . itemType = i ;
}
}
/ / then check inputTypeMap
if ( ! item . itemType ) {
if ( inputTypeMap [ value ] ) {
item . itemType = inputTypeMap [ value ] ;
} else {
/ / default to generic from inputTypeMap
item . itemType = inputTypeMap [ " Generic " ] ;
}
}
} else if ( tag = = " A " | | tag = = " E " | | tag = = " ? " ) {
if ( tag = = " A " ) {
var type = " author " ;
} else if ( tag = = " E " ) {
var type = " editor " ;
} else if ( tag = = " ? " ) {
var type = " translator " ;
}
/ / use comma only if EndNote format
if ( isEndNote ) {
2006-10-02 23:15:27 +00:00
item . creators . push ( Zotero . Utilities . cleanAuthor ( value , type , true ) ) ;
2006-09-09 22:00:04 +00:00
} else {
2006-10-02 23:15:27 +00:00
item . creators . push ( Zotero . Utilities . cleanAuthor ( value , type ) ) ;
2006-09-09 22:00:04 +00:00
}
} else if ( tag = = " Q " ) {
item . creators . push ( { creatorType : " author " , lastName : value , isInstitution : true } ) ;
} else if ( tag = = " H " | | tag = = " O " ) {
item . extra + = " \n " + value ;
} else if ( tag = = " Z " ) {
item . notes . push ( { note : value } ) ;
} else if ( tag = = " D " ) {
if ( item . date ) {
if ( item . date . indexOf ( value ) = = - 1 ) {
item . date + = " " + value ;
}
} else {
item . date = value ;
}
} else if ( tag = = " 8 " ) {
if ( item . date ) {
if ( value . indexOf ( item . date ) = = - 1 ) {
item . date + = " " + value ;
}
} else {
item . date = value ;
}
} else if ( tag = = " K " ) {
item . tags = value . split ( " \n " ) ;
}
}
function doImport ( ) {
/ / no character set is defined for this format . we use UTF - 8 .
2006-10-02 23:15:27 +00:00
Zotero . setCharacterSet ( " UTF-8 " ) ;
2006-09-09 22:00:04 +00:00
var line = true ;
var tag = data = false ;
do { / / first valid line is type
2006-10-02 23:15:27 +00:00
Zotero . Utilities . debug ( " ignoring " + line ) ;
line = Zotero . read ( ) ;
2006-09-09 22:00:04 +00:00
line = line . replace ( / ^ \ s + / , " " ) ;
} while ( line ! = = false & & line [ 0 ] ! = " % " ) ;
2006-10-02 23:15:27 +00:00
var item = new Zotero . Item ( ) ;
2006-09-09 22:00:04 +00:00
var tag = line [ 1 ] ;
var data = line . substr ( 3 ) ;
2006-10-02 23:15:27 +00:00
while ( ( line = Zotero . read ( ) ) ! = = false ) { / / until EOF
2006-09-09 22:00:04 +00:00
line = line . replace ( / ^ \ s + / , " " ) ;
if ( ! line ) {
if ( tag ) {
processTag ( item , tag , data ) ;
/ / unset info
tag = data = readRecordEntry = false ;
/ / new item
item . complete ( ) ;
2006-10-02 23:15:27 +00:00
item = new Zotero . Item ( ) ;
2006-09-09 22:00:04 +00:00
}
} else if ( line [ 0 ] = = " % " & & line [ 2 ] = = " " ) {
/ / if this line is a tag , take a look at the previous line to map
/ / its tag
if ( tag ) {
processTag ( item , tag , data ) ;
}
/ / then fetch the tag and data from this line
tag = line [ 1 ] ;
data = line . substr ( 3 ) ;
} else {
/ / otherwise , assume this is data from the previous line continued
if ( tag ) {
data + = " \n " + line ;
}
}
}
if ( tag ) { / / save any unprocessed tags
processTag ( item , tag , data ) ;
item . complete ( ) ;
}
}
function addTag ( tag , value ) {
if ( value ) {
2006-10-02 23:15:27 +00:00
Zotero . write ( " % " + tag + " " + value + " \r\n " ) ;
2006-09-09 22:00:04 +00:00
}
}
function doExport ( ) {
/ / use UTF - 8 to export
2006-10-02 23:15:27 +00:00
Zotero . setCharacterSet ( " UTF-8 " ) ;
2006-09-09 22:00:04 +00:00
var item ;
2006-10-02 23:15:27 +00:00
while ( item = Zotero . nextItem ( ) ) {
2006-09-09 22:00:04 +00:00
/ / can ' ' t store independent notes in RIS
if ( item . itemType = = " note " | | item . itemType = = " attachment " ) {
continue ;
}
/ / type
addTag ( " 0 " , typeMap [ item . itemType ] ) ;
/ / use field map
for ( var j in fieldMap ) {
addTag ( j , item [ fieldMap [ j ] ] ) ;
}
/ / creators
for ( var j in item . creators ) {
var referTag = " A " ;
if ( item . creators [ j ] . creatorType = = " editor " ) {
referTag = " E " ;
} else if ( item . creators [ j ] . creatorType = = " translator " ) {
referTag = " ? " ;
}
addTag ( referTag , item . creators [ j ] . lastName + ( item . creators [ j ] . firstName ? " , " + item . creators [ j ] . firstName : " " ) ) ;
}
/ / date
addTag ( " D " , item . date ) ;
/ / tags
if ( item . tags ) {
addTag ( " K " , item . tags . join ( " \r\n " ) ) ;
}
2006-10-02 23:15:27 +00:00
Zotero . write ( " \r\n " ) ;
2006-09-09 22:00:04 +00:00
}
} ' );
2006-10-02 23:15:27 +00:00
REPLACE INTO " translators " VALUES ( ' 9cb70025-a888-4a29-a210-93ec52da40d4 ' , ' 2006-10-02 17:00:00 ' , 1 , 100 , 3 , ' BibTeX ' , ' Simon Kornblith ' , ' bib ' ,
' Zotero.configure("dataMode", "block");
2006-09-07 22:10:26 +00:00
function detectImport ( ) {
var block = " " ;
var read ;
/ / read 20 chars out of the file
2006-10-02 23:15:27 +00:00
while ( read = Zotero . read ( 1 ) ) {
2006-09-07 22:10:26 +00:00
if ( read = = " % " ) {
/ / read until next newline
block = " " ;
2006-10-02 23:15:27 +00:00
while ( Zotero . read ( 1 ) ! = " \n " ) { }
2006-09-07 22:10:26 +00:00
} else if ( read = = " \n " & & block ) {
break ;
} else if ( " \n\r\t " . indexOf ( read ) = = - 1 ) {
block + = read ;
}
}
var re = / ^ @ [ a - zA - Z ] + [ \ ( \ { ] / ;
if ( re . test ( block ) ) {
return true ;
}
} ' ,
' var fieldMap = {
address : " place " ,
chapter : " section " ,
edition : " edition " ,
number : " issue " ,
type : " type " ,
series : " series " ,
title : " title " ,
volume : " volume " ,
copyright : " rights " ,
isbn : " ISBN " ,
issn : " ISSN " ,
location : " archiveLocation " ,
url : " url "
} ;
var inputFieldMap = {
booktitle : " publicationTitle " ,
journal : " publicationTitle " ,
school : " publisher " ,
publisher : " publisher "
} ;
var typeMap = {
book : " book " ,
bookSection : " inbook " ,
journalArticle : " article " ,
magazineArticle : " article " ,
newspaperArticle : " article " ,
thesis : " phdthesis " ,
letter : " misc " ,
manuscript : " unpublished " ,
interview : " misc " ,
film : " misc " ,
artwork : " misc " ,
2006-10-02 00:00:50 +00:00
webpage : " misc "
2006-09-07 22:10:26 +00:00
} ;
/ / supplements outputTypeMap for importing
var inputTypeMap = {
2006-09-08 01:59:22 +00:00
inproceedings : " journalArticle " ,
conference : " journalArticle " ,
techreport : " book " ,
2006-09-07 22:10:26 +00:00
booklet : " book " ,
incollection : " bookSection " ,
manual : " book " ,
mastersthesis : " thesis " ,
misc : " book " ,
proceedings : " book "
} ;
/*
* three - letter month abbreviations . i assume these are the same ones that the
* docs say are defined in some appendix of the LaTeX book . ( i don ' ' t have the
* LaTeX book . )
* /
var months = [ " jan " , " feb " , " mar " , " apr " , " may " , " jun " ,
" jul " , " aug " , " sep " , " oct " , " nov " , " dec " ]
/*
* this is the character table for converting TeX to Unicode . sorry , Czech
* speakers ; you ' ' ll have to add your own ( or stop using BibTeX ! )
* /
var accentedCharacters = {
/ / grave accents
192 : " \\`A " , 224 : " \\`a " ,
200 : " \\`E " , 232 : " \\`e " ,
204 : " \\`I " , 236 : " \\`i " ,
210 : " \\`O " , 242 : " \\`o " ,
217 : " \\`U " , 249 : " \\`u " ,
/ / acute accents
193 : " \\''A " , 225 : " \\''a " ,
201 : " \\''E " , 233 : " \\''e " ,
205 : " \\''I " , 237 : " \\''i " ,
211 : " \\''O " , 243 : " \\''o " ,
218 : " \\''U " , 250 : " \\''u " ,
/ / circumflexes
194 : " \\^A " , 226 : " \\^a " ,
202 : " \\^E " , 234 : " \\^e " ,
206 : " \\^I " , 238 : " \\^i " ,
212 : " \\^O " , 244 : " \\^o " ,
219 : " \\^U " , 251 : " \\^u " ,
/ / tildes
195 : " \\~A " , 227 : " \\~a " ,
213 : " \\~O " , 245 : " \\~o " ,
209 : " \\~N " , 241 : " \\~n " ,
/ / umlauts
196 : ' ' \ \ " A'', 228:''\\ " a ' ' ,
203 : ' ' \ \ " E'', 235:''\\ " e ' ' ,
207 : ' ' \ \ " I'', 239:''\\ " i ' ' ,
214 : ' ' \ \ " O'', 246:''\\ " o ' ' ,
220 : ' ' \ \ " U'', 252:''\\ " u ' ' ,
/ / cidillas
191 : " \\c{C} " , 231 : " \\c{c} " ,
/ / AE norwegian tings
198 : " {\\AE} " , 230 : " {\\ae} " ,
/ / o norwegian things
216 : " {\\o} " , 248 : " {\\O} " ,
/ / a norweigan things
197 : " {\\AA} " , 229 : " {\\aa} "
} ;
function processField ( item , field , value ) {
if ( fieldMap [ field ] ) {
item [ fieldMap [ field ] ] = value ;
} else if ( inputFieldMap [ field ] ) {
item [ inputFieldMap [ field ] ] = value ;
} else if ( field = = " author " | | field = = " editor " ) {
/ / parse authors / editors
var names = value . split ( " and " ) ;
for each ( var name in names ) {
2006-10-02 23:15:27 +00:00
item . creators . push ( Zotero . Utilities . cleanAuthor ( name , field ,
2006-09-07 22:10:26 +00:00
( name . indexOf ( " , " ) ! = - 1 ) ) ) ;
}
} else if ( field = = " institution " | | field = = " organization " ) {
item . backupPublisher = value ;
} else if ( field = = " month " ) {
var monthIndex = months . indexOf ( value . toLowerCase ( ) ) ;
if ( monthIndex ! = - 1 ) {
2006-10-02 23:15:27 +00:00
value = Zotero . Utilities . formatDate ( { month : monthIndex } ) ;
2006-09-07 22:10:26 +00:00
} else {
value + = " " ;
}
if ( item . date ) {
if ( value . indexOf ( item . date ) ! = - 1 ) {
/ / value contains year and more
item . date = value ;
} else {
item . date = value + item . date ;
}
} else {
item . date = value ;
}
} else if ( field = = " year " ) {
if ( item . date ) {
if ( item . date . indexOf ( value ) = = - 1 ) {
/ / date does not already contain year
item . date + = value ;
}
} else {
item . date = value ;
}
} else if ( field = = " pages " ) {
item . pages = value . replace ( / - - / g , " - " ) ;
} else if ( field = = " note " | | field = = " annote " ) {
item . extra + = " \n " + value ;
} else if ( field = = " howpublished " ) {
item . extra + = " \nPublished: " + value ;
} else if ( field = = " keywords " ) {
if ( value . indexOf ( " , " ) = = - 1 ) {
/ / keywords / tags
item . tags = value . split ( " " ) ;
} else {
item . tags = value . split ( / , ? / g ) ;
}
}
}
function getFieldValue ( ) {
/ / read whitespace
2006-10-02 23:15:27 +00:00
var read = Zotero . read ( 1 ) ;
2006-09-07 22:10:26 +00:00
while ( " \n\r\t " . indexOf ( read ) ! = - 1 ) {
2006-10-02 23:15:27 +00:00
read = Zotero . read ( 1 ) ;
2006-09-07 22:10:26 +00:00
}
var value = " " ;
/ / now , we have the first character of the field
if ( " 0123456789 " . indexOf ( read ) ! = - 1 ) {
/ / character is a number
2006-10-02 23:15:27 +00:00
while ( ( read = Zotero . read ( 1 ) ) & & ( " 0123456789 " . indexOf ( read ) ! = - 1 ) ) {
2006-09-07 22:10:26 +00:00
value + = read ;
}
} else if ( read = = " { " ) {
/ / character is a brace
var openBraces = 1 ;
2006-10-02 23:15:27 +00:00
while ( read = Zotero . read ( 1 ) ) {
2006-09-07 22:10:26 +00:00
if ( read = = " { " & & value [ value . length - 1 ] ! = " \\ " ) {
openBraces + + ;
value + = " { " ;
} else if ( read = = " } " & & value [ value . length - 1 ] ! = " \\ " ) {
openBraces - - ;
if ( openBraces = = 0 ) {
break ;
} else {
value + = " } " ;
}
} else {
value + = read ;
}
}
} else if ( read = = ' ' " '') {
var openBraces = 0 ;
2006-10-02 23:15:27 +00:00
while ( read = Zotero . read ( 1 ) ) {
2006-09-07 22:10:26 +00:00
if ( read = = " { " & & value [ value . length - 1 ] ! = " \\ " ) {
openBraces + + ;
value + = " { " ;
} else if ( read = = " } " & & value [ value . length - 1 ] ! = " \\ " ) {
openBraces - - ;
value + = " } " ;
} else if ( read = = ' ' " '' && openBraces == 0) {
break ;
} else {
value + = read ;
}
}
}
if ( value . length > 1 ) {
/ / replace accented characters ( yucky slow )
for ( var i in accentedCharacters ) {
value = value . replace ( accentedCharacters [ i ] , i ) ;
}
/ / kill braces
value = value . replace ( / ( [ ^ \ \ ] ) [ { } ] + / g , " $1 " ) ;
if ( value [ 0 ] = = " { " ) {
value = value . substr ( 1 ) ;
}
/ / chop off backslashes
value = value . replace ( / ( [ ^ \ \ ] ) \ \ ( [ #$%&~_^\\{}])/g, "$1$2");
value = value . replace ( / ( [ ^ \ \ ] ) \ \ ( [ #$%&~_^\\{}])/g, "$1$2");
if ( value [ 0 ] = = " \\ " & & " #$%&~_^\\{} " . indexOf ( value [ 1 ] ) ! = - 1 ) {
value = value . substr ( 1 ) ;
}
if ( value [ value . length - 1 ] = = " \\ " & & " #$%&~_^\\{} " . indexOf ( value [ value . length - 2 ] ) ! = - 1 ) {
value = value . substr ( 0 , value . length - 1 ) ;
}
value = value . replace ( / \ \ \ \ / g , " \\ " ) ;
value = value . replace ( / \ s + / g , " " ) ;
}
return value ;
}
function beginRecord ( type , closeChar ) {
type = type . toLowerCase ( )
if ( inputTypeMap [ type ] ) {
2006-10-02 23:15:27 +00:00
var item = new Zotero . Item ( inputTypeMap [ type ] ) ;
2006-09-07 22:10:26 +00:00
} else {
for ( var i in typeMap ) {
if ( typeMap [ i ] = = type ) {
2006-10-02 23:15:27 +00:00
var item = new Zotero . Item ( i ) ;
2006-09-07 22:10:26 +00:00
break ;
}
}
if ( ! item ) {
2006-10-02 23:15:27 +00:00
Zotero . Utilities . debug ( " discarded item from BibTeX; type was " + type ) ;
2006-09-07 22:10:26 +00:00
}
}
var field = " " ;
2006-10-02 23:15:27 +00:00
while ( read = Zotero . read ( 1 ) ) {
2006-09-07 22:10:26 +00:00
if ( read = = " = " ) { / / equals begin a field
var value = getFieldValue ( ) ;
if ( item ) {
processField ( item , field . toLowerCase ( ) , value ) ;
}
field = " " ;
} else if ( read = = " , " ) { / / commas reset
field = " " ;
} else if ( read = = closeChar ) {
if ( item ) {
if ( item . extra ) item . extra = item . extra . substr ( 1 ) ; / / chop \ n
item . complete ( ) ;
}
return ;
} else if ( " \n\r\t " . indexOf ( read ) = = - 1 ) { / / skip whitespace
field + = read ;
}
}
}
2006-09-08 01:59:22 +00:00
function doImport ( ) {
2006-09-07 22:10:26 +00:00
/ / make regular expressions out of values
var newArray = new Array ( ) ;
for ( var i in accentedCharacters ) {
newArray [ String . fromCharCode ( i ) ] = new RegExp ( accentedCharacters [ i ] . replace ( / \ \ / g , " \\\\ " ) , " g " ) ;
}
accentedCharacters = newArray ;
var read = " " , text = " " , recordCloseElement = false ;
var type = false ;
2006-10-02 23:15:27 +00:00
while ( read = Zotero . read ( 1 ) ) {
2006-09-07 22:10:26 +00:00
if ( read = = " @ " ) {
type = " " ;
} else if ( type ! = = false ) {
if ( read = = " { " ) { / / possible open character
beginRecord ( type , " } " ) ;
type = false ;
} else if ( read = = " ( " ) { / / possible open character
beginRecord ( type , " ) " ) ;
type = false ;
} else {
type + = read ;
}
}
}
}
function writeField ( field , value ) {
if ( ! value ) return ;
value = value . toString ( ) ;
/ / replace naughty chars
value = value . replace ( / ( [ #$%&~_^\\{}])/g, "\\$1");
/ / replace accented characters
for ( var i in accentedCharacters ) {
value = value . replace ( accentedCharacters [ i ] , i ) ;
}
/ / replace other accented characters
value = value . replace ( / [ \ u0080 - \ uFFFF ] / g , " ? " )
/ / write
2006-10-02 23:15:27 +00:00
Zotero . write ( " ,\n\t " + field + " = { " + value + " } " ) ;
2006-09-07 22:10:26 +00:00
}
var numberRe = / ^ [ 0 - 9 ] + / ;
function doExport ( ) {
/ / switch keys and values of accented characters
var newArray = new Array ( ) ;
for ( var i in accentedCharacters ) {
newArray [ " { " + accentedCharacters [ i ] + " } " ] = new RegExp ( String . fromCharCode ( i ) , " g " ) ;
}
accentedCharacters = newArray ;
2006-10-02 23:15:27 +00:00
Zotero . write ( " % BibTeX export generated by Zotero " + Zotero . Utilities . getVersion ( ) ) ;
2006-09-07 22:10:26 +00:00
var first = true ;
var citekeys = new Object ( ) ;
var item ;
2006-10-02 23:15:27 +00:00
while ( item = Zotero . nextItem ( ) ) {
2006-09-07 22:10:26 +00:00
/ / determine type
if ( ! typeMap [ item . itemType ] ) {
continue ;
}
/ / create a unique citation key
var basekey = " " ;
if ( item . creators & & item . creators [ 0 ] & & item . creators [ 0 ] . lastName ) {
basekey = item . creators [ 0 ] . lastName . toLowerCase ( ) ;
}
if ( item . date ) {
2006-10-02 23:15:27 +00:00
var date = Zotero . Utilities . strToDate ( item . date ) ;
2006-09-07 22:10:26 +00:00
if ( date . year & & numberRe . test ( date . year ) ) {
basekey + = date . year ;
}
}
var citekey = basekey ;
var i = 0 ;
while ( citekeys [ citekey ] ) {
i + + ;
citekey = basekey + " - " + i ;
}
citekeys [ citekey ] = true ;
/ / write citation key
2006-10-02 23:15:27 +00:00
Zotero . write ( ( first ? " " : " , " ) + " \n\n@ " + typeMap [ item . itemType ] + " { " + citekey ) ;
2006-09-07 22:10:26 +00:00
first = false ;
for ( var field in fieldMap ) {
if ( item [ fieldMap [ field ] ] ) {
writeField ( field , item [ fieldMap [ field ] ] ) ;
}
}
if ( item . publicationTitle ) {
if ( item . itemType = = " chapter " ) {
writeField ( " booktitle " , item . publicationTitle ) ;
} else {
writeField ( " journal " , item . publicationTitle ) ;
}
}
if ( item . publisher ) {
if ( item . itemType = = " thesis " ) {
writeField ( " school " , item . publisher ) ;
} else {
writeField ( " publisher " , item . publisher ) ;
}
}
if ( item . creators & & item . creators . length ) {
/ / split creators into subcategories
var author = " " ;
var editor = " " ;
for each ( var creator in item . creators ) {
var creatorString = creator . lastName ;
if ( creator . firstName ) creatorString + = " , " + creator . firstName ;
if ( creator . creatorType = = " editor " ) {
author + = " and " + creatorString ;
} else {
editor + = " and " + creatorString ;
}
}
if ( author ) {
writeField ( " author " , author . substr ( 5 ) ) ;
}
if ( editor ) {
writeField ( " author " , editor . substr ( 5 ) ) ;
}
}
if ( item . date ) {
/ / need to use non - localized abbreviation
if ( date . month ) {
writeField ( " month " , months [ date . month ] ) ;
}
if ( date . year ) {
writeField ( " year " , date . year ) ;
}
}
if ( item . extra ) {
writeField ( " note " , item . extra ) ;
}
if ( item . tags & & item . tags . length ) {
writeField ( " keywords " , item . tags . join ( " , " ) ) ;
}
2006-10-02 23:15:27 +00:00
Zotero . write ( " \n} " ) ;
2006-09-07 22:10:26 +00:00
}
} ' );
2006-10-02 23:15:27 +00:00
REPLACE INTO " translators " VALUES ( ' a6ee60df-1ddc-4aae-bb25-45e0537be973 ' , ' 2006-10-02 17:00:00 ' , 1 , 100 , 1 , ' MARC ' , ' Simon Kornblith ' , ' marc ' ,
2006-08-08 02:46:52 +00:00
' function detectImport() {
var marcRecordRegexp = / ^ [ 0 - 9 ] { 5 } [ a - z ] { 3 } $ /
2006-10-02 23:15:27 +00:00
var read = Zotero . read ( 8 ) ;
2006-08-08 02:46:52 +00:00
if ( marcRecordRegexp . test ( read ) ) {
return true ;
}
} ' ,
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
' var fieldTerminator = "\x1E";
var recordTerminator = " \x1D " ;
var subfieldDelimiter = " \x1F " ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
/*
* CLEANING FUNCTIONS
* /
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
/ / general purpose cleaning
function clean ( value ) {
value = value . replace ( / ^ [ \ s \ . \ , \ / \ : ] + / , ' '' ' ) ;
value = value . replace ( / [ \ s \ . \ , \ / \ : ] + $ / , ' '' ' ) ;
value = value . replace ( / + / g , ' ' ' ' ) ;
var char1 = value [ 0 ] ;
var char2 = value [ value . length - 1 ] ;
if ( ( char1 = = " [ " & & char2 = = " ] " ) | | ( char1 = = " ( " & & char2 = = " ) " ) ) {
/ / chop of extraneous characters
return value . substr ( 1 , value . length - 2 ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
}
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
return value ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
}
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
/ / number extraction
function pullNumber ( text ) {
var pullRe = / [ 0 - 9 ] + / ;
var m = pullRe . exec ( text ) ;
if ( m ) {
return m [ 0 ] ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
}
}
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
/ / ISBN extraction
function pullISBN ( text ) {
var pullRe = / [ 0 - 9 X \ - ] + / ;
var m = pullRe . exec ( text ) ;
if ( m ) {
return m [ 0 ] ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
}
}
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
/ / corporate author extraction
function corpAuthor ( author ) {
2006-10-03 21:08:02 +00:00
return { lastName : author , isInstitution : true } ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
}
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
/ / regular author extraction
function author ( author , type , useComma ) {
2006-10-02 23:15:27 +00:00
return Zotero . Utilities . cleanAuthor ( author , type , useComma ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
}
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
/*
* END CLEANING FUNCTIONS
* /
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
// Constructor for an in-memory MARC record.
// Holds the raw record content plus a directory mapping field tags to
// [position, length] entries within that content.
var record = function() {
	// tag (int) -> array of [fieldPosition, fieldLength] pairs
	this.directory = new Object();
	// 24-character MARC leader (set by importBinary)
	this.leader = "";
	// concatenated variable-field data
	this.content = "";
	
	// defaults per MARC 21: two indicator characters and two-character
	// subfield codes (overridden from the leader in importBinary)
	this.indicatorLength = 2;
	this.subfieldCodeLength = 2;
}
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
// Import a binary (ISO 2709 / MARC transmission format) record into this
// record object.
//
// Parameters:
//   record - the raw binary record as a string. NOTE: the parameter
//            deliberately shadows the `record` constructor name (kept for
//            interface compatibility with existing callers).
//
// Relies on the file-level `fieldTerminator` constant (defined elsewhere
// in this translator).
record.prototype.importBinary = function(record) {
	// get directory and leader; the leader is the first 24 characters,
	// followed by the directory, up to the first field terminator
	var directory = record.substr(0, record.indexOf(fieldTerminator));
	this.leader = directory.substr(0, 24);
	directory = directory.substr(24);
	
	// pull structural parameters out of the leader: indicator length at
	// offset 10, subfield code length at offset 11, base address of data
	// at offsets 12-16
	this.indicatorLength = parseInt(this.leader[10], 10);
	this.subfieldCodeLength = parseInt(this.leader[11], 10);
	var baseAddress = parseInt(this.leader.substr(12, 5), 10);
	
	// get record data
	var contentTmp = record.substr(baseAddress);
	
	// MARC wants one-byte characters, so when we have multi-byte UTF-8
	// sequences, add null characters so that the directory offsets (which
	// count bytes) line up with our character positions. we can strip the
	// nulls later.
	// NOTE(review): charCodeAt() returns a UTF-16 code unit (max 0xFFFF),
	// so the > 0x00FFFF branch appears unreachable — confirm whether
	// supplementary-plane input is expected here.
	this.content = "";
	for(var i = 0; i < contentTmp.length; i++) {	// `var` added: was an implicit global
		this.content += contentTmp[i];
		if(contentTmp.charCodeAt(i) > 0x00FFFF) {
			this.content += "\x00\x00\x00";
		} else if(contentTmp.charCodeAt(i) > 0x0007FF) {
			this.content += "\x00\x00";
		} else if(contentTmp.charCodeAt(i) > 0x00007F) {
			this.content += "\x00";
		}
	}
	
	// read directory: each entry is 12 characters — 3-digit tag,
	// 4-digit field length, 5-digit starting position
	for(var i = 0; i < directory.length; i += 12) {
		var tag = parseInt(directory.substr(i, 3), 10);
		var fieldLength = parseInt(directory.substr(i + 3, 4), 10);
		var fieldPosition = parseInt(directory.substr(i + 7, 5), 10);
		
		if(!this.directory[tag]) {
			this.directory[tag] = new Array();
		}
		this.directory[tag].push([fieldPosition, fieldLength]);
	}
}
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
// Add a variable field to this record.
//
// Parameters:
//   field     - field tag (string or number; coerced to an integer tag)
//   indicator - indicator characters; truncated or space-padded on the
//               left to exactly this.indicatorLength
//   value     - the field data (subfield delimiters included by caller)
//
// Relies on the file-level `fieldTerminator` constant and on
// Zotero.Utilities.lpad (both defined elsewhere).
record.prototype.addField = function(field, indicator, value) {
	field = parseInt(field, 10);
	
	// make sure indicator is the right length
	if(indicator.length > this.indicatorLength) {
		indicator = indicator.substr(0, this.indicatorLength);
	} else if(indicator.length != this.indicatorLength) {
		indicator = Zotero.Utilities.lpad(indicator, " ", this.indicatorLength);
	}
	
	// add terminator
	value = indicator + value + fieldTerminator;
	
	// add field to directory; entry records where the field will start
	// (current end of content) and its total length
	if(!this.directory[field]) {
		this.directory[field] = new Array();
	}
	this.directory[field].push([this.content.length, value.length]);
	
	// add field to record
	this.content += value;
}
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
// Get all fields with a certain field number (tag).
//
// Parameters:
//   field - field tag (string or number; coerced to an integer tag)
//
// Returns an array of [indicator, data] pairs, one per occurrence of the
// tag; empty array if the tag is absent. Null padding characters inserted
// by importBinary are stripped from the data.
record.prototype.getField = function(field) {
	field = parseInt(field, 10);
	var fields = new Array();
	
	// make sure fields exist
	if(!this.directory[field]) {
		return fields;
	}
	
	// get fields: directory entries are [position, length]; the field
	// data runs from after the indicators to just before the terminator
	// (hence the -1 on the length)
	for(var i in this.directory[field]) {
		var location = this.directory[field][i];
		
		// add to array, replacing null characters
		fields.push([this.content.substr(location[0], this.indicatorLength),
		             this.content.substr(location[0] + this.indicatorLength,
		               location[1] - this.indicatorLength - 1).replace(/\x00/g, "")]);
	}
	
	return fields;
}
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
// get subfields from a field
// Returns a two-dimensional structure: one entry per occurrence of the
// given MARC tag, each mapping subfield code -> subfield value.
// Each element of this.getField(tag) is assumed to be a pair of
// [indicators, fieldData]; only the data portion is parsed here.
record.prototype.getFieldSubfields = function(tag) {
	var fields = this.getField(tag);
	var returnFields = new Array();
	
	for(var i in fields) {
		returnFields[i] = new Object();
		// subfieldDelimiter is defined elsewhere in this translator
		var subfields = fields[i][1].split(subfieldDelimiter);
		if(subfields.length == 1) {
			// no subfield delimiters at all: file the raw field data
			// under the placeholder code "?"
			returnFields[i]["?"] = fields[i][1];
		} else {
			for(var j in subfields) {
				if(subfields[j]) {
					// the first (subfieldCodeLength-1) characters are the
					// subfield code; the remainder is the subfield value
					returnFields[i][subfields[j].substr(0, this.subfieldCodeLength-1)] = subfields[j].substr(this.subfieldCodeLength-1);
				}
			}
		}
	}
	
	return returnFields;
}
/ / add field to DB
record . prototype . _associateDBField = function ( item , fieldNo , part , fieldName , execMe , arg1 , arg2 ) {
var field = this . getFieldSubfields ( fieldNo ) ;
2006-10-02 23:15:27 +00:00
Zotero . Utilities . debug ( ' ' found ' ' + field . length + ' ' matches for ' ' + fieldNo + part ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
if ( field ) {
for ( var i in field ) {
var value = false ;
for ( var j = 0 ; j < part . length ; j + + ) {
var myPart = part [ j ] ;
if ( field [ i ] [ myPart ] ) {
if ( value ) {
value + = " " + field [ i ] [ myPart ] ;
} else {
value = field [ i ] [ myPart ] ;
}
}
}
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
if ( value ) {
value = clean ( value ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
if ( execMe ) {
value = execMe ( value , arg1 , arg2 ) ;
}
if ( fieldName = = " creator " ) {
item . creators . push ( value ) ;
} else {
item [ fieldName ] = value ;
}
}
}
}
}
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
// add field to DB as tags
// Pushes each requested subfield of each occurrence of a MARC field
// onto item.tags (cleaned). Unlike _associateDBField, subfields are
// NOT concatenated — every matching subfield becomes its own tag.
//   item    - the item being built
//   fieldNo - MARC tag to read (e.g. "650")
//   part    - string of subfield codes to extract
record.prototype._associateTags = function(item, fieldNo, part) {
	var field = this.getFieldSubfields(fieldNo);
	
	for(var i in field) {
		for(var j=0; j<part.length; j++) {
			var myPart = part[j];
			if(field[i][myPart]) {
				// clean() is a shared helper defined elsewhere in this translator
				item.tags.push(clean(field[i][myPart]));
			}
		}
	}
}
/ / this function loads a MARC record into our database
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
record . prototype . translate = function ( item ) {
/ / get item type
if ( this . leader ) {
var marcType = this . leader [ 6 ] ;
if ( marcType = = " g " ) {
item . itemType = " film " ;
} else if ( marcType = = " k " | | marcType = = " e " | | marcType = = " f " ) {
item . itemType = " artwork " ;
} else if ( marcType = = " t " ) {
item . itemType = " manuscript " ;
} else {
item . itemType = " book " ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
}
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
} else {
item . itemType = " book " ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
}
/ / Extract ISBNs
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
this . _associateDBField ( item , " 020 " , " a " , " ISBN " , pullISBN ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
/ / Extract ISSNs
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
this . _associateDBField ( item , " 022 " , " a " , " ISSN " , pullISBN ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
/ / Extract creators
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
this . _associateDBField ( item , " 100 " , " a " , " creator " , author , " author " , true ) ;
this . _associateDBField ( item , " 110 " , " a " , " creator " , corpAuthor , " author " ) ;
this . _associateDBField ( item , " 111 " , " a " , " creator " , corpAuthor , " author " ) ;
this . _associateDBField ( item , " 700 " , " a " , " creator " , author , " contributor " , true ) ;
this . _associateDBField ( item , " 710 " , " a " , " creator " , corpAuthor , " contributor " ) ;
this . _associateDBField ( item , " 711 " , " a " , " creator " , corpAuthor , " contributor " ) ;
2006-10-03 21:08:02 +00:00
if ( item . itemType = = " book " & & ! item . creators . length ) {
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
/ / some LOC entries have no listed author , but have the author in the person subject field as the first entry
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
var field = this . getFieldSubfields ( " 600 " ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
if ( field [ 0 ] ) {
2006-10-03 21:08:02 +00:00
item . creators . push ( Scholar . Utilities . cleanAuthor ( field [ 0 ] [ " a " ] , true ) ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
}
}
/ / Extract tags
/ / personal
this . _associateTags ( item , " 600 " , " aqtxyz " ) ;
/ / corporate
this . _associateTags ( item , " 611 " , " abtxyz " ) ;
/ / meeting
this . _associateTags ( item , " 630 " , " acetxyz " ) ;
/ / uniform title
this . _associateTags ( item , " 648 " , " atxyz " ) ;
/ / chronological
this . _associateTags ( item , " 650 " , " axyz " ) ;
/ / topical
this . _associateTags ( item , " 651 " , " abcxyz " ) ;
/ / geographic
this . _associateTags ( item , " 653 " , " axyz " ) ;
/ / uncontrolled
this . _associateTags ( item , " 653 " , " a " ) ;
/ / faceted topical term ( whatever that means )
this . _associateTags ( item , " 654 " , " abcyz " ) ;
/ / genre / form
this . _associateTags ( item , " 655 " , " abcxyz " ) ;
/ / occupation
this . _associateTags ( item , " 656 " , " axyz " ) ;
/ / function
this . _associateTags ( item , " 657 " , " axyz " ) ;
/ / curriculum objective
this . _associateTags ( item , " 658 " , " ab " ) ;
/ / hierarchical geographic place name
this . _associateTags ( item , " 662 " , " abcdfgh " ) ;
/ / Extract title
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
this . _associateDBField ( item , " 245 " , " ab " , " title " ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
/ / Extract edition
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
this . _associateDBField ( item , " 250 " , " a " , " edition " ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
/ / Extract place info
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
this . _associateDBField ( item , " 260 " , " a " , " place " ) ;
/ / Extract publisher / distributor
if ( item . itemType = = " film " ) {
this . _associateDBField ( item , " 260 " , " b " , " distributor " ) ;
} else {
this . _associateDBField ( item , " 260 " , " b " , " publisher " ) ;
}
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
/ / Extract year
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
this . _associateDBField ( item , " 260 " , " c " , " date " , pullNumber ) ;
2006-08-17 07:56:01 +00:00
/ / Extract pages
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
this . _associateDBField ( item , " 300 " , " a " , " pages " , pullNumber ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
/ / Extract series
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
this . _associateDBField ( item , " 440 " , " a " , " seriesTitle " ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
/ / Extract call number
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
this . _associateDBField ( item , " 084 " , " ab " , " callNumber " ) ;
this . _associateDBField ( item , " 082 " , " a " , " callNumber " ) ;
this . _associateDBField ( item , " 080 " , " ab " , " callNumber " ) ;
this . _associateDBField ( item , " 070 " , " ab " , " callNumber " ) ;
this . _associateDBField ( item , " 060 " , " ab " , " callNumber " ) ;
this . _associateDBField ( item , " 050 " , " ab " , " callNumber " ) ;
2006-09-04 18:16:50 +00:00
if ( item . title ) {
2006-10-02 23:15:27 +00:00
item . title = Zotero . Utilities . capitalizeTitle ( item . title ) ;
2006-09-04 18:16:50 +00:00
}
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
}
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
function doImport ( ) {
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
var text ;
var holdOver = " " ; / / part of the text held over from the last loop
2006-10-02 23:15:27 +00:00
Zotero . setCharacterSet ( " utf-8 " ) ;
2006-09-05 07:51:55 +00:00
2006-10-02 23:15:27 +00:00
while ( text = Zotero . read ( 4096 ) ) { / / read in 4096 byte increments
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
var records = text . split ( " \x1D " ) ;
2006-10-02 23:15:27 +00:00
Zotero . Utilities . debug ( records ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
if ( records . length > 1 ) {
records [ 0 ] = holdOver + records [ 0 ] ;
holdOver = records . pop ( ) ; / / skip last record , since it ' ' s not done
for ( var i in records ) {
2006-10-02 23:15:27 +00:00
var newItem = new Zotero . Item ( ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
/ / create new record
closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions
MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00
var rec = new record ( ) ;
rec . importBinary ( records [ i ] ) ;
rec . translate ( newItem ) ;
closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects
API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators
new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing
apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00
newItem . complete ( ) ;
}
} else {
holdOver + = text ;
}
2006-07-05 21:44:01 +00:00
}
2006-08-03 04:54:16 +00:00
} ' );
2006-10-02 23:15:27 +00:00
REPLACE INTO " csl " VALUES ( ' http://purl.org/net/xbiblio/csl/styles/apa.csl ' , ' 2006-10-02 17:00:00 ' , ' American Psychological Association ' ,
2006-08-12 23:23:56 +00:00
' <?xml version="1.0" encoding="UTF-8"?>
2006-09-05 01:09:04 +00:00
< ? oxygen RNGSchema = " ../schema/trunk/csl.rnc " type = " compact " ? >
2006-08-12 23:23:56 +00:00
< style xmlns = " http://purl.org/net/xbiblio/csl " class = " author-date " xml : lang = " en " >
< info >
< title > American Psychological Association < / title >
< id > http : / / purl . org / net / xbiblio / csl / styles / apa . csl < / id >
< link > http : / / purl . org / net / xbiblio / csl / styles / apa . csl < / link >
< author >
< name > Bruce D ’ Arcus < / name >
< email > bdarcus @ sourceforge . net < / email >
< / author >
2006-08-19 23:14:27 +00:00
< contributor >
2006-08-14 05:12:28 +00:00
< name > Simon Kornblith < / name >
< email > simon @ simonster . com < / email >
2006-08-19 23:14:27 +00:00
< / contributor >
2006-08-29 17:29:35 +00:00
< contributor >
< name > Johan Kool < / name >
< email > johankool @ users . sourceforge . net < / email >
< / contributor >
2006-09-05 01:09:04 +00:00
< updated > 2006 - 09 - 04 T20 : 14 : 00 + 05 : 00 < / updated >
2006-08-12 23:23:56 +00:00
< / info >
< defaults >
2006-08-19 23:14:27 +00:00
< contributor name - as - sort - order = " no " >
2006-08-30 04:00:19 +00:00
< name and = " symbol " initialize - with = " . " delimiter = " , " delimiter - precedes - last = " always " / >
< label form = " short " prefix = " , " text - transform = " capitalize " suffix = " . " / >
2006-08-19 23:14:27 +00:00
< / contributor >
2006-08-14 05:12:28 +00:00
< author name - as - sort - order = " all " >
2006-08-30 04:00:19 +00:00
< name and = " symbol " sort - separator = " , " initialize - with = " . " delimiter = " , " delimiter - precedes - last = " always " / >
< label form = " short " prefix = " ( " suffix = " .) " text - transform = " capitalize " / >
2006-08-12 23:23:56 +00:00
< substitute >
< choose >
2006-08-14 05:12:28 +00:00
< editor / >
< translator / >
< titles / >
2006-08-12 23:23:56 +00:00
< / choose >
2006-08-14 05:12:28 +00:00
< / substitute >
2006-08-12 23:23:56 +00:00
< / author >
< locator >
< number / >
< / locator >
< identifier >
< number / >
< / identifier >
< titles >
< title / >
< / titles >
< date >
< year / >
< month prefix = " , " / >
< day prefix = " " / >
< / date >
< publisher >
< place suffix = " : " / >
< name / >
< / publisher >
< access >
2006-08-14 05:12:28 +00:00
< text term - name = " retrieved " text - transform = " capitalize " / >
< date suffix = " , " >
2006-09-04 04:13:12 +00:00
< month suffix = " " / >
2006-08-14 05:12:28 +00:00
< day suffix = " , " / >
< year / >
< / date >
< text term - name = " from " / >
2006-08-12 23:23:56 +00:00
< url / >
< / access >
< / defaults >
< citation prefix = " ( " suffix = " ) " delimiter = " ; " >
< et - al min - authors = " 6 " use - first = " 6 " position = " first " / >
< et - al min - authors = " 6 " use - first = " 1 " position = " subsequent " / >
< layout >
< item >
2006-08-30 04:00:19 +00:00
< author form = " short " >
< name and = " symbol " delimiter = " , " / >
< label form = " short " prefix = " , " text - transform = " capitalize " suffix = " . " / >
< / author >
2006-08-29 04:24:11 +00:00
< date prefix = " , " >
2006-08-12 23:23:56 +00:00
< year / >
< / date >
2006-08-14 05:12:28 +00:00
< locator prefix = " : " / >
2006-08-12 23:23:56 +00:00
< / item >
< / layout >
< / citation >
2006-08-14 05:12:28 +00:00
< bibliography hanging - indent = " true " >
2006-08-12 23:23:56 +00:00
< sort algorithm = " author-date " / >
< et - al min - authors = " 4 " use - first = " 3 " / >
< layout >
< list >
< heading >
< text term - name = " references " / >
< / heading >
< / list >
< item suffix = " . " >
< choose >
< type name = " book " >
< author / >
< date prefix = " ( " suffix = " ). " >
< year / >
< / date >
< group suffix = " . " >
< titles font - style = " italic " prefix = " " / >
2006-08-14 05:12:28 +00:00
< group prefix = " ( " suffix = " ) " delimiter = " , " >
< editor / >
< translator / >
< / group >
2006-08-03 04:54:16 +00:00
< / group >
2006-08-12 23:23:56 +00:00
< publisher prefix = " " / >
2006-08-03 04:54:16 +00:00
< access prefix = " " / >
2006-08-12 23:23:56 +00:00
< / type >
< type name = " chapter " >
< author / >
< date prefix = " ( " suffix = " ). " >
< year / >
< / date >
2006-08-14 05:12:28 +00:00
< titles font - style = " italic " prefix = " " / >
< group class = " container " prefix = " " >
< text term - name = " in " text - transform = " capitalize " / >
< editor prefix = " " suffix = " , " >
< name and = " symbol " sort - separator = " , " initialize - with = " . " / >
2006-08-30 04:00:19 +00:00
< label form = " short " prefix = " ( " suffix = " ) " text - transform = " capitalize " / >
2006-08-14 05:12:28 +00:00
< / editor >
< translator prefix = " " suffix = " , " >
< name and = " symbol " sort - separator = " , " initialize - with = " . " / >
2006-08-30 04:00:19 +00:00
< label form = " short " prefix = " ( " suffix = " ) " text - transform = " capitalize " / >
2006-08-14 05:12:28 +00:00
< / translator >
2006-08-12 23:23:56 +00:00
< titles relation = " container " font - style = " italic " prefix = " " suffix = " . " / >
< titles relation = " collection " prefix = " " suffix = " . " / >
< publisher prefix = " " / >
2006-08-14 05:12:28 +00:00
< pages prefix = " ( " suffix = " ) " >
2006-08-30 04:00:19 +00:00
< label form = " short " text - transform = " capitalize " suffix = " . " / >
2006-08-14 05:12:28 +00:00
< number / >
< / pages >
2006-08-03 04:54:16 +00:00
< / group >
2006-08-14 05:12:28 +00:00
< access prefix = " " / >
2006-08-12 23:23:56 +00:00
< / type >
< type name = " article " >
< author / >
< date prefix = " ( " suffix = " ). " >
< year / >
< / date >
< group suffix = " . " >
2006-08-14 05:12:28 +00:00
< titles prefix = " " / >
< group prefix = " ( " suffix = " ) " delimiter = " , " >
< editor / >
< translator / >
< / group >
2006-08-12 23:23:56 +00:00
< / group >
< group class = " container " prefix = " " suffix = " . " >
2006-08-14 21:54:45 +00:00
< titles relation = " container " font - style = " italic " / >
2006-08-14 05:12:28 +00:00
< volume prefix = " , " font - style = " italic " / >
< issue prefix = " ( " suffix = " ) " / >
< pages prefix = " , " / >
2006-08-12 23:23:56 +00:00
< / group >
2006-08-14 05:12:28 +00:00
< access prefix = " " / >
2006-08-12 23:23:56 +00:00
< / type >
< / choose >
< / item >
< / layout >
< / bibliography >
2006-08-14 20:34:13 +00:00
< / style > ' );
2006-10-02 23:15:27 +00:00
REPLACE INTO " csl " VALUES ( ' http://purl.org/net/xbiblio/csl/styles/chicago-note.csl ' , ' 2006-10-02 17:00:00 ' , ' Chicago Manual of Style (Note) ' ,
2006-08-14 20:34:13 +00:00
' <?xml version="1.0" encoding="UTF-8"?>
< ? oxygen RNGSchema = " ../schema/trunk/csl.rnc " type = " compact " ? >
< style xmlns = " http://purl.org/net/xbiblio/csl " class = " note " xml : lang = " en " >
< info >
< title > Chicago Note Sans Reference List < / title >
< id > http : / / purl . org / net / xbiblio / csl / styles / chicago - note . csl < / id >
< author >
< name > Bruce D ’ Arcus < / name >
< email > bdarcus @ sourceforge . net < / email >
< / author >
2006-08-19 23:14:27 +00:00
< contributor >
< name > Simon Kornblith < / name >
< email > simon @ simonster . com < / email >
< / contributor >
2006-08-30 04:00:19 +00:00
< contributor >
< name > Johan Kool < / name >
< email > johankool @ users . sourceforge . net < / email >
< / contributor >
2006-09-05 01:09:04 +00:00
< updated > 2006 - 09 - 04 T20 : 27 : 00 + 05 : 00 < / updated >
2006-08-14 20:34:13 +00:00
< summary > The note - without - bibliography variant of the Chicago style . < / summary >
< / info >
< defaults >
< contributor >
2006-08-30 04:00:19 +00:00
< label form = " short " suffix = " . " text - transform = " lowercase " / >
< name and = " text " delimiter = " , " / >
2006-08-14 20:34:13 +00:00
< / contributor >
< author >
2006-08-30 04:00:19 +00:00
< name and = " text " delimiter = " , " / >
< label form = " short " prefix = " , " suffix = " . " text - transform = " lowercase " / >
2006-08-14 20:34:13 +00:00
< substitute >
< choose >
< editor / >
< translator / >
< / choose >
< / substitute >
< / author >
< locator >
< number / >
< / locator >
< titles >
< title / >
< / titles >
< date >
2006-09-05 01:09:04 +00:00
< month suffix = " " text - transform = " capitalize " / >
< day suffix = " , " / >
2006-08-14 20:34:13 +00:00
< year / >
< / date >
< publisher >
< place suffix = " : " / >
< name / >
< / publisher >
< access >
< url / >
2006-09-05 01:09:04 +00:00
< date prefix = " ( " suffix = " ) " >
< text term - name = " accessed " suffix = " " / >
< month suffix = " " text - transform = " capitalize " / >
< day suffix = " , " / >
< year / >
< / date >
2006-08-14 20:34:13 +00:00
< / access >
< / defaults >
2006-09-05 01:09:04 +00:00
< citation suffix = " . " delimiter = " ; " >
2006-08-14 20:34:13 +00:00
< et - al min - authors = " 4 " use - first = " 1 " / >
< layout >
< item >
< choose >
< type name = " book " >
2006-10-03 21:08:02 +00:00
< group delimiter = " , " >
< author / >
< titles font - style = " italic " / >
< editor / >
< translator / >
< / group >
2006-08-14 20:34:13 +00:00
< group prefix = " ( " suffix = " ) " delimiter = " , " >
< publisher / >
2006-09-05 01:09:04 +00:00
< date >
< year / >
< / date >
2006-08-14 20:34:13 +00:00
< / group >
< pages prefix = " , " / >
2006-09-05 01:09:04 +00:00
< access prefix = " , " / >
2006-08-14 20:34:13 +00:00
< / type >
< type name = " chapter " >
2006-10-03 21:08:02 +00:00
< group delimiter = " , " >
< author / >
< titles quotes = " true " / >
< / group >
2006-08-14 20:34:13 +00:00
< group class = " container " >
< text term - name = " in " text - transform = " lowercase " / >
2006-10-03 21:08:02 +00:00
< group delimiter = " , " >
< titles relation = " container " prefix = " " font - style = " italic " / >
< editor / >
< translator / >
< / group >
2006-08-14 20:34:13 +00:00
< group prefix = " ( " suffix = " ) " delimiter = " , " >
< publisher / >
2006-09-05 01:09:04 +00:00
< date >
< year / >
< / date >
2006-08-14 20:34:13 +00:00
< / group >
2006-09-05 01:09:04 +00:00
< pages prefix = " , " / >
< access prefix = " , " / >
2006-08-14 20:34:13 +00:00
< / group >
< / type >
2006-09-05 01:09:04 +00:00
< type name = " article " >
< group delimiter = " , " >
2006-10-03 21:08:02 +00:00
< author / >
< titles quotes = " true " / >
< titles relation = " container " font - style = " italic " / >
2006-09-05 01:09:04 +00:00
< date / >
< access / >
< / group >
2006-08-14 20:34:13 +00:00
< / type >
2006-09-05 01:09:04 +00:00
< type name = " article-journal " >
2006-10-03 21:08:02 +00:00
< group delimiter = " , " >
< author / >
< titles quotes = " true " / >
< titles relation = " container " font - style = " italic " / >
< / group >
2006-09-05 01:09:04 +00:00
< volume prefix = " " / >
< issue prefix = " , " >
< label form = " short " text - transform = " lowercase " suffix = " . " / >
< number / >
< / issue >
< date prefix = " ( " suffix = " ) " / >
< pages prefix = " : " / >
< access prefix = " , " / >
2006-08-14 20:34:13 +00:00
< / type >
< / choose >
< / item >
< item position = " subsequent " ibid = " true " >
< author / >
< title prefix = " , " / >
< / item >
< / layout >
< / citation >
2006-08-30 06:12:26 +00:00
< / style > ' );
2006-10-02 23:15:27 +00:00
REPLACE INTO " csl " VALUES ( ' http://purl.org/net/xbiblio/csl/styles/mla.csl ' , ' 2006-10-02 17:00:00 ' , ' Modern Language Association ' ,
2006-08-30 06:12:26 +00:00
' <?xml version="1.0" encoding="UTF-8"?>
< ? oxygen RNGSchema = " ../schema/trunk/csl.rnc " type = " compact " ? >
< style xmlns = " http://purl.org/net/xbiblio/csl " class = " author " xml : lang = " en " >
< info >
< title > Modern Language Association < / title >
< id > http : / / purl . org / net / xbiblio / csl / styles / mla . csl < / id >
< link > http : / / purl . org / net / xbiblio / csl / styles / mla . csl < / link >
< author >
< name > Bruce D ’ Arcus < / name >
< email > bdarcus @ sourceforge . net < / email >
< / author >
< contributor >
< name > Johan Kool < / name >
< email > johankool @ users . sourceforge . net < / email >
< / contributor >
< contributor >
< name > Simon Kornblith < / name >
< email > simon @ simonster . com < / email >
< / contributor >
2006-09-05 01:09:04 +00:00
< updated > 2006 - 09 - 04 T20 : 28 : 00 + 05 : 00 < / updated >
2006-08-30 06:12:26 +00:00
< / info >
< defaults >
< contributor name - as - sort - order = " first " >
2006-09-05 01:09:04 +00:00
< name and = " text " sort - separator = " , " delimiter = " , " delimiter - precedes - last = " always " / >
2006-08-30 06:12:26 +00:00
< label form = " short " suffix = " . " / >
< / contributor >
< author >
< substitute >
< choose >
< editor / >
< titles / >
< / choose >
< / substitute >
< / author >
< locator >
< number / >
< / locator >
< titles >
< title / >
< / titles >
< date >
< year / >
< / date >
< publisher >
< place suffix = " : " / >
< name / >
< / publisher >
< access >
< date >
< day suffix = " " / >
< month suffix = " " / >
< year / >
< / date >
2006-09-05 01:09:04 +00:00
< url prefix = " < " suffix = " > " / >
2006-08-30 06:12:26 +00:00
< / access >
< / defaults >
< citation prefix = " ( " suffix = " ) " delimiter = " ; " >
< et - al min - authors = " 6 " use - first = " 6 " position = " first " / >
< et - al min - authors = " 6 " use - first = " 1 " position = " subsequent " / >
< layout >
< item >
< author form = " short " / >
< title form = " short " when - multiple - author - items = " true " prefix = " “ " suffix = " ” " / >
< locator prefix = " " / >
< / item >
< / layout >
< / citation >
< bibliography subsequent - author - substitute = " --- " >
< sort algorithm = " author-date " / >
< et - al min - authors = " 4 " use - first = " 1 " / >
< layout >
< list >
< heading >
< text term - name = " references " / >
< / heading >
< / list >
< item >
< choose >
< type name = " book " >
< author suffix = " . " / >
< titles font - style = " italic " prefix = " " suffix = " . " / >
< group prefix = " " suffix = " . " delimiter = " , " >
< edition / >
< publisher / >
< date / >
< / group >
< access prefix = " " suffix = " . " / >
< / type >
< type name = " chapter " >
< author suffix = " . " / >
< titles prefix = " “ " suffix = " .” " / >
< group class = " container " prefix = " " suffix = " . " >
< titles relation = " container " font - style = " italic " suffix = " . " / >
< editor prefix = " " suffix = " . " >
< label form = " short " suffix = " . " text - transform = " capitalize " / >
< name and = " text " delimiter = " , " / >
< / editor >
< titles relation = " collection " prefix = " " suffix = " . " / >
< publisher prefix = " " / >
< date prefix = " , " / >
< / group >
< pages prefix = " " suffix = " . " / >
< access prefix = " " suffix = " . " / >
< / type >
< type name = " article " >
< author suffix = " . " / >
< titles prefix = " “ " suffix = " .” " / >
< group class = " container " >
< editor prefix = " " suffix = " . " / >
< titles relation = " container " font - style = " italic " prefix = " " suffix = " . " / >
< / group >
< volume prefix = " " / >
< issue prefix = " . " / >
< group prefix = " " suffix = " . " >
< date prefix = " ( " suffix = " ) " / >
< pages prefix = " : " / >
< / group >
< access prefix = " " suffix = " . " / >
< / type >
< / choose >
< / item >
< / layout >
< / bibliography >
2006-08-14 05:12:28 +00:00
< / style > ' );