Update character set handling

Restore prepopulated charset table, but this time with just the
encodings from the WHATWG Encoding Standard. Assigning a charset to
Zotero.Item::attachmentCharset runs the value through
Zotero.CharacterSets.toCanonical() automatically.

This migrates attachment charsets to the new canonical values, clearing any
that are unsupported.

Other legacy mappings could still be added back, as discussed in #760.
This commit is contained in:
Dan Stillman 2015-06-12 02:20:07 -04:00
parent 4bc5479b19
commit f7216298b4
13 changed files with 91 additions and 46 deletions

View file

@ -7,15 +7,33 @@ describe("Zotero.File", function () {
it("should handle an extended character", function* () {
var contents = yield Zotero.File.getContentsAsync(
OS.Path.join(getTestDataDirectory().path, "utf8Char.txt")
OS.Path.join(getTestDataDirectory().path, "charsets", "utf8.txt")
);
assert.lengthOf(contents, 3);
assert.equal(contents, "A\u72acB");
})
// Reads the "charsets/windows1252.txt" fixture from the test data directory,
// passing "windows-1252" as the second argument to getContentsAsync()
// (presumably the charset to decode with — confirm against Zotero.File).
// The result is expected to be exactly one character: U+00E9.
it("should handle an extended Windows-1252 character", function* () {
var contents = yield Zotero.File.getContentsAsync(
OS.Path.join(getTestDataDirectory().path, "charsets", "windows1252.txt"),
"windows-1252"
);
assert.lengthOf(contents, 1);
assert.equal(contents, "\u00E9");
})
// Reads the "charsets/gbk.txt" fixture from the test data directory,
// passing "gbk" as the second argument to getContentsAsync()
// (presumably the charset to decode with — confirm against Zotero.File).
// The result is expected to be exactly one character: U+4E02.
it("should handle a GBK character", function* () {
var contents = yield Zotero.File.getContentsAsync(
OS.Path.join(getTestDataDirectory().path, "charsets", "gbk.txt"),
"gbk"
);
assert.lengthOf(contents, 1);
assert.equal(contents, "\u4e02");
})
it("should handle an invalid character", function* () {
var contents = yield Zotero.File.getContentsAsync(
OS.Path.join(getTestDataDirectory().path, "invalidChar.txt")
OS.Path.join(getTestDataDirectory().path, "charsets", "invalid.txt")
);
assert.lengthOf(contents, 3);
assert.equal(contents, "A\uFFFDB");