Server: Add support for UTF in server headers (RFC2047)

This commit is contained in:
Adomas Venčkauskas 2025-07-24 12:11:19 +03:00 committed by Dan Stillman
parent 81d9ed77e9
commit fa52445ae6
2 changed files with 144 additions and 4 deletions

View file

@ -142,6 +142,78 @@ Zotero.Server.networkStreamToString = function (stream, length) {
return Zotero.Utilities.Internal.decodeUTF8(data);
};
/**
 * Decode RFC 2047 encoded-words in a header value
 *
 * Supports both Q-encoding (quoted-printable-like) and B-encoding (base64).
 * Encoded-words may appear anywhere in the value, mixed with ordinary text
 * (RFC 2047 section 5). Whitespace that only separates two adjacent
 * encoded-words is dropped (RFC 2047 section 6.2), so a long value that was
 * split across several encoded-words is joined back together seamlessly.
 *
 * Failure handling:
 *  - An encoded-word whose base64 payload is invalid is left in the output
 *    exactly as it appeared in the input.
 *  - If the charset label is unknown, or the bytes are not valid in that
 *    charset, the transfer-decoded bytes are returned as a one-char-per-byte
 *    string.
 *  - A non-string `value` is returned unchanged.
 *
 * @param {String} value - Header value to decode
 * @return {String} Decoded header value
 */
Zotero.Server.decodeRFC2047 = function (value) {
	// Nothing to decode: non-strings and strings without an encoded-word opener
	if (typeof value !== "string" || !value.includes("=?")) {
		return value;
	}

	/**
	 * Interpret a binary string (one char per byte) in the given charset.
	 * Returns the input unchanged if it cannot be decoded.
	 */
	function textDecode(encoding, byteString) {
		// Only strings made purely of byte-sized chars can be turned into bytes.
		// (This also leaves the empty string alone.)
		if (!/^[\x00-\xFF]+$/.test(byteString)) {
			return byteString;
		}
		let decoder;
		try {
			decoder = new TextDecoder(encoding, { fatal: true });
		}
		// TextDecoder constructor threw - unrecognized encoding label
		catch {
			Zotero.debug(`decodeRFC2047: Unrecognized encoding: ${encoding}`, 1);
			return byteString;
		}
		try {
			return decoder.decode(Uint8Array.from(byteString, ch => ch.charCodeAt(0)));
		}
		// With `fatal: true`, decode() throws on bytes that are invalid for the charset
		catch {
			Zotero.debug(`decodeRFC2047: Invalid ${encoding} byte sequence`, 1);
			return byteString;
		}
	}

	/**
	 * Decode the payload of a single encoded-word.
	 * Returns null when the payload is malformed (invalid base64), in which
	 * case the caller keeps the encoded-word verbatim.
	 */
	function decodeWord(charset, encoding, text) {
		if (encoding === "q" || encoding === "Q") {
			// RFC 2047 section 4.2: "_" stands for a space, "=XX" for a hex-encoded byte
			const byteString = text
				.replace(/_/g, " ")
				.replace(/=([0-9a-fA-F]{2})/g, (_match, hex) => String.fromCharCode(parseInt(hex, 16)));
			return textDecode(charset, byteString);
		}
		// Otherwise the encoding is b or B - base64 (RFC 2047 section 4.1)
		let byteString;
		try {
			byteString = atob(text);
		}
		catch {
			Zotero.debug(`decodeRFC2047: Invalid base64: ${text}`, 1);
			return null;
		}
		return textDecode(charset, byteString);
	}

	// RFC 2047, section 2.4
	// encoded-word = "=?" charset "?" encoding "?" encoded-text "?="
	// charset = token (but let's restrict to characters that denote a
	// possibly valid encoding).
	// encoding = q or b
	// encoded-text = any printable ASCII character other than ? or space.
	const encodedWord = /=\?([\w-]+)\?([QqBb])\?((?:[^? ]|\?(?!=))*)\?=/g;

	let result = "";
	// Index in `value` just past the previous encoded-word
	let cursor = 0;
	// Whether the previous encoded-word was decoded (rather than kept verbatim)
	let previousDecoded = false;
	let match;
	// Every match consumes at least "=?c?Q??=", so exec() always advances
	while ((match = encodedWord.exec(value)) !== null) {
		const [raw, charset, encoding, text] = match;
		const gap = value.slice(cursor, match.index);
		const decoded = decodeWord(charset, encoding, text);

		// RFC 2047 section 6.2: whitespace separating two adjacent encoded-words
		// is not part of the text. Keep the gap in every other case.
		const gapIsSeparator = previousDecoded
			&& decoded !== null
			&& /^[ \t\r\n]+$/.test(gap);
		if (!gapIsSeparator) {
			result += gap;
		}

		result += decoded === null ? raw : decoded;
		previousDecoded = decoded !== null;
		cursor = match.index + raw.length;
	}
	return result + value.slice(cursor);
};
Zotero.Server.RequestHandler = function (request, response) {
this.body = "";
@ -237,8 +309,10 @@ Zotero.Server.RequestHandler.prototype.handleRequest = async function () {
// Parse headers into this.headers with lowercase names
this.headers = new Zotero.Server.Headers();
for (let { data: name } of request.headers) {
requestDebug += `${name}: ${request.getHeader(name)}\n`;
this.headers[name.toLowerCase()] = request.getHeader(name);
let headerValue = request.getHeader(name);
requestDebug += `${name}: ${headerValue}\n`;
// Decode RFC 2047 encoded header values
this.headers[name.toLowerCase()] = Zotero.Server.decodeRFC2047(headerValue);
}
Zotero.debug(requestDebug, 5);
@ -531,7 +605,9 @@ Zotero.Server.RequestHandler.prototype._decodeMultipartData = function(data) {
contentDisposition.shift();
for (let param of contentDisposition) {
let nameVal = param.trim().split('=');
fieldData.params[nameVal[0]] = nameVal[1].trim().slice(1, -1);
// Apply RFC 2047 decoding to parameter values
let paramValue = nameVal[1].trim().slice(1, -1);
fieldData.params[nameVal[0]] = Zotero.Server.decodeRFC2047(paramValue);
}
}
}
@ -540,7 +616,8 @@ Zotero.Server.RequestHandler.prototype._decodeMultipartData = function(data) {
// Content-Type: image/png
let contentType = header.split(':');
if (contentType.length > 1) {
fieldData.params.contentType = contentType[1].trim();
// Apply RFC 2047 decoding to content type
fieldData.params.contentType = Zotero.Server.decodeRFC2047(contentType[1].trim());
}
}
}

View file

@ -331,6 +331,69 @@ describe("Zotero.Server", function () {
assert.ok(called);
assert.equal(req.status, 204);
});
// Sends a POST whose custom header is a single RFC 2047 Q-encoded word and
// checks that the endpoint receives the decoded text in options.headers.
it('should decode UTF-8 quoted-printable encoded custom headers', async function () {
	let called = false;
	let endpoint = "/test/" + Zotero.Utilities.randomString();

	// Create RFC 2047 Q-encoded header value
	let originalText = "Hello 🌐";
	// Convert to UTF-8 bytes then Q-encode
	const utf8Bytes = new TextEncoder().encode(originalText);
	let encoded = '';
	for (let byte of utf8Bytes) {
		// Encode spaces as underscores, other special chars as =XX
		if (byte === 32) { // space
			encoded += '_';
		}
		else if (byte >= 33 && byte <= 126 && byte !== 61 && byte !== 63 && byte !== 95) {
			// Printable ASCII except =, ?, _ is passed through literally
			encoded += String.fromCharCode(byte);
		}
		else {
			// Everything else (including each UTF-8 byte of the emoji) becomes =XX
			encoded += '=' + byte.toString(16).toUpperCase().padStart(2, '0');
		}
	}
	let rfc2047Header = `=?utf-8?Q?${encoded}?=`;

	Zotero.Server.Endpoints[endpoint] = function () {};
	Zotero.Server.Endpoints[endpoint].prototype = {
		supportedMethods: ["POST"],
		supportedDataTypes: ["application/pdf"],

		init: function (options) {
			called = true;
			assert.isObject(options);
			// Header names are stored lowercased by the request handler
			assert.property(options.headers, "custom-header");
			// The encoded header should decode back to the original text
			assert.equal(options.headers["custom-header"], originalText);
			return 204;
		}
	};

	let pdf = await File.createFromFileName(OS.Path.join(getTestDataDirectory().path, 'test.pdf'));
	let req = await Zotero.HTTP.request(
		"POST",
		serverPath + endpoint,
		{
			headers: {
				"Content-Type": "application/pdf",
				// Use the dynamically created RFC 2047 Q-encoded header
				"Custom-Header": rfc2047Header
			},
			body: pdf
		}
	);

	assert.ok(called);
	assert.equal(req.status, 204);
});
});
});
});