This patch (as1239) updates the kernel's treatment of Unicode. The character-set conversion routines are well behind the current state of the Unicode specification: They don't recognize the existence of code points beyond plane 0 or of surrogate pairs in the UTF-16 encoding. The old wchar_t 16-bit type is retained because it's still used in lots of places. This shouldn't cause any new problems; if a conversion now results in an invalid 16-bit code then before it must have yielded an undefined code. Difficult-to-read names like "utf_mbstowcs" are replaced with more transparent names like "utf8s_to_utf16s" and the ordering of the parameters is rationalized (buffer lengths come immediately after the pointers they refer to, and the inputs precede the outputs). Fortunately the low-level conversion routines are used in only a few places; the interfaces to the higher-level uni2char and char2uni methods have been left unchanged. Signed-off-by: Alan Stern <stern@rowland.harvard.edu> Acked-by: Clemens Ladisch <clemens@ladisch.de> Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
		
			
				
	
	
		
			68 lines
		
	
	
	
		
			1.2 KiB
			
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			68 lines
		
	
	
	
		
			1.2 KiB
			
		
	
	
	
		
			C
		
	
	
	
	
	
/*
 | 
						|
 * Module for handling utf8 just like any other charset.
 | 
						|
 * By Urban Widmark 2000
 | 
						|
 */
 | 
						|
 | 
						|
#include <linux/module.h>
 | 
						|
#include <linux/kernel.h>
 | 
						|
#include <linux/string.h>
 | 
						|
#include <linux/nls.h>
 | 
						|
#include <linux/errno.h>
 | 
						|
 | 
						|
/*
 * Byte-to-byte identity map.  init_nls_utf8() fills it so that
 * identity[i] == i, and the nls_table in this file uses it for both
 * charset2lower and charset2upper, i.e. no case conversion is applied.
 */
static unsigned char identity[256];
 | 
						|
 | 
						|
/*
 * Convert one wide character to its UTF-8 form via utf32_to_utf8().
 *
 * uni:      character to convert
 * out:      destination buffer
 * boundlen: space available at out, passed through to utf32_to_utf8()
 *
 * Returns whatever non-negative value utf32_to_utf8() reports (presumably
 * the number of bytes stored -- confirm against its definition),
 * -ENAMETOOLONG when boundlen is not positive, or -EINVAL when
 * utf32_to_utf8() fails; in that last case out[0] is set to '?'.
 */
static int uni2char(wchar_t uni, unsigned char *out, int boundlen)
{
	int len;

	if (boundlen > 0) {
		len = utf32_to_utf8(uni, out, boundlen);
		if (len >= 0)
			return len;

		/* Conversion failed: hand back a placeholder character. */
		*out = '?';
		return -EINVAL;
	}

	/* No room at all in the output buffer. */
	return -ENAMETOOLONG;
}
 | 
						|
 | 
						|
static int char2uni(const unsigned char *rawstring, int boundlen, wchar_t *uni)
 | 
						|
{
 | 
						|
	int n;
 | 
						|
	unicode_t u;
 | 
						|
 | 
						|
	n = utf8_to_utf32(rawstring, boundlen, &u);
 | 
						|
	if (n < 0 || u > MAX_WCHAR_T) {
 | 
						|
		*uni = 0x003f;	/* ? */
 | 
						|
		return -EINVAL;
 | 
						|
	}
 | 
						|
	*uni = (wchar_t) u;
 | 
						|
	return n;
 | 
						|
}
 | 
						|
 | 
						|
static struct nls_table table = {
 | 
						|
	.charset	= "utf8",
 | 
						|
	.uni2char	= uni2char,
 | 
						|
	.char2uni	= char2uni,
 | 
						|
	.charset2lower	= identity,	/* no conversion */
 | 
						|
	.charset2upper	= identity,
 | 
						|
	.owner		= THIS_MODULE,
 | 
						|
};
 | 
						|
 | 
						|
/*
 * Module entry point: populate the identity map (identity[i] = i for all
 * 256 byte values) and then register the utf8 table.
 *
 * Returns the result of register_nls().
 */
static int __init init_nls_utf8(void)
{
	unsigned int idx = 0;

	while (idx < 256) {
		identity[idx] = idx;
		idx++;
	}

	return register_nls(&table);
}
 | 
						|
 | 
						|
/* Module exit point: unregister the utf8 table. */
static void __exit exit_nls_utf8(void)
{
	unregister_nls(&table);
}
 | 
						|
 | 
						|
/* Declare init_nls_utf8() as this module's initialization routine. */
module_init(init_nls_utf8)
 | 
						|
/* Declare exit_nls_utf8() as this module's cleanup routine. */
module_exit(exit_nls_utf8)
 | 
						|
/* License tag recorded for this module. */
MODULE_LICENSE("Dual BSD/GPL");
 |