Commit f46ba2235feab5e686b1234c328a0577cde86e21
Committed by
Linus Torvalds
1 parent
15ad7cdcfd
Exists in
master
and in
4 other branches
[PATCH] fs: make nls_cp936.c handle some U00XY characters and U20AC correctly
Twenty characters in cp936 are not handled correctly. They are all in the U00 plane. nls_cp936 converts every U00XY to XY, but this is not correct for some characters (e.g. U00B7 -> A1A4, U00A8 -> A1A7). This problem is fixed by generating u2c_00 from all of the c2u_xx tables and changing uni2char() to give the U00 plane special handling. The "€" (U20AC, 0x80 in cp936) is now also handled properly. Acked-by: Gang Chen <cgdlut@gmail.com> Cc: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Showing 1 changed file with 103 additions and 10 deletions Side-by-side Diff
fs/nls/nls_cp936.c
... | ... | @@ -4421,6 +4421,73 @@ |
4421 | 4421 | c2u_F8, c2u_F9, c2u_FA, c2u_FB, c2u_FC, c2u_FD, c2u_FE, NULL, |
4422 | 4422 | }; |
4423 | 4423 | |
4424 | +static unsigned char u2c_00[512] = { | |
4425 | + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x03 */ | |
4426 | + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x04-0x07 */ | |
4427 | + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0B */ | |
4428 | + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x0C-0x0F */ | |
4429 | + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10-0x13 */ | |
4430 | + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x14-0x17 */ | |
4431 | + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x18-0x1B */ | |
4432 | + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1C-0x1F */ | |
4433 | + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x23 */ | |
4434 | + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x24-0x27 */ | |
4435 | + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2B */ | |
4436 | + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x2C-0x2F */ | |
4437 | + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x33 */ | |
4438 | + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x34-0x37 */ | |
4439 | + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3B */ | |
4440 | + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x3C-0x3F */ | |
4441 | + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x43 */ | |
4442 | + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x44-0x47 */ | |
4443 | + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4B */ | |
4444 | + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x4C-0x4F */ | |
4445 | + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x50-0x53 */ | |
4446 | + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x54-0x57 */ | |
4447 | + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x58-0x5B */ | |
4448 | + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x5C-0x5F */ | |
4449 | + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x60-0x63 */ | |
4450 | + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x64-0x67 */ | |
4451 | + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x68-0x6B */ | |
4452 | + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x6C-0x6F */ | |
4453 | + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x70-0x73 */ | |
4454 | + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x74-0x77 */ | |
4455 | + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x78-0x7B */ | |
4456 | + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x7C-0x7F */ | |
4457 | + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x80-0x83 */ | |
4458 | + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x84-0x87 */ | |
4459 | + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88-0x8B */ | |
4460 | + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x8C-0x8F */ | |
4461 | + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x93 */ | |
4462 | + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x94-0x97 */ | |
4463 | + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9B */ | |
4464 | + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9C-0x9F */ | |
4465 | + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xA0-0xA3 */ | |
4466 | + 0xA1, 0xE8, 0x00, 0x00, 0x00, 0x00, 0xA1, 0xEC, /* 0xA4-0xA7 */ | |
4467 | + 0xA1, 0xA7, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xA8-0xAB */ | |
4468 | + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xAC-0xAF */ | |
4469 | + 0xA1, 0xE3, 0xA1, 0xC0, 0x00, 0x00, 0x00, 0x00, /* 0xB0-0xB3 */ | |
4470 | + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xA1, 0xA4, /* 0xB4-0xB7 */ | |
4471 | + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xB8-0xBB */ | |
4472 | + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xBC-0xBF */ | |
4473 | + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xC0-0xC3 */ | |
4474 | + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xC4-0xC7 */ | |
4475 | + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xC8-0xCB */ | |
4476 | + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xCC-0xCF */ | |
4477 | + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xD0-0xD3 */ | |
4478 | + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xA1, 0xC1, /* 0xD4-0xD7 */ | |
4479 | + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xD8-0xDB */ | |
4480 | + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xDC-0xDF */ | |
4481 | + 0xA8, 0xA4, 0xA8, 0xA2, 0x00, 0x00, 0x00, 0x00, /* 0xE0-0xE3 */ | |
4482 | + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xE4-0xE7 */ | |
4483 | + 0xA8, 0xA8, 0xA8, 0xA6, 0xA8, 0xBA, 0x00, 0x00, /* 0xE8-0xEB */ | |
4484 | + 0xA8, 0xAC, 0xA8, 0xAA, 0x00, 0x00, 0x00, 0x00, /* 0xEC-0xEF */ | |
4485 | + 0x00, 0x00, 0x00, 0x00, 0xA8, 0xB0, 0xA8, 0xAE, /* 0xF0-0xF3 */ | |
4486 | + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xA1, 0xC2, /* 0xF4-0xF7 */ | |
4487 | + 0x00, 0x00, 0xA8, 0xB4, 0xA8, 0xB2, 0x00, 0x00, /* 0xF8-0xFB */ | |
4488 | + 0xA8, 0xB9, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xFC-0xFF */ | |
4489 | +}; | |
4490 | + | |
4424 | 4491 | static unsigned char u2c_01[512] = { |
4425 | 4492 | 0xA8, 0xA1, 0xA8, 0xA1, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x03 */ |
4426 | 4493 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x04-0x07 */ |
... | ... | @@ -10825,7 +10892,7 @@ |
10825 | 10892 | }; |
10826 | 10893 | |
10827 | 10894 | static unsigned char *page_uni2charset[256] = { |
10828 | - NULL, u2c_01, u2c_02, u2c_03, u2c_04, NULL, NULL, NULL, | |
10895 | + u2c_00, u2c_01, u2c_02, u2c_03, u2c_04, NULL, NULL, NULL, | |
10829 | 10896 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, |
10830 | 10897 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, |
10831 | 10898 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, |
10832 | 10899 | |
10833 | 10900 | |
... | ... | @@ -10936,12 +11003,35 @@ |
10936 | 11003 | unsigned char *uni2charset; |
10937 | 11004 | unsigned char cl = uni&0xFF; |
10938 | 11005 | unsigned char ch = (uni>>8)&0xFF; |
10939 | - int n; | |
11006 | + unsigned char out0,out1; | |
10940 | 11007 | |
10941 | 11008 | if (boundlen <= 0) |
10942 | 11009 | return -ENAMETOOLONG; |
10943 | 11010 | |
11011 | + if (uni == 0x20ac) {/* Euro symbol.The only exception with a non-ascii unicode */ | |
11012 | + out[0] = 0x80; | |
11013 | + return 1; | |
11014 | + } | |
10944 | 11015 | |
11016 | + if (ch == 0) { /* handle the U00 plane*/ | |
11017 | + /* if (cl == 0) return -EINVAL;*/ /*U0000 is legal in cp936*/ | |
11018 | + out0 = u2c_00[cl*2]; | |
11019 | + out1 = u2c_00[cl*2+1]; | |
11020 | + if (out0 == 0x00 && out1 == 0x00) { | |
11021 | + if (cl<0x80) { | |
11022 | + out[0] = cl; | |
11023 | + return 1; | |
11024 | + } | |
11025 | + return -EINVAL; | |
11026 | + } else { | |
11027 | + if (boundlen <= 1) | |
11028 | + return -ENAMETOOLONG; | |
11029 | + out[0] = out0; | |
11030 | + out[1] = out1; | |
11031 | + return 2; | |
11032 | + } | |
11033 | + } | |
11034 | + | |
10945 | 11035 | uni2charset = page_uni2charset[ch]; |
10946 | 11036 | if (uni2charset) { |
10947 | 11037 | if (boundlen <= 1) |
10948 | 11038 | |
... | ... | @@ -10950,15 +11040,10 @@ |
10950 | 11040 | out[1] = uni2charset[cl*2+1]; |
10951 | 11041 | if (out[0] == 0x00 && out[1] == 0x00) |
10952 | 11042 | return -EINVAL; |
10953 | - n = 2; | |
10954 | - } else if (ch==0 && cl) { | |
10955 | - out[0] = cl; | |
10956 | - n = 1; | |
11043 | + return 2; | |
10957 | 11044 | } |
10958 | 11045 | else |
10959 | 11046 | return -EINVAL; |
10960 | - | |
10961 | - return n; | |
10962 | 11047 | } |
10963 | 11048 | |
10964 | 11049 | static int char2uni(const unsigned char *rawstring, int boundlen, |
... | ... | @@ -10972,7 +11057,11 @@ |
10972 | 11057 | return -ENAMETOOLONG; |
10973 | 11058 | |
10974 | 11059 | if (boundlen == 1) { |
10975 | - *uni = rawstring[0]; | |
11060 | + if (rawstring[0]==0x80) { /* Euro symbol.The only exception with a non-ascii unicode */ | |
11061 | + *uni = 0x20ac; | |
11062 | + } else { | |
11063 | + *uni = rawstring[0]; | |
11064 | + } | |
10976 | 11065 | return 1; |
10977 | 11066 | } |
10978 | 11067 | |
... | ... | @@ -10986,7 +11075,11 @@ |
10986 | 11075 | return -EINVAL; |
10987 | 11076 | n = 2; |
10988 | 11077 | } else{ |
10989 | - *uni = ch; | |
11078 | + if (ch==0x80) {/* Euro symbol.The only exception with a non-ascii unicode */ | |
11079 | + *uni = 0x20ac; | |
11080 | + } else { | |
11081 | + *uni = ch; | |
11082 | + } | |
10990 | 11083 | n = 1; |
10991 | 11084 | } |
10992 | 11085 | return n; |