Commit f46ba2235feab5e686b1234c328a0577cde86e21

Authored by Jun Chen
Committed by Linus Torvalds
1 parent 15ad7cdcfd

[PATCH] fs: make nls_cp936.c handle some U00XY characters and U20AC correctly

Twenty characters in cp936 are not handled correctly.  They are all in the
U00 plane.  nls_cp936 converts every U00XY to the single byte XY, but this is
incorrect for some characters (e.g. U00B7 -> A1A4, U00A8 -> A1A7).

This problem is fixed by generating u2c_00 from all of the c2u_xx tables and
changing uni2char() to give the U00 plane special handling.  The "€" (U20AC,
0x80 in cp936) is now also handled properly.

Acked-by: Gang Chen <cgdlut@gmail.com>
Cc: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

Showing 1 changed file with 103 additions and 10 deletions Side-by-side Diff

... ... @@ -4421,6 +4421,73 @@
4421 4421 c2u_F8, c2u_F9, c2u_FA, c2u_FB, c2u_FC, c2u_FD, c2u_FE, NULL,
4422 4422 };
4423 4423  
  4424 +static unsigned char u2c_00[512] = {
  4425 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x03 */
  4426 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x04-0x07 */
  4427 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0B */
  4428 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x0C-0x0F */
  4429 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10-0x13 */
  4430 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x14-0x17 */
  4431 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x18-0x1B */
  4432 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1C-0x1F */
  4433 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x23 */
  4434 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x24-0x27 */
  4435 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2B */
  4436 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x2C-0x2F */
  4437 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x33 */
  4438 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x34-0x37 */
  4439 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3B */
  4440 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x3C-0x3F */
  4441 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x43 */
  4442 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x44-0x47 */
  4443 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4B */
  4444 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x4C-0x4F */
  4445 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x50-0x53 */
  4446 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x54-0x57 */
  4447 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x58-0x5B */
  4448 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x5C-0x5F */
  4449 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x60-0x63 */
  4450 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x64-0x67 */
  4451 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x68-0x6B */
  4452 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x6C-0x6F */
  4453 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x70-0x73 */
  4454 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x74-0x77 */
  4455 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x78-0x7B */
  4456 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x7C-0x7F */
  4457 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x80-0x83 */
  4458 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x84-0x87 */
  4459 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88-0x8B */
  4460 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x8C-0x8F */
  4461 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x93 */
  4462 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x94-0x97 */
  4463 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9B */
  4464 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9C-0x9F */
  4465 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xA0-0xA3 */
  4466 + 0xA1, 0xE8, 0x00, 0x00, 0x00, 0x00, 0xA1, 0xEC, /* 0xA4-0xA7 */
  4467 + 0xA1, 0xA7, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xA8-0xAB */
  4468 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xAC-0xAF */
  4469 + 0xA1, 0xE3, 0xA1, 0xC0, 0x00, 0x00, 0x00, 0x00, /* 0xB0-0xB3 */
  4470 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xA1, 0xA4, /* 0xB4-0xB7 */
  4471 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xB8-0xBB */
  4472 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xBC-0xBF */
  4473 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xC0-0xC3 */
  4474 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xC4-0xC7 */
  4475 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xC8-0xCB */
  4476 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xCC-0xCF */
  4477 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xD0-0xD3 */
  4478 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xA1, 0xC1, /* 0xD4-0xD7 */
  4479 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xD8-0xDB */
  4480 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xDC-0xDF */
  4481 + 0xA8, 0xA4, 0xA8, 0xA2, 0x00, 0x00, 0x00, 0x00, /* 0xE0-0xE3 */
  4482 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xE4-0xE7 */
  4483 + 0xA8, 0xA8, 0xA8, 0xA6, 0xA8, 0xBA, 0x00, 0x00, /* 0xE8-0xEB */
  4484 + 0xA8, 0xAC, 0xA8, 0xAA, 0x00, 0x00, 0x00, 0x00, /* 0xEC-0xEF */
  4485 + 0x00, 0x00, 0x00, 0x00, 0xA8, 0xB0, 0xA8, 0xAE, /* 0xF0-0xF3 */
  4486 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xA1, 0xC2, /* 0xF4-0xF7 */
  4487 + 0x00, 0x00, 0xA8, 0xB4, 0xA8, 0xB2, 0x00, 0x00, /* 0xF8-0xFB */
  4488 + 0xA8, 0xB9, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xFC-0xFF */
  4489 +};
  4490 +
4424 4491 static unsigned char u2c_01[512] = {
4425 4492 0xA8, 0xA1, 0xA8, 0xA1, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x03 */
4426 4493 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x04-0x07 */
... ... @@ -10825,7 +10892,7 @@
10825 10892 };
10826 10893  
10827 10894 static unsigned char *page_uni2charset[256] = {
10828   - NULL, u2c_01, u2c_02, u2c_03, u2c_04, NULL, NULL, NULL,
  10895 + u2c_00, u2c_01, u2c_02, u2c_03, u2c_04, NULL, NULL, NULL,
10829 10896 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
10830 10897 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
10831 10898 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
10832 10899  
10833 10900  
... ... @@ -10936,12 +11003,35 @@
10936 11003 unsigned char *uni2charset;
10937 11004 unsigned char cl = uni&0xFF;
10938 11005 unsigned char ch = (uni>>8)&0xFF;
10939   - int n;
  11006 + unsigned char out0,out1;
10940 11007  
10941 11008 if (boundlen <= 0)
10942 11009 return -ENAMETOOLONG;
10943 11010  
  11011 + if (uni == 0x20ac) {/* Euro symbol.The only exception with a non-ascii unicode */
  11012 + out[0] = 0x80;
  11013 + return 1;
  11014 + }
10944 11015  
  11016 + if (ch == 0) { /* handle the U00 plane*/
  11017 + /* if (cl == 0) return -EINVAL;*/ /*U0000 is legal in cp936*/
  11018 + out0 = u2c_00[cl*2];
  11019 + out1 = u2c_00[cl*2+1];
  11020 + if (out0 == 0x00 && out1 == 0x00) {
  11021 + if (cl<0x80) {
  11022 + out[0] = cl;
  11023 + return 1;
  11024 + }
  11025 + return -EINVAL;
  11026 + } else {
  11027 + if (boundlen <= 1)
  11028 + return -ENAMETOOLONG;
  11029 + out[0] = out0;
  11030 + out[1] = out1;
  11031 + return 2;
  11032 + }
  11033 + }
  11034 +
10945 11035 uni2charset = page_uni2charset[ch];
10946 11036 if (uni2charset) {
10947 11037 if (boundlen <= 1)
10948 11038  
... ... @@ -10950,15 +11040,10 @@
10950 11040 out[1] = uni2charset[cl*2+1];
10951 11041 if (out[0] == 0x00 && out[1] == 0x00)
10952 11042 return -EINVAL;
10953   - n = 2;
10954   - } else if (ch==0 && cl) {
10955   - out[0] = cl;
10956   - n = 1;
  11043 + return 2;
10957 11044 }
10958 11045 else
10959 11046 return -EINVAL;
10960   -
10961   - return n;
10962 11047 }
10963 11048  
10964 11049 static int char2uni(const unsigned char *rawstring, int boundlen,
... ... @@ -10972,7 +11057,11 @@
10972 11057 return -ENAMETOOLONG;
10973 11058  
10974 11059 if (boundlen == 1) {
10975   - *uni = rawstring[0];
  11060 + if (rawstring[0]==0x80) { /* Euro symbol.The only exception with a non-ascii unicode */
  11061 + *uni = 0x20ac;
  11062 + } else {
  11063 + *uni = rawstring[0];
  11064 + }
10976 11065 return 1;
10977 11066 }
10978 11067  
... ... @@ -10986,7 +11075,11 @@
10986 11075 return -EINVAL;
10987 11076 n = 2;
10988 11077 } else{
10989   - *uni = ch;
  11078 + if (ch==0x80) {/* Euro symbol.The only exception with a non-ascii unicode */
  11079 + *uni = 0x20ac;
  11080 + } else {
  11081 + *uni = ch;
  11082 + }
10990 11083 n = 1;
10991 11084 }
10992 11085 return n;