Blame view
fs/udf/unicode.c
8.76 KB
1da177e4c Linux-2.6.12-rc2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 |
/* * unicode.c * * PURPOSE * Routines for converting between UTF-8 and OSTA Compressed Unicode. * Also handles filename mangling. * * DESCRIPTION * OSTA Compressed Unicode is explained in the OSTA UDF specification. * http://www.osta.org/ * UTF-8 is explained in IETF RFC 3629. * https://www.rfc-editor.org/rfc/rfc3629.txt * |
1da177e4c Linux-2.6.12-rc2 |
14 15 16 17 18 19 20 21 22 23 24 25 |
* COPYRIGHT * This file is distributed under the terms of the GNU General Public * License (GPL). Copies of the GPL can be obtained from: * ftp://prep.ai.mit.edu/pub/gnu/GPL * Each contributing author retains all rights to their own work. */ #include "udfdecl.h" #include <linux/kernel.h> #include <linux/string.h> /* for memset */ #include <linux/nls.h> |
f845fced9 udf: use crc_itu_... |
26 |
#include <linux/crc-itu-t.h> |
5a0e3ad6a include cleanup: ... |
27 |
#include <linux/slab.h> |
1da177e4c Linux-2.6.12-rc2 |
28 29 |
#include "udf_sb.h" |
3e7fc2055 udf: Join functio... |
30 31 32 |
/*
 * Encode one wide character as UTF-8.
 *
 * @uni:      character to encode
 * @out:      destination buffer
 * @boundlen: number of bytes available at @out
 *
 * Values below 0x80 take one byte, values below 0x800 take two bytes and
 * everything else is written as a three-byte sequence.
 *
 * Returns the number of bytes written, or -ENAMETOOLONG when @boundlen is
 * not positive or is too small for the sequence (nothing is written then).
 */
static int udf_uni2char_utf8(wchar_t uni,
			     unsigned char *out,
			     int boundlen)
{
	int need;

	if (boundlen <= 0)
		return -ENAMETOOLONG;

	/* Work out how many bytes the encoding takes before touching @out */
	if (uni < 0x80)
		need = 1;
	else if (uni < 0x800)
		need = 2;
	else
		need = 3;

	if (boundlen < need)
		return -ENAMETOOLONG;

	switch (need) {
	case 1:
		out[0] = (unsigned char)uni;
		break;
	case 2:
		out[0] = (unsigned char)(0xc0 | (uni >> 6));
		out[1] = (unsigned char)(0x80 | (uni & 0x3f));
		break;
	default:
		out[0] = (unsigned char)(0xe0 | (uni >> 12));
		out[1] = (unsigned char)(0x80 | ((uni >> 6) & 0x3f));
		out[2] = (unsigned char)(0x80 | (uni & 0x3f));
		break;
	}

	return need;
}
3e7fc2055 udf: Join functio... |
55 56 57 |
/*
 * Decode one UTF-8 sequence into a single wide character.
 *
 * @in:       input bytes
 * @boundlen: number of bytes available at @in
 * @uni:      receives the decoded character, or '?' on error
 *
 * Lead bytes announcing sequences of up to six bytes are recognised.
 * Every byte after the lead byte must have the form 10xxxxxx; anything else
 * is rejected instead of being folded into the result. A decoded value that
 * does not survive conversion to wchar_t is rejected as well rather than
 * being stored truncated.
 *
 * Returns the number of input bytes consumed, 0 when @boundlen is not
 * positive (*@uni is left untouched in that case), or -EINVAL with
 * *@uni == '?' when the input is malformed, cut short, or not representable.
 */
static int udf_char2uni_utf8(const unsigned char *in,
			     int boundlen,
			     wchar_t *uni)
{
	unsigned int utf_char = 0;
	unsigned char c;
	int utf_cnt = 0;
	int u_len;

	for (u_len = 0; u_len < boundlen;) {
		c = in[u_len++];

		if (utf_cnt) {
			/* Complete a multi-byte UTF-8 character */
			if ((c & 0xc0) != 0x80)
				goto invalid;
			utf_char = (utf_char << 6) | (c & 0x3f);
			if (--utf_cnt)
				continue;
		} else if (c & 0x80) {
			/* Start a multi-byte UTF-8 character */
			if ((c & 0xe0) == 0xc0) {
				utf_char = c & 0x1f;
				utf_cnt = 1;
			} else if ((c & 0xf0) == 0xe0) {
				utf_char = c & 0x0f;
				utf_cnt = 2;
			} else if ((c & 0xf8) == 0xf0) {
				utf_char = c & 0x07;
				utf_cnt = 3;
			} else if ((c & 0xfc) == 0xf8) {
				utf_char = c & 0x03;
				utf_cnt = 4;
			} else if ((c & 0xfe) == 0xfc) {
				utf_char = c & 0x01;
				utf_cnt = 5;
			} else {
				/* Stray continuation byte or 0xfe/0xff */
				goto invalid;
			}
			continue;
		} else {
			/* Single byte UTF-8 character (most common) */
			utf_char = c;
		}

		/* Do not hand back a value that wchar_t would truncate */
		if ((unsigned int)(wchar_t)utf_char != utf_char)
			goto invalid;

		*uni = utf_char;
		return u_len;
	}

	/* Input ended in the middle of a multi-byte character */
	if (utf_cnt)
		goto invalid;

	/* Only reached when there was no input at all */
	return u_len;

invalid:
	*uni = '?';
	return -EINVAL;
}
484a10f49 udf: Merge linux ... |
111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 |
/* Replacement written in place of a run of illegal input characters */
#define ILLEGAL_CHAR_MARK	'_'
/* Character that introduces a filename extension */
#define EXT_MARK		'.'
/* Character that introduces the generated CRC */
#define CRC_MARK		'#'
/* Number of trailing input characters examined when looking for an extension */
#define EXT_SIZE		5
/* Number of chars we need to store generated CRC to make filename unique */
#define CRC_LEN			5

/*
 * Convert the next character of a CS0 name and append it to the output.
 *
 * @str_o:         output buffer
 * @str_o_max_len: size of @str_o in bytes
 * @str_o_idx:     in/out, current write position in @str_o
 * @str_i:         input buffer (CS0 payload without the compression ID)
 * @str_i_max_len: length of @str_i in bytes
 * @str_i_idx:     in/out, current read position in @str_i
 * @u_ch:          bytes per input character; two bytes are combined
 *                 big-endian when @u_ch > 1
 * @needsCRC:      set to 1 when the output is full, when illegal characters
 *                 were replaced, or when @conv_f reported an error
 * @conv_f:        converts one character into the output encoding
 * @translate:     when non-zero, '/' and NUL are treated as illegal
 *
 * A run of consecutive illegal characters is consumed as a whole and yields
 * a single ILLEGAL_CHAR_MARK. When @conv_f fails with anything other than
 * -ENAMETOOLONG a '?' is stored instead.
 *
 * Returns 1 if a character was appended to @str_o, 0 otherwise (input
 * exhausted, output full, or @conv_f returned -ENAMETOOLONG).
 */
static int udf_name_conv_char(uint8_t *str_o, int str_o_max_len,
			      int *str_o_idx,
			      const uint8_t *str_i, int str_i_max_len,
			      int *str_i_idx,
			      int u_ch, int *needsCRC,
			      int (*conv_f)(wchar_t, unsigned char *, int),
			      int translate)
{
	uint32_t ch;
	int saw_illegal = 0;
	int have_char = 0;
	int written;

	while (!have_char && *str_i_idx < str_i_max_len) {
		/* No room left in the output: the name has to be mangled */
		if (*str_o_idx >= str_o_max_len) {
			*needsCRC = 1;
			return 0;
		}

		/* Expand OSTA compressed Unicode to Unicode */
		ch = str_i[*str_i_idx];
		if (u_ch > 1)
			ch = (ch << 8) | str_i[*str_i_idx + 1];

		if (translate && (ch == '/' || ch == 0)) {
			saw_illegal = 1;
		} else if (saw_illegal) {
			/*
			 * First legal character after an illegal run: leave
			 * it unconsumed for the next call.
			 */
			break;
		} else {
			have_char = 1;
		}

		*str_i_idx += u_ch;
	}

	if (saw_illegal) {
		*needsCRC = 1;
		ch = ILLEGAL_CHAR_MARK;
		have_char = 1;
	}

	if (!have_char)
		return 0;

	written = conv_f(ch, &str_o[*str_o_idx], str_o_max_len - *str_o_idx);

	/* Valid character? */
	if (written >= 0) {
		*str_o_idx += written;
		return 1;
	}

	*needsCRC = 1;
	if (written == -ENAMETOOLONG)
		return 0;

	str_o[(*str_o_idx)++] = '?';
	return 1;
}
9293fcfbc udf: Remove struc... |
168 169 |
static int udf_name_from_CS0(uint8_t *str_o, int str_max_len, const uint8_t *ocu, int ocu_len, |
484a10f49 udf: Merge linux ... |
170 171 |
int (*conv_f)(wchar_t, unsigned char *, int), int translate) |
1da177e4c Linux-2.6.12-rc2 |
172 |
{ |
484a10f49 udf: Merge linux ... |
173 |
uint32_t c; |
9293fcfbc udf: Remove struc... |
174 |
uint8_t cmp_id; |
484a10f49 udf: Merge linux ... |
175 176 177 178 179 180 181 182 183 184 185 186 |
int idx, len; int u_ch; int needsCRC = 0; int ext_i_len, ext_max_len; int str_o_len = 0; /* Length of resulting output */ int ext_o_len = 0; /* Extension output length */ int ext_crc_len = 0; /* Extension output length if used with CRC */ int i_ext = -1; /* Extension position in input buffer */ int o_crc = 0; /* Rightmost possible output pos for CRC+ext */ unsigned short valueCRC; uint8_t ext[EXT_SIZE * NLS_MAX_CHARSET_SIZE + 1]; uint8_t crc[CRC_LEN]; |
1da177e4c Linux-2.6.12-rc2 |
187 |
|
9293fcfbc udf: Remove struc... |
188 189 |
if (str_max_len <= 0) return 0; |
1da177e4c Linux-2.6.12-rc2 |
190 |
|
cb00ea352 UDF: coding style... |
191 |
if (ocu_len == 0) { |
9293fcfbc udf: Remove struc... |
192 |
memset(str_o, 0, str_max_len); |
1da177e4c Linux-2.6.12-rc2 |
193 194 |
return 0; } |
9293fcfbc udf: Remove struc... |
195 |
cmp_id = ocu[0]; |
34f953ddf udf: udf_CS0toNLS... |
196 |
if (cmp_id != 8 && cmp_id != 16) { |
9293fcfbc udf: Remove struc... |
197 |
memset(str_o, 0, str_max_len); |
484a10f49 udf: Merge linux ... |
198 199 |
pr_err("unknown compression code (%d) ", cmp_id); |
78fc2e694 udf: improve erro... |
200 |
return -EINVAL; |
1da177e4c Linux-2.6.12-rc2 |
201 |
} |
484a10f49 udf: Merge linux ... |
202 |
u_ch = cmp_id >> 3; |
1da177e4c Linux-2.6.12-rc2 |
203 |
|
484a10f49 udf: Merge linux ... |
204 205 |
ocu++; ocu_len--; |
1da177e4c Linux-2.6.12-rc2 |
206 |
|
484a10f49 udf: Merge linux ... |
207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 |
if (ocu_len % u_ch) { pr_err("incorrect filename length (%d) ", ocu_len + 1); return -EINVAL; } if (translate) { /* Look for extension */ for (idx = ocu_len - u_ch, ext_i_len = 0; (idx >= 0) && (ext_i_len < EXT_SIZE); idx -= u_ch, ext_i_len++) { c = ocu[idx]; if (u_ch > 1) c = (c << 8) | ocu[idx + 1]; if (c == EXT_MARK) { if (ext_i_len) i_ext = idx; break; } } if (i_ext >= 0) { /* Convert extension */ ext_max_len = min_t(int, sizeof(ext), str_max_len); ext[ext_o_len++] = EXT_MARK; idx = i_ext + u_ch; while (udf_name_conv_char(ext, ext_max_len, &ext_o_len, ocu, ocu_len, &idx, u_ch, &needsCRC, conv_f, translate)) { if ((ext_o_len + CRC_LEN) < str_max_len) ext_crc_len = ext_o_len; } } } idx = 0; while (1) { if (translate && (idx == i_ext)) { if (str_o_len > (str_max_len - ext_o_len)) needsCRC = 1; |
3e7fc2055 udf: Join functio... |
248 |
break; |
484a10f49 udf: Merge linux ... |
249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 |
} if (!udf_name_conv_char(str_o, str_max_len, &str_o_len, ocu, ocu_len, &idx, u_ch, &needsCRC, conv_f, translate)) break; if (translate && (str_o_len <= (str_max_len - ext_o_len - CRC_LEN))) o_crc = str_o_len; } if (translate) { if (str_o_len <= 2 && str_o[0] == '.' && (str_o_len == 1 || str_o[1] == '.')) needsCRC = 1; if (needsCRC) { str_o_len = o_crc; valueCRC = crc_itu_t(0, ocu, ocu_len); crc[0] = CRC_MARK; crc[1] = hex_asc_upper_hi(valueCRC >> 8); crc[2] = hex_asc_upper_lo(valueCRC >> 8); crc[3] = hex_asc_upper_hi(valueCRC); crc[4] = hex_asc_upper_lo(valueCRC); len = min_t(int, CRC_LEN, str_max_len - str_o_len); memcpy(&str_o[str_o_len], crc, len); str_o_len += len; ext_o_len = ext_crc_len; } if (ext_o_len > 0) { memcpy(&str_o[str_o_len], ext, ext_o_len); str_o_len += ext_o_len; } |
1da177e4c Linux-2.6.12-rc2 |
282 |
} |
1da177e4c Linux-2.6.12-rc2 |
283 |
|
9293fcfbc udf: Remove struc... |
284 |
return str_o_len; |
1da177e4c Linux-2.6.12-rc2 |
285 |
} |
9293fcfbc udf: Remove struc... |
286 287 |
/*
 * Convert a name in the local encoding into OSTA CS0 (compressed Unicode).
 *
 * @ocu:         output buffer; zeroed first, byte 0 receives the
 *               compression ID
 * @ocu_max_len: size of @ocu in bytes
 * @str_i:       input name
 * @str_len:     length of @str_i in bytes
 * @conv_f:      decodes one character from the input; returns the number of
 *               bytes consumed, 0 to skip one input byte, or a negative
 *               value for an undecodable character (stored as '?', one input
 *               byte consumed)
 *
 * Encoding starts with one byte per character (compression ID 8). As soon
 * as a character exceeds the current limit the ID is switched to 0x10 (two
 * bytes per character, high byte first) and conversion restarts from the
 * beginning of the input.
 *
 * Returns the number of bytes used in @ocu including the compression ID, or
 * 0 when @ocu_max_len is not positive or the name does not fit.
 */
static int udf_name_to_CS0(uint8_t *ocu, int ocu_max_len,
			   const uint8_t *str_i, int str_len,
			   int (*conv_f)(const unsigned char *, int, wchar_t *))
{
	unsigned int limit = 0xff;	/* largest value one output char can hold */
	int width = 1;			/* bytes per output character */
	wchar_t wc;
	int in_pos, out_pos, used;

	if (ocu_max_len <= 0)
		return 0;

	memset(ocu, 0, ocu_max_len);
	ocu[0] = 8;

restart:
	out_pos = 1;
	in_pos = 0;
	while (in_pos < str_len) {
		/* Name didn't fit? */
		if (out_pos + width > ocu_max_len)
			return 0;

		used = conv_f(&str_i[in_pos], str_len - in_pos, &wc);
		if (!used) {
			in_pos++;
			continue;
		}

		/* Invalid character, deal with it */
		if (used < 0) {
			used = 1;
			wc = '?';
		}

		if (wc > limit) {
			/* Does not fit the current width: go wide, start over */
			limit = 0xffff;
			ocu[0] = 0x10;
			width = 2;
			goto restart;
		}

		if (limit == 0xffff)
			ocu[out_pos++] = (uint8_t)(wc >> 8);
		ocu[out_pos++] = (uint8_t)(wc & 0xff);

		in_pos += used;
	}

	return out_pos;
}
c26f6c615 udf: Fix conversi... |
332 333 |
int udf_dstrCS0toUTF8(uint8_t *utf_o, int o_len, const uint8_t *ocu_i, int i_len) |
3e7fc2055 udf: Join functio... |
334 |
{ |
c26f6c615 udf: Fix conversi... |
335 336 337 338 339 340 341 342 343 344 345 346 347 |
int s_len = 0; if (i_len > 0) { s_len = ocu_i[i_len - 1]; if (s_len >= i_len) { pr_err("incorrect dstring lengths (%d/%d) ", s_len, i_len); return -EINVAL; } } return udf_name_from_CS0(utf_o, o_len, ocu_i, s_len, |
484a10f49 udf: Merge linux ... |
348 |
udf_uni2char_utf8, 0); |
3e7fc2055 udf: Join functio... |
349 |
} |
9293fcfbc udf: Remove struc... |
350 |
int udf_get_filename(struct super_block *sb, const uint8_t *sname, int slen, |
0e5cc9a40 udf: Check path l... |
351 |
uint8_t *dname, int dlen) |
1da177e4c Linux-2.6.12-rc2 |
352 |
{ |
3e7fc2055 udf: Join functio... |
353 |
int (*conv_f)(wchar_t, unsigned char *, int); |
6ce638367 udf: Make udf_get... |
354 |
int ret; |
1da177e4c Linux-2.6.12-rc2 |
355 |
|
31f2566f3 udf: remove unnec... |
356 357 |
if (!slen) return -EIO; |
9293fcfbc udf: Remove struc... |
358 359 |
if (dlen <= 0) return 0; |
cb00ea352 UDF: coding style... |
360 |
if (UDF_QUERY_FLAG(sb, UDF_FLAG_UTF8)) { |
3e7fc2055 udf: Join functio... |
361 |
conv_f = udf_uni2char_utf8; |
cb00ea352 UDF: coding style... |
362 |
} else if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP)) { |
3e7fc2055 udf: Join functio... |
363 |
conv_f = UDF_SB(sb)->s_nls_map->uni2char; |
4b11111ab udf: fix coding s... |
364 |
} else |
5dce54b71 udf: bug on exoti... |
365 |
BUG(); |
530f1a5e3 udf: reduce stack... |
366 |
|
484a10f49 udf: Merge linux ... |
367 |
ret = udf_name_from_CS0(dname, dlen, sname, slen, conv_f, 1); |
6ce638367 udf: Make udf_get... |
368 369 370 |
/* Zero length filename isn't valid... */ if (ret == 0) ret = -EINVAL; |
5ceb8b554 udf: Return -ENOM... |
371 |
return ret; |
1da177e4c Linux-2.6.12-rc2 |
372 |
} |
525e2c56c udf: Parameterize... |
373 374 |
int udf_put_filename(struct super_block *sb, const uint8_t *sname, int slen, uint8_t *dname, int dlen) |
1da177e4c Linux-2.6.12-rc2 |
375 |
{ |
3e7fc2055 udf: Join functio... |
376 |
int (*conv_f)(const unsigned char *, int, wchar_t *); |
1da177e4c Linux-2.6.12-rc2 |
377 |
|
cb00ea352 UDF: coding style... |
378 |
if (UDF_QUERY_FLAG(sb, UDF_FLAG_UTF8)) { |
3e7fc2055 udf: Join functio... |
379 |
conv_f = udf_char2uni_utf8; |
cb00ea352 UDF: coding style... |
380 |
} else if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP)) { |
3e7fc2055 udf: Join functio... |
381 |
conv_f = UDF_SB(sb)->s_nls_map->char2uni; |
4b11111ab udf: fix coding s... |
382 |
} else |
3e7fc2055 udf: Join functio... |
383 |
BUG(); |
1da177e4c Linux-2.6.12-rc2 |
384 |
|
9293fcfbc udf: Remove struc... |
385 |
return udf_name_to_CS0(dname, dlen, sname, slen, conv_f); |
1da177e4c Linux-2.6.12-rc2 |
386 |
} |