Blame view

fs/hfsplus/unicode.c 10.5 KB
b24413180   Greg Kroah-Hartman   License cleanup: ...
1
  // SPDX-License-Identifier: GPL-2.0
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
  /*
   *  linux/fs/hfsplus/unicode.c
   *
   * Copyright (C) 2001
   * Brad Boyer (flar@allandria.com)
   * (C) 2003 Ardis Technologies <roman@ardistech.com>
   *
   * Handler routines for unicode strings
   */
  
  #include <linux/types.h>
  #include <linux/nls.h>
  #include "hfsplus_fs.h"
  #include "hfsplus_raw.h"
  
  /*
   * Fold the case of a unicode char, given the 16 bit value.
   *
   * The high byte of @c selects an entry at the start of
   * hfsplus_case_fold_table.  A zero entry means no folding data exists for
   * that range and @c is returned unchanged; otherwise the entry is the base
   * offset of a sub-table that is indexed with the low byte of @c.
   *
   * Returns the folded char, or 0 if the char is ignorable.
   */
  static inline u16 case_fold(u16 c)
  {
  	u16 base = hfsplus_case_fold_table[c >> 8];
  
  	if (!base)
  		return c;
  	return hfsplus_case_fold_table[base + (c & 0xff)];
  }
  
  
  /* Compare unicode strings, return values like normal strcmp */
2179d372d   David Elliott   [PATCH] hfs: add ...
32
33
  int hfsplus_strcasecmp(const struct hfsplus_unistr *s1,
  		       const struct hfsplus_unistr *s2)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
  {
  	u16 len1, len2, c1, c2;
  	const hfsplus_unichr *p1, *p2;
  
  	len1 = be16_to_cpu(s1->length);
  	len2 = be16_to_cpu(s2->length);
  	p1 = s1->unicode;
  	p2 = s2->unicode;
  
  	while (1) {
  		c1 = c2 = 0;
  
  		while (len1 && !c1) {
  			c1 = case_fold(be16_to_cpu(*p1));
  			p1++;
  			len1--;
  		}
  		while (len2 && !c2) {
  			c2 = case_fold(be16_to_cpu(*p2));
  			p2++;
  			len2--;
  		}
  
  		if (c1 != c2)
  			return (c1 < c2) ? -1 : 1;
  		if (!c1 && !c2)
  			return 0;
  	}
  }
2179d372d   David Elliott   [PATCH] hfs: add ...
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
  /* Compare names as a sequence of 16-bit unsigned integers */
  int hfsplus_strcmp(const struct hfsplus_unistr *s1,
  		   const struct hfsplus_unistr *s2)
  {
  	u16 len1, len2, c1, c2;
  	const hfsplus_unichr *p1, *p2;
  	int len;
  
  	len1 = be16_to_cpu(s1->length);
  	len2 = be16_to_cpu(s2->length);
  	p1 = s1->unicode;
  	p2 = s2->unicode;
  
  	for (len = min(len1, len2); len > 0; len--) {
  		c1 = be16_to_cpu(*p1);
  		c2 = be16_to_cpu(*p2);
  		if (c1 != c2)
  			return c1 < c2 ? -1 : 1;
  		p1++;
  		p2++;
  	}
  
  	return len1 < len2 ? -1 :
  	       len1 > len2 ? 1 : 0;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
  #define Hangul_SBase	0xac00
  #define Hangul_LBase	0x1100
  #define Hangul_VBase	0x1161
  #define Hangul_TBase	0x11a7
  #define Hangul_SCount	11172
  #define Hangul_LCount	19
  #define Hangul_VCount	21
  #define Hangul_TCount	28
  #define Hangul_NCount	(Hangul_VCount * Hangul_TCount)
  
  
  /*
   * Binary-search one level of the composition table for @cc.
   *
   * @p points at a table level laid out as 16-bit pairs: p[1] holds the number
   * of entries, and entry i (1-based) has its key at p[2 * i] and a value at
   * p[2 * i + 1].  The keys are assumed to be sorted in ascending order.
   *
   * Returns hfsplus_compose_table offset by the value stored for @cc, or NULL
   * if the level is empty or @cc is not present.
   */
  static u16 *hfsplus_compose_lookup(u16 *p, u16 cc)
  {
  	int lo = 1;
  	int hi = p[1];
  	int mid;
  
  	if (hi == 0)
  		return NULL;
  	/* quick reject when cc lies outside the [first, last] key range */
  	if (cc < p[lo * 2] || cc > p[hi * 2])
  		return NULL;
  
  	while (lo <= hi) {
  		mid = (lo + hi) / 2;
  		if (cc == p[mid * 2])
  			return hfsplus_compose_table + p[mid * 2 + 1];
  		if (cc > p[mid * 2])
  			lo = mid + 1;
  		else
  			hi = mid - 1;
  	}
  	return NULL;
  }
2753cc281   Anton Salikhmetov   hfsplus: over 80 ...
118
119
120
  int hfsplus_uni2asc(struct super_block *sb,
  		const struct hfsplus_unistr *ustr,
  		char *astr, int *len_p)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
121
122
  {
  	const hfsplus_unichr *ip;
dd73a01a3   Christoph Hellwig   hfsplus: fix HFSP...
123
  	struct nls_table *nls = HFSPLUS_SB(sb)->nls;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
124
125
126
127
128
129
130
131
132
133
  	u8 *op;
  	u16 cc, c0, c1;
  	u16 *ce1, *ce2;
  	int i, len, ustrlen, res, compose;
  
  	op = astr;
  	ip = ustr->unicode;
  	ustrlen = be16_to_cpu(ustr->length);
  	len = *len_p;
  	ce1 = NULL;
84adede31   Christoph Hellwig   hfsplus: use atom...
134
  	compose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
135
136
137
138
139
140
141
  
  	while (ustrlen > 0) {
  		c0 = be16_to_cpu(*ip++);
  		ustrlen--;
  		/* search for single decomposed char */
  		if (likely(compose))
  			ce1 = hfsplus_compose_lookup(hfsplus_compose_table, c0);
2b4f9ca8a   Anton Salikhmetov   hfsplus: assignme...
142
143
144
145
146
  		if (ce1)
  			cc = ce1[0];
  		else
  			cc = 0;
  		if (cc) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
  			/* start of a possibly decomposed Hangul char */
  			if (cc != 0xffff)
  				goto done;
  			if (!ustrlen)
  				goto same;
  			c1 = be16_to_cpu(*ip) - Hangul_VBase;
  			if (c1 < Hangul_VCount) {
  				/* compose the Hangul char */
  				cc = (c0 - Hangul_LBase) * Hangul_VCount;
  				cc = (cc + c1) * Hangul_TCount;
  				cc += Hangul_SBase;
  				ip++;
  				ustrlen--;
  				if (!ustrlen)
  					goto done;
  				c1 = be16_to_cpu(*ip) - Hangul_TBase;
  				if (c1 > 0 && c1 < Hangul_TCount) {
  					cc += c1;
  					ip++;
  					ustrlen--;
  				}
  				goto done;
  			}
  		}
  		while (1) {
  			/* main loop for common case of not composed chars */
  			if (!ustrlen)
  				goto same;
  			c1 = be16_to_cpu(*ip);
  			if (likely(compose))
2753cc281   Anton Salikhmetov   hfsplus: over 80 ...
177
178
  				ce1 = hfsplus_compose_lookup(
  					hfsplus_compose_table, c1);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
  			if (ce1)
  				break;
  			switch (c0) {
  			case 0:
  				c0 = 0x2400;
  				break;
  			case '/':
  				c0 = ':';
  				break;
  			}
  			res = nls->uni2char(c0, op, len);
  			if (res < 0) {
  				if (res == -ENAMETOOLONG)
  					goto out;
  				*op = '?';
  				res = 1;
  			}
  			op += res;
  			len -= res;
  			c0 = c1;
  			ip++;
  			ustrlen--;
  		}
  		ce2 = hfsplus_compose_lookup(ce1, c0);
  		if (ce2) {
  			i = 1;
  			while (i < ustrlen) {
2753cc281   Anton Salikhmetov   hfsplus: over 80 ...
206
207
  				ce1 = hfsplus_compose_lookup(ce2,
  					be16_to_cpu(ip[i]));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
208
209
210
211
212
  				if (!ce1)
  					break;
  				i++;
  				ce2 = ce1;
  			}
2b4f9ca8a   Anton Salikhmetov   hfsplus: assignme...
213
214
  			cc = ce2[0];
  			if (cc) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
215
216
217
218
219
  				ip += i;
  				ustrlen -= i;
  				goto done;
  			}
  		}
20b7643d8   Anton Salikhmetov   hfsplus: spaces/i...
220
  same:
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
221
222
223
224
225
226
227
228
229
230
  		switch (c0) {
  		case 0:
  			cc = 0x2400;
  			break;
  		case '/':
  			cc = ':';
  			break;
  		default:
  			cc = c0;
  		}
20b7643d8   Anton Salikhmetov   hfsplus: spaces/i...
231
  done:
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
  		res = nls->uni2char(cc, op, len);
  		if (res < 0) {
  			if (res == -ENAMETOOLONG)
  				goto out;
  			*op = '?';
  			res = 1;
  		}
  		op += res;
  		len -= res;
  	}
  	res = 0;
  out:
  	*len_p = (char *)op - astr;
  	return res;
  }
1e96b7ca1   Duane Griffin   HFS+: refactor AS...
247
248
249
250
251
252
  /*
   * Convert one or more ASCII characters into a single unicode character.
   *
   * The conversion is done by the char2uni() callback of the mount's NLS
   * table.  If it reports an error or consumes nothing, '?' is stored and one
   * input byte is consumed.  Afterwards U+2400 is mapped to NUL and ':' to
   * '/'.
   *
   * Returns the number of ASCII characters corresponding to the unicode char.
   */
  static inline int asc2unichar(struct super_block *sb, const char *astr, int len,
  			      wchar_t *uc)
  {
  	struct nls_table *nls = HFSPLUS_SB(sb)->nls;
  	int consumed;
  
  	consumed = nls->char2uni(astr, len, uc);
  	if (consumed <= 0) {
  		*uc = '?';
  		consumed = 1;
  	}
  
  	if (*uc == 0x2400)
  		*uc = 0;
  	else if (*uc == ':')
  		*uc = '/';
  
  	return consumed;
  }
  
afd6c9e1f   Ernesto A. Fernández   hfsplus: fix deco...
269
270
  /*
   * Decomposes a non-Hangul unicode character.
   *
   * hfsplus_decompose_table is walked as a four-level trie, one level per
   * 4-bit nibble of @uc starting with the most significant one.  A zero entry
   * at any level means there is no decomposition; at the top level 0xffff is
   * rejected as well.  The final entry keeps the decomposition length in its
   * low two bits and the table offset of the decomposition in the rest.
   *
   * Returns a pointer to the decomposition and stores its length in *@size,
   * or returns NULL if @uc has no decomposition here (*@size is only written
   * when the last trie level is reached).
   */
  static u16 *hfsplus_decompose_nonhangul(wchar_t uc, int *size)
  {
  	int node, shift;
  
  	node = hfsplus_decompose_table[(uc >> 12) & 0xf];
  	if (node == 0 || node == 0xffff)
  		return NULL;
  
  	/* the two middle levels follow the same pattern */
  	for (shift = 8; shift >= 4; shift -= 4) {
  		node = hfsplus_decompose_table[node + ((uc >> shift) & 0xf)];
  		if (node == 0)
  			return NULL;
  	}
  
  	node = hfsplus_decompose_table[node + (uc & 0xf)];
  	*size = node & 3;
  	if (*size == 0)
  		return NULL;
  	return hfsplus_decompose_table + (node / 4);
  }
afd6c9e1f   Ernesto A. Fernández   hfsplus: fix deco...
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
  /*
   * Try to decompose a unicode character as Hangul. Return 0 if @uc is not
   * precomposed Hangul, otherwise return the length of the decomposition.
   *
   * @result must have room for three entries: the leading consonant, the
   * vowel and, only when present, the trailing consonant.
   *
   * This function was adapted from sample code from the Unicode Standard
   * Annex #15: Unicode Normalization Forms, version 3.2.0.
   *
   * Copyright (C) 1991-2018 Unicode, Inc.  All rights reserved.  Distributed
   * under the Terms of Use in http://www.unicode.org/copyright.html.
   */
  static int hfsplus_try_decompose_hangul(wchar_t uc, u16 *result)
  {
  	int sindex = uc - Hangul_SBase;
  	int tindex;
  
  	if (sindex < 0 || sindex >= Hangul_SCount)
  		return 0;
  
  	result[0] = Hangul_LBase + sindex / Hangul_NCount;
  	result[1] = Hangul_VBase + (sindex % Hangul_NCount) / Hangul_TCount;
  
  	/* a zero trailing index means the syllable has no final consonant */
  	tindex = sindex % Hangul_TCount;
  	if (tindex == 0)
  		return 2;
  
  	result[2] = Hangul_TBase + tindex;
  	return 3;
  }
  
  /*
   * Decomposes a single unicode character.
   *
   * Hangul is tried first and, if @uc is a precomposed syllable, written into
   * the caller-provided @hangul_buffer.  Anything else is looked up in the
   * decomposition table.
   *
   * Returns a pointer to the decomposition with its length stored in *@size,
   * or NULL if @uc does not decompose.
   */
  static u16 *decompose_unichar(wchar_t uc, int *size, u16 *hangul_buffer)
  {
  	*size = hfsplus_try_decompose_hangul(uc, hangul_buffer);
  	if (*size)
  		return hangul_buffer;
  
  	return hfsplus_decompose_nonhangul(uc, size);
  }
324ef39a8   Vyacheslav Dubeyko   hfsplus: add supp...
336
337
  int hfsplus_asc2uni(struct super_block *sb,
  		    struct hfsplus_unistr *ustr, int max_unistr_len,
1e96b7ca1   Duane Griffin   HFS+: refactor AS...
338
339
340
341
  		    const char *astr, int len)
  {
  	int size, dsize, decompose;
  	u16 *dstr, outlen = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
342
  	wchar_t c;
afd6c9e1f   Ernesto A. Fernández   hfsplus: fix deco...
343
  	u16 dhangul[3];
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
344

84adede31   Christoph Hellwig   hfsplus: use atom...
345
  	decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
324ef39a8   Vyacheslav Dubeyko   hfsplus: add supp...
346
  	while (outlen < max_unistr_len && len > 0) {
1e96b7ca1   Duane Griffin   HFS+: refactor AS...
347
  		size = asc2unichar(sb, astr, len, &c);
2b4f9ca8a   Anton Salikhmetov   hfsplus: assignme...
348
  		if (decompose)
afd6c9e1f   Ernesto A. Fernández   hfsplus: fix deco...
349
  			dstr = decompose_unichar(c, &dsize, dhangul);
2b4f9ca8a   Anton Salikhmetov   hfsplus: assignme...
350
351
352
  		else
  			dstr = NULL;
  		if (dstr) {
324ef39a8   Vyacheslav Dubeyko   hfsplus: add supp...
353
  			if (outlen + dsize > max_unistr_len)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
354
355
  				break;
  			do {
1e96b7ca1   Duane Griffin   HFS+: refactor AS...
356
357
358
359
360
361
362
  				ustr->unicode[outlen++] = cpu_to_be16(*dstr++);
  			} while (--dsize > 0);
  		} else
  			ustr->unicode[outlen++] = cpu_to_be16(c);
  
  		astr += size;
  		len -= size;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
363
364
365
366
367
368
  	}
  	ustr->length = cpu_to_be16(outlen);
  	if (len > 0)
  		return -ENAMETOOLONG;
  	return 0;
  }
d45bce8fa   Duane Griffin   HFS+: add custom ...
369
370
371
372
373
374
  
  /*
   * Hash a string to an integer as appropriate for the HFS+ filesystem.
   * Composed unicode characters are decomposed and case-folding is performed
   * if the appropriate bits are (un)set on the superblock.
   */
da53be12b   Linus Torvalds   Don't pass inode ...
375
  int hfsplus_hash_dentry(const struct dentry *dentry, struct qstr *str)
d45bce8fa   Duane Griffin   HFS+: add custom ...
376
377
378
379
  {
  	struct super_block *sb = dentry->d_sb;
  	const char *astr;
  	const u16 *dstr;
8aa84ab99   Andrew Morton   fs/hfsplus/unicod...
380
  	int casefold, decompose, size, len;
d45bce8fa   Duane Griffin   HFS+: add custom ...
381
382
383
  	unsigned long hash;
  	wchar_t c;
  	u16 c2;
afd6c9e1f   Ernesto A. Fernández   hfsplus: fix deco...
384
  	u16 dhangul[3];
d45bce8fa   Duane Griffin   HFS+: add custom ...
385

84adede31   Christoph Hellwig   hfsplus: use atom...
386
387
  	casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags);
  	decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
8387ff257   Linus Torvalds   vfs: make the str...
388
  	hash = init_name_hash(dentry);
d45bce8fa   Duane Griffin   HFS+: add custom ...
389
390
391
  	astr = str->name;
  	len = str->len;
  	while (len > 0) {
8aa84ab99   Andrew Morton   fs/hfsplus/unicod...
392
  		int uninitialized_var(dsize);
d45bce8fa   Duane Griffin   HFS+: add custom ...
393
394
395
  		size = asc2unichar(sb, astr, len, &c);
  		astr += size;
  		len -= size;
2b4f9ca8a   Anton Salikhmetov   hfsplus: assignme...
396
  		if (decompose)
afd6c9e1f   Ernesto A. Fernández   hfsplus: fix deco...
397
  			dstr = decompose_unichar(c, &dsize, dhangul);
2b4f9ca8a   Anton Salikhmetov   hfsplus: assignme...
398
399
400
  		else
  			dstr = NULL;
  		if (dstr) {
d45bce8fa   Duane Griffin   HFS+: add custom ...
401
402
  			do {
  				c2 = *dstr++;
2b4f9ca8a   Anton Salikhmetov   hfsplus: assignme...
403
404
405
  				if (casefold)
  					c2 = case_fold(c2);
  				if (!casefold || c2)
d45bce8fa   Duane Griffin   HFS+: add custom ...
406
407
408
409
  					hash = partial_name_hash(c2, hash);
  			} while (--dsize > 0);
  		} else {
  			c2 = c;
2b4f9ca8a   Anton Salikhmetov   hfsplus: assignme...
410
411
412
  			if (casefold)
  				c2 = case_fold(c2);
  			if (!casefold || c2)
d45bce8fa   Duane Griffin   HFS+: add custom ...
413
414
415
416
417
418
419
420
421
422
423
424
425
  				hash = partial_name_hash(c2, hash);
  		}
  	}
  	str->hash = end_name_hash(hash);
  
  	return 0;
  }
  
  /*
   * Compare strings with HFS+ filename ordering.
   * Composed unicode characters are decomposed and case-folding is performed
   * if the appropriate bits are (un)set on the superblock.
   */
6fa67e707   Al Viro   get rid of 'paren...
426
  int hfsplus_compare_dentry(const struct dentry *dentry,
621e155a3   Nick Piggin   fs: change d_comp...
427
  		unsigned int len, const char *str, const struct qstr *name)
d45bce8fa   Duane Griffin   HFS+: add custom ...
428
  {
d3fe19852   Al Viro   cifs, msdos, vfat...
429
  	struct super_block *sb = dentry->d_sb;
d45bce8fa   Duane Griffin   HFS+: add custom ...
430
431
432
433
434
435
  	int casefold, decompose, size;
  	int dsize1, dsize2, len1, len2;
  	const u16 *dstr1, *dstr2;
  	const char *astr1, *astr2;
  	u16 c1, c2;
  	wchar_t c;
afd6c9e1f   Ernesto A. Fernández   hfsplus: fix deco...
436
  	u16 dhangul_1[3], dhangul_2[3];
d45bce8fa   Duane Griffin   HFS+: add custom ...
437

84adede31   Christoph Hellwig   hfsplus: use atom...
438
439
  	casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags);
  	decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
621e155a3   Nick Piggin   fs: change d_comp...
440
441
442
443
  	astr1 = str;
  	len1 = len;
  	astr2 = name->name;
  	len2 = name->len;
d45bce8fa   Duane Griffin   HFS+: add custom ...
444
445
446
447
448
449
450
451
  	dsize1 = dsize2 = 0;
  	dstr1 = dstr2 = NULL;
  
  	while (len1 > 0 && len2 > 0) {
  		if (!dsize1) {
  			size = asc2unichar(sb, astr1, len1, &c);
  			astr1 += size;
  			len1 -= size;
2753cc281   Anton Salikhmetov   hfsplus: over 80 ...
452
  			if (decompose)
afd6c9e1f   Ernesto A. Fernández   hfsplus: fix deco...
453
454
  				dstr1 = decompose_unichar(c, &dsize1,
  							  dhangul_1);
2753cc281   Anton Salikhmetov   hfsplus: over 80 ...
455
  			if (!decompose || !dstr1) {
d45bce8fa   Duane Griffin   HFS+: add custom ...
456
457
458
459
460
461
462
463
464
465
  				c1 = c;
  				dstr1 = &c1;
  				dsize1 = 1;
  			}
  		}
  
  		if (!dsize2) {
  			size = asc2unichar(sb, astr2, len2, &c);
  			astr2 += size;
  			len2 -= size;
2753cc281   Anton Salikhmetov   hfsplus: over 80 ...
466
  			if (decompose)
afd6c9e1f   Ernesto A. Fernández   hfsplus: fix deco...
467
468
  				dstr2 = decompose_unichar(c, &dsize2,
  							  dhangul_2);
2753cc281   Anton Salikhmetov   hfsplus: over 80 ...
469
  			if (!decompose || !dstr2) {
d45bce8fa   Duane Griffin   HFS+: add custom ...
470
471
472
473
474
475
476
477
478
  				c2 = c;
  				dstr2 = &c2;
  				dsize2 = 1;
  			}
  		}
  
  		c1 = *dstr1;
  		c2 = *dstr2;
  		if (casefold) {
2b4f9ca8a   Anton Salikhmetov   hfsplus: assignme...
479
480
  			c1 = case_fold(c1);
  			if (!c1) {
d45bce8fa   Duane Griffin   HFS+: add custom ...
481
482
483
484
  				dstr1++;
  				dsize1--;
  				continue;
  			}
2b4f9ca8a   Anton Salikhmetov   hfsplus: assignme...
485
486
  			c2 = case_fold(c2);
  			if (!c2) {
d45bce8fa   Duane Griffin   HFS+: add custom ...
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
  				dstr2++;
  				dsize2--;
  				continue;
  			}
  		}
  		if (c1 < c2)
  			return -1;
  		else if (c1 > c2)
  			return 1;
  
  		dstr1++;
  		dsize1--;
  		dstr2++;
  		dsize2--;
  	}
  
  	if (len1 < len2)
  		return -1;
  	if (len1 > len2)
  		return 1;
  	return 0;
  }