Blame view

fs/cifs/cifs_unicode.c 8.77 KB
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1
2
3
  /*
   *   fs/cifs/cifs_unicode.c
   *
d185cda77   Steve French   [CIFS] rename cif...
4
   *   Copyright (c) International Business Machines  Corp., 2000,2009
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
5
6
7
8
   *   Modified by Steve French (sfrench@us.ibm.com)
   *
   *   This program is free software;  you can redistribute it and/or modify
   *   it under the terms of the GNU General Public License as published by
221601c3d   Steve French   [CIFS] whitespace...
9
   *   the Free Software Foundation; either version 2 of the License, or
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
10
   *   (at your option) any later version.
221601c3d   Steve French   [CIFS] whitespace...
11
   *
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
12
13
14
15
16
17
   *   This program is distributed in the hope that it will be useful,
   *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
   *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
   *   the GNU General Public License for more details.
   *
   *   You should have received a copy of the GNU General Public License
221601c3d   Steve French   [CIFS] whitespace...
18
   *   along with this program;  if not, write to the Free Software
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
19
20
21
   *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
   */
  #include <linux/fs.h>
5a0e3ad6a   Tejun Heo   include cleanup: ...
22
  #include <linux/slab.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
23
24
25
  #include "cifs_unicode.h"
  #include "cifs_uniupr.h"
  #include "cifspdu.h"
3979877e5   Steve French   [CIFS] Support fo...
26
  #include "cifsglob.h"
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
27
28
29
  #include "cifs_debug.h"
  
  /*
69f801fca   Jeff Layton   cifs: add new fun...
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
   * cifs_ucs2_bytes - how long will a string be after conversion?
   * @ucs - pointer to input string
   * @maxbytes - don't go past this many bytes of input string
   * @codepage - destination codepage
   *
   * Walk a ucs2le string and return the number of bytes that the string will
   * be after being converted to the given charset, not including any null
   * termination required. Don't walk past maxbytes in the source buffer.
   */
  int
  cifs_ucs2_bytes(const __le16 *from, int maxbytes,
  		const struct nls_table *codepage)
  {
  	int i;
  	int charlen, outlen = 0;
  	int maxwords = maxbytes / 2;
  	char tmp[NLS_MAX_CHARSET_SIZE];
ba2dbf30d   Jeff Layton   cifs: clean up un...
47
  	__u16 ftmp;
69f801fca   Jeff Layton   cifs: add new fun...
48

ba2dbf30d   Jeff Layton   cifs: clean up un...
49
50
51
52
53
54
  	for (i = 0; i < maxwords; i++) {
  		ftmp = get_unaligned_le16(&from[i]);
  		if (ftmp == 0)
  			break;
  
  		charlen = codepage->uni2char(ftmp, tmp, NLS_MAX_CHARSET_SIZE);
69f801fca   Jeff Layton   cifs: add new fun...
55
56
57
58
59
60
61
62
63
64
  		if (charlen > 0)
  			outlen += charlen;
  		else
  			outlen++;
  	}
  
  	return outlen;
  }
  
  /*
ba2dbf30d   Jeff Layton   cifs: clean up un...
65
   * cifs_mapchar - convert a host-endian char to proper char in codepage
7fabf0c94   Jeff Layton   cifs: add replace...
66
   * @target - where converted character should be copied
ba2dbf30d   Jeff Layton   cifs: clean up un...
67
   * @src_char - 2 byte host-endian source character
7fabf0c94   Jeff Layton   cifs: add replace...
68
69
70
71
72
73
74
75
   * @cp - codepage to which character should be converted
   * @mapchar - should character be mapped according to mapchars mount option?
   *
   * This function handles the conversion of a single character. It is the
   * responsibility of the caller to ensure that the target buffer is large
   * enough to hold the result of the conversion (at least NLS_MAX_CHARSET_SIZE).
   */
  static int
ba2dbf30d   Jeff Layton   cifs: clean up un...
76
  cifs_mapchar(char *target, const __u16 src_char, const struct nls_table *cp,
7fabf0c94   Jeff Layton   cifs: add replace...
77
78
79
80
81
82
83
84
85
86
87
88
  	     bool mapchar)
  {
  	int len = 1;
  
  	if (!mapchar)
  		goto cp_convert;
  
  	/*
  	 * BB: Cannot handle remapping UNI_SLASH until all the calls to
  	 *     build_path_from_dentry are modified, as they use slash as
  	 *     separator.
  	 */
ba2dbf30d   Jeff Layton   cifs: clean up un...
89
  	switch (src_char) {
7fabf0c94   Jeff Layton   cifs: add replace...
90
91
92
  	case UNI_COLON:
  		*target = ':';
  		break;
581ade4d1   Jeff Layton   cifs: clean up va...
93
  	case UNI_ASTERISK:
7fabf0c94   Jeff Layton   cifs: add replace...
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
  		*target = '*';
  		break;
  	case UNI_QUESTION:
  		*target = '?';
  		break;
  	case UNI_PIPE:
  		*target = '|';
  		break;
  	case UNI_GRTRTHAN:
  		*target = '>';
  		break;
  	case UNI_LESSTHAN:
  		*target = '<';
  		break;
  	default:
  		goto cp_convert;
  	}
  
  out:
  	return len;
  
  cp_convert:
ba2dbf30d   Jeff Layton   cifs: clean up un...
116
  	len = cp->uni2char(src_char, target, NLS_MAX_CHARSET_SIZE);
7fabf0c94   Jeff Layton   cifs: add replace...
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
  	if (len <= 0) {
  		*target = '?';
  		len = 1;
  	}
  	goto out;
  }
  
  /*
   * cifs_from_ucs2 - convert utf16le string to local charset
   * @to - destination buffer
   * @from - source buffer
   * @tolen - destination buffer size (in bytes)
   * @fromlen - source buffer size (in bytes)
   * @codepage - codepage to which characters should be converted
   * @mapchar - should characters be remapped according to the mapchars option?
   *
   * Convert a little-endian UCS-2 string (as sent by the server) to a string
   * in the provided codepage. At most fromlen / 2 source units are read, and
   * conversion stops early at a zero unit or when the next character would
   * no longer leave room for the null terminator in the destination. The
   * terminator (nls_nullsize(codepage) zero bytes) is then appended, so the
   * caller is expected to pass a tolen that is at least that large. Returns
   * the number of bytes written to the destination, terminator included.
   *
   * Note that some windows versions actually send multiword UTF-16 characters
   * instead of straight UCS-2. The linux nls routines however aren't able to
   * deal with those characters properly. In the event that we get some of
   * those characters, they won't be translated properly.
   */
  int
  cifs_from_ucs2(char *to, const __le16 *from, int tolen, int fromlen,
  		 const struct nls_table *codepage, bool mapchar)
  {
  	char probe[NLS_MAX_CHARSET_SIZE];
  	const int termlen = nls_nullsize(codepage);
  	const int nwords = fromlen / 2;
  	/*
  	 * Characters have varying widths. Below this output offset even the
  	 * widest possible character plus the terminator still fits, so no
  	 * per-character overflow check is needed until we reach it.
  	 */
  	const int unchecked_limit = tolen - (NLS_MAX_CHARSET_SIZE + termlen);
  	int written = 0;
  	int remaining;
  	int idx;

  	for (idx = 0; idx < nwords; idx++) {
  		const __u16 unit = get_unaligned_le16(&from[idx]);
  		int nbytes;

  		if (!unit)
  			break;

  		if (written >= unchecked_limit) {
  			/*
  			 * Close to the end of the buffer: convert into a
  			 * scratch area first to learn the real width, and stop
  			 * if it would bleed into the null terminator.
  			 */
  			nbytes = cifs_mapchar(probe, unit, codepage, mapchar);
  			if (written + nbytes > tolen - termlen)
  				break;
  		}

  		/* put converted char into 'to' buffer */
  		nbytes = cifs_mapchar(&to[written], unit, codepage, mapchar);
  		written += nbytes;
  	}

  	/* properly null-terminate string */
  	for (remaining = termlen; remaining > 0; remaining--)
  		to[written++] = 0;

  	return written;
  }
  
  /*
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
191
192
193
194
195
196
   * NAME:	cifs_strtoUCS()
   *
   * FUNCTION:	Convert character string to unicode string
   *
   */
  int
ad7a2926b   Steve French   [CIFS] reduce che...
197
  cifs_strtoUCS(__le16 *to, const char *from, int len,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
198
199
200
201
  	      const struct nls_table *codepage)
  {
  	int charlen;
  	int i;
ba2dbf30d   Jeff Layton   cifs: clean up un...
202
  	wchar_t wchar_to; /* needed to quiet sparse */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
203
204
  
  	for (i = 0; len && *from; i++, from += charlen, len -= charlen) {
ba2dbf30d   Jeff Layton   cifs: clean up un...
205
  		charlen = codepage->char2uni(from, len, &wchar_to);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
206
  		if (charlen < 1) {
ba2dbf30d   Jeff Layton   cifs: clean up un...
207
208
  			cERROR(1, "strtoUCS: char2uni of 0x%x returned %d",
  				*from, charlen);
69114089b   Steve French   [CIFS] Reduce spa...
209
  			/* A question mark */
ba2dbf30d   Jeff Layton   cifs: clean up un...
210
  			wchar_to = 0x003f;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
211
  			charlen = 1;
ba2dbf30d   Jeff Layton   cifs: clean up un...
212
213
  		}
  		put_unaligned_le16(wchar_to, &to[i]);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
214
  	}
ba2dbf30d   Jeff Layton   cifs: clean up un...
215
  	put_unaligned_le16(0, &to[i]);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
216
217
  	return i;
  }
066ce6899   Jeff Layton   cifs: rename cifs...
218
  /*
d185cda77   Steve French   [CIFS] rename cif...
219
   * cifs_strndup_from_ucs - copy a string from wire format to the local codepage
066ce6899   Jeff Layton   cifs: rename cifs...
220
221
222
223
224
225
226
227
228
229
   * @src - source string
   * @maxlen - don't walk past this many bytes in the source string
   * @is_unicode - is this a unicode string?
   * @codepage - destination codepage
   *
   * Take a string given by the server, convert it to the local codepage and
   * put it in a new buffer. Returns a pointer to the new string or NULL on
   * error.
   */
  char *
d185cda77   Steve French   [CIFS] rename cif...
230
  cifs_strndup_from_ucs(const char *src, const int maxlen, const bool is_unicode,
066ce6899   Jeff Layton   cifs: rename cifs...
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
  	     const struct nls_table *codepage)
  {
  	int len;
  	char *dst;
  
  	if (is_unicode) {
  		len = cifs_ucs2_bytes((__le16 *) src, maxlen, codepage);
  		len += nls_nullsize(codepage);
  		dst = kmalloc(len, GFP_KERNEL);
  		if (!dst)
  			return NULL;
  		cifs_from_ucs2(dst, (__le16 *) src, len, maxlen, codepage,
  			       false);
  	} else {
  		len = strnlen(src, maxlen);
  		len++;
  		dst = kmalloc(len, GFP_KERNEL);
  		if (!dst)
  			return NULL;
  		strlcpy(dst, src, len);
  	}
  
  	return dst;
  }
84cdf74e8   Jeff Layton   cifs: fix unalign...
255
256
257
258
259
260
261
  /*
   * Convert 16 bit Unicode pathname to wire format from string in current code
   * page. Conversion may involve remapping up the six characters that are
   * only legal in POSIX-like OS (if they are present in the string). Path
   * names are little endian 16 bit Unicode on the wire
   */
  int
581ade4d1   Jeff Layton   cifs: clean up va...
262
  cifsConvertToUCS(__le16 *target, const char *source, int srclen,
84cdf74e8   Jeff Layton   cifs: fix unalign...
263
264
265
  		 const struct nls_table *cp, int mapChars)
  {
  	int i, j, charlen;
84cdf74e8   Jeff Layton   cifs: fix unalign...
266
  	char src_char;
581ade4d1   Jeff Layton   cifs: clean up va...
267
268
  	__le16 dst_char;
  	wchar_t tmp;
84cdf74e8   Jeff Layton   cifs: fix unalign...
269
270
271
  
  	if (!mapChars)
  		return cifs_strtoUCS(target, source, PATH_MAX, cp);
581ade4d1   Jeff Layton   cifs: clean up va...
272
  	for (i = 0, j = 0; i < srclen; j++) {
84cdf74e8   Jeff Layton   cifs: fix unalign...
273
  		src_char = source[i];
11379b5e3   Jeff Layton   cifs: fix cifsCon...
274
  		charlen = 1;
84cdf74e8   Jeff Layton   cifs: fix unalign...
275
276
  		switch (src_char) {
  		case 0:
581ade4d1   Jeff Layton   cifs: clean up va...
277
  			put_unaligned(0, &target[j]);
84cdf74e8   Jeff Layton   cifs: fix unalign...
278
279
  			goto ctoUCS_out;
  		case ':':
581ade4d1   Jeff Layton   cifs: clean up va...
280
  			dst_char = cpu_to_le16(UNI_COLON);
84cdf74e8   Jeff Layton   cifs: fix unalign...
281
282
  			break;
  		case '*':
581ade4d1   Jeff Layton   cifs: clean up va...
283
  			dst_char = cpu_to_le16(UNI_ASTERISK);
84cdf74e8   Jeff Layton   cifs: fix unalign...
284
285
  			break;
  		case '?':
581ade4d1   Jeff Layton   cifs: clean up va...
286
  			dst_char = cpu_to_le16(UNI_QUESTION);
84cdf74e8   Jeff Layton   cifs: fix unalign...
287
288
  			break;
  		case '<':
581ade4d1   Jeff Layton   cifs: clean up va...
289
  			dst_char = cpu_to_le16(UNI_LESSTHAN);
84cdf74e8   Jeff Layton   cifs: fix unalign...
290
291
  			break;
  		case '>':
581ade4d1   Jeff Layton   cifs: clean up va...
292
  			dst_char = cpu_to_le16(UNI_GRTRTHAN);
84cdf74e8   Jeff Layton   cifs: fix unalign...
293
294
  			break;
  		case '|':
581ade4d1   Jeff Layton   cifs: clean up va...
295
  			dst_char = cpu_to_le16(UNI_PIPE);
84cdf74e8   Jeff Layton   cifs: fix unalign...
296
297
298
299
300
301
302
  			break;
  		/*
  		 * FIXME: We can not handle remapping backslash (UNI_SLASH)
  		 * until all the calls to build_path_from_dentry are modified,
  		 * as they use backslash as separator.
  		 */
  		default:
581ade4d1   Jeff Layton   cifs: clean up va...
303
304
  			charlen = cp->char2uni(source + i, srclen - i, &tmp);
  			dst_char = cpu_to_le16(tmp);
84cdf74e8   Jeff Layton   cifs: fix unalign...
305
306
307
308
309
  			/*
  			 * if no match, use question mark, which at least in
  			 * some cases serves as wild card
  			 */
  			if (charlen < 1) {
581ade4d1   Jeff Layton   cifs: clean up va...
310
  				dst_char = cpu_to_le16(0x003f);
84cdf74e8   Jeff Layton   cifs: fix unalign...
311
312
  				charlen = 1;
  			}
84cdf74e8   Jeff Layton   cifs: fix unalign...
313
  		}
11379b5e3   Jeff Layton   cifs: fix cifsCon...
314
315
316
317
318
  		/*
  		 * character may take more than one byte in the source string,
  		 * but will take exactly two bytes in the target string
  		 */
  		i += charlen;
581ade4d1   Jeff Layton   cifs: clean up va...
319
  		put_unaligned(dst_char, &target[j]);
84cdf74e8   Jeff Layton   cifs: fix unalign...
320
321
322
323
324
  	}
  
  ctoUCS_out:
  	return i;
  }