Blame view

include/charset.h 6.56 KB
f739fcd83   Tom Rini   SPDX: Convert a f...
1
  /* SPDX-License-Identifier: GPL-2.0+ */
78178bb0c   Rob Clark   lib: add some utf...
2
3
4
5
  /*
   *  charset conversion utils
   *
   *  Copyright (c) 2017 Rob Clark
78178bb0c   Rob Clark   lib: add some utf...
6
7
8
9
   */
  
  #ifndef __CHARSET_H_
  #define __CHARSET_H_
d8c28232c   Heinrich Schuchardt   lib: charset: uti...
10
  #include <linux/kernel.h>
f58c5ecb8   Heinrich Schuchardt   efi_loader: new f...
11
  #include <linux/types.h>
984f251fe   Heinrich Schuchardt   efi_loader: MAX_U...
12
  /*
   * Upper bound on the number of utf8 characters that a single utf16
   * character can generate; see utf16_to_utf8().
   */
  #define MAX_UTF8_PER_UTF16 3
78178bb0c   Rob Clark   lib: add some utf...
13
14
  
  /**
35cbb796a   Heinrich Schuchardt   efi_loader: suppo...
15
16
17
18
19
20
21
22
   * console_read_unicode() - read a Unicode code point from the console
   *
   * @code:	location where the Unicode code point is stored
   * Return:	0 on success. NOTE(review): the failure value is not stated
   *		here - confirm against the implementation.
   */
  int console_read_unicode(s32 *code);
  
  /**
d8c28232c   Heinrich Schuchardt   lib: charset: uti...
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
   * utf8_get() - get the next UTF-8 code point from a buffer
   *
   * @src:		address of the pointer to the current byte; the pointer
   *			is updated to point to the next byte
   * Return:		the code point, 0 at the end of the string, or -1 if no
   *			legal code point is found. In case of an error *src
   *			points to the incorrect byte.
   */
  s32 utf8_get(const char **src);
  
  /**
   * utf8_put() - write a code point to a buffer as UTF-8
   *
   * @code:		code point to write
   * @dst:		address of the pointer to the destination buffer; the
   *			pointer is updated to the next position
   * Return:		-1 if the input parameters are invalid.
   *			NOTE(review): the success value is not stated here
   *			(presumably 0) - confirm against the implementation.
   */
  int utf8_put(s32 code, char **dst);
  
  /**
   * utf8_utf16_strnlen() - length of a truncated utf-8 string after conversion
   *			  to utf-16
   *
   * @src:		utf-8 string
   * @count:		maximum number of code points to convert
   * Return:		length in bytes after conversion to utf-16 without the
   *			trailing \0. If an invalid UTF-8 sequence is hit one
   *			word will be reserved for a replacement character.
   *			NOTE(review): "one word" suggests the unit is u16 words
   *			rather than bytes - confirm against the implementation.
   */
  size_t utf8_utf16_strnlen(const char *src, size_t count);
  
  /**
   * utf8_utf16_strlen() - length of a utf-8 string after conversion to utf-16
   *
   * Shorthand for utf8_utf16_strnlen() with the code point limit set to
   * SIZE_MAX. The macro adds no validity check of its own.
   *
   * @a:		utf-8 string
   * Return:		the value returned by utf8_utf16_strnlen(): the length
   *			after conversion to utf-16 without the trailing \0. If
   *			an invalid UTF-8 sequence is hit one word will be
   *			reserved for a replacement character.
   */
  #define utf8_utf16_strlen(a) utf8_utf16_strnlen((a), SIZE_MAX)
  
  /**
   * utf8_utf16_strncpy() - copy a utf-8 string to a utf-16 string
   *
   * @dst:		address of the pointer to the destination buffer.
   *			NOTE(review): presumably the pointer is advanced as
   *			words are written - confirm against the implementation.
   * @src:		source buffer (utf-8)
   * @count:		maximum number of code points to copy
   * Return:		-1 if the input parameters are invalid.
   *			NOTE(review): the success value is not stated here
   *			(presumably 0) - confirm against the implementation.
   */
  int utf8_utf16_strncpy(u16 **dst, const char *src, size_t count);
  
  /**
   * utf8_utf16_strcpy() - copy a utf-8 string to a utf-16 string
   *
   * Shorthand for utf8_utf16_strncpy() with the code point limit set to
   * SIZE_MAX.
   *
   * @d:		destination buffer, passed on as the dst argument
   * @s:		source buffer, passed on as the src argument
   * Return:		the value returned by utf8_utf16_strncpy(): -1 if the
   *			input parameters are invalid
   */
  #define utf8_utf16_strcpy(d, s) utf8_utf16_strncpy((d), (s), SIZE_MAX)
  
  /**
   * utf16_get() - get the next UTF-16 code point from a buffer
   *
   * @src:		address of the pointer to the current word; the pointer
   *			is updated to point to the next word
   * Return:		the code point, 0 at the end of the string, or -1 if no
   *			legal code point is found. In case of an error *src
   *			points to the incorrect word.
   */
  s32 utf16_get(const u16 **src);
  
  /**
   * utf16_put() - write a code point to a buffer as UTF-16
   *
   * @code:		code point to write
   * @dst:		address of the pointer to the destination buffer; the
   *			pointer is updated to the next position
   * Return:		-1 if the input parameters are invalid.
   *			NOTE(review): the success value is not stated here
   *			(presumably 0) - confirm against the implementation.
   */
  int utf16_put(s32 code, u16 **dst);
  
  /**
   * utf16_strnlen() - length of a truncated utf-16 string in code points
   *
   * @src:		utf-16 string
   * @count:		maximum number of code points to convert
   * Return:		length in code points. If an invalid UTF-16 sequence is
   *			hit, one position is reserved for a replacement
   *			character.
   */
  size_t utf16_strnlen(const u16 *src, size_t count);
  
  /**
   * utf16_utf8_strnlen() - length of a truncated utf-16 string after conversion
   *			  to utf-8
   *
   * @src:		utf-16 string
   * @count:		maximum number of code points to convert
   * Return:		length in bytes after conversion to utf-8 without the
   *			trailing \0. If an invalid UTF-16 sequence is hit, one
   *			byte is reserved for a replacement character.
   */
  size_t utf16_utf8_strnlen(const u16 *src, size_t count);
  
  /**
   * utf16_utf8_strlen() - length of a utf-16 string after conversion to utf-8
   *
   * Shorthand for utf16_utf8_strnlen() with the code point limit set to
   * SIZE_MAX. The macro adds no validity check of its own.
   *
   * @a:		utf-16 string
   * Return:		the value returned by utf16_utf8_strnlen(): the length
   *			in bytes after conversion to utf-8 without the trailing
   *			\0. If an invalid UTF-16 sequence is hit one byte will
   *			be reserved for a replacement character.
   */
  #define utf16_utf8_strlen(a) utf16_utf8_strnlen((a), SIZE_MAX)
  
  /**
   * utf16_utf8_strncpy() - copy a utf-16 string to a utf-8 string
   *
   * @dst:		address of the pointer to the destination buffer.
   *			NOTE(review): presumably the pointer is advanced as
   *			bytes are written - confirm against the implementation.
   * @src:		source buffer (utf-16)
   * @count:		maximum number of code points to copy
   * Return:		-1 if the input parameters are invalid.
   *			NOTE(review): the success value is not stated here
   *			(presumably 0) - confirm against the implementation.
   */
  int utf16_utf8_strncpy(char **dst, const u16 *src, size_t count);
  
  /**
   * utf16_utf8_strcpy() - copy a utf-16 string to a utf-8 string
   *
   * Shorthand for utf16_utf8_strncpy() with the code point limit set to
   * SIZE_MAX.
   *
   * @d:		destination buffer, passed on as the dst argument
   * @s:		source buffer, passed on as the src argument
   * Return:		the value returned by utf16_utf8_strncpy(): -1 if the
   *			input parameters are invalid
   */
  #define utf16_utf8_strcpy(d, s) utf16_utf8_strncpy((d), (s), SIZE_MAX)
  
  /**
b5130a812   Heinrich Schuchardt   lib: charset: upp...
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
   * utf_to_lower() - convert a Unicode letter to lower case
   *
   * @code:		letter to convert
   * Return:		lower case letter or unchanged letter
   */
  s32 utf_to_lower(const s32 code);
  
  /**
   * utf_to_upper() - convert a Unicode letter to upper case
   *
   * The parameter is passed by value, so no const qualifier is needed in the
   * prototype; a definition may still qualify its own copy.
   *
   * @code:		letter to convert
   * Return:		upper case letter or unchanged letter
   */
  s32 utf_to_upper(s32 code);
  
  /**
1dde0d57a   Heinrich Schuchardt   efi_loader: renam...
168
   * u16_strlen() - count non-zero words
78178bb0c   Rob Clark   lib: add some utf...
169
   *
1dde0d57a   Heinrich Schuchardt   efi_loader: renam...
170
171
   * This function matches wcslen() if the -fshort-wchar compiler flag is set.
   * In the EFI context we explicitly need a function handling u16 strings.
78178bb0c   Rob Clark   lib: add some utf...
172
   *
1dde0d57a   Heinrich Schuchardt   efi_loader: renam...
173
174
175
   * @in:			null terminated u16 string
   * Return:			number of non-zero words.
   *			This is not the number of utf-16 letters!
78178bb0c   Rob Clark   lib: add some utf...
176
   */
1dde0d57a   Heinrich Schuchardt   efi_loader: renam...
177
  size_t u16_strlen(const u16 *in);
78178bb0c   Rob Clark   lib: add some utf...
178
179
  
  /**
1dde0d57a   Heinrich Schuchardt   efi_loader: renam...
180
   * u16_strnlen() - count non-zero words, up to a maximum
78178bb0c   Rob Clark   lib: add some utf...
181
   *
1dde0d57a   Heinrich Schuchardt   efi_loader: renam...
182
183
   * This function matches wcsnlen_s() if the -fshort-wchar compiler flag is set.
   * In the EFI context we explicitly need a function handling u16 strings.
78178bb0c   Rob Clark   lib: add some utf...
184
   *
1dde0d57a   Heinrich Schuchardt   efi_loader: renam...
185
186
187
188
   * @in:			null terminated u16 string
   * @count:		maximum number of words to count
   * Return:			number of non-zero words.
   *			This is not the number of utf-16 letters!
78178bb0c   Rob Clark   lib: add some utf...
189
   */
1dde0d57a   Heinrich Schuchardt   efi_loader: renam...
190
  size_t u16_strnlen(const u16 *in, size_t count);
78178bb0c   Rob Clark   lib: add some utf...
191
192
  
  /**
2a3537ae2   Akashi, Takahiro   lib: add u16_strc...
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
   * u16_strcpy() - copy a u16 string
   *
   * Copies the u16 string pointed to by src, including the terminating null
   * word, to the buffer pointed to by dest.
   *
   * @dest:		destination buffer
   * @src:		source buffer (null terminated)
   * Return:		'dest' address
   */
  u16 *u16_strcpy(u16 *dest, const u16 *src);
  
  /**
   * u16_strdup() - duplicate a u16 string
   *
   * Copies the u16 string pointed to by src, including the terminating null
   * word, to a newly allocated buffer.
   *
   * @src:		source buffer (null terminated)
   * Return:		newly allocated buffer on success, NULL on failure.
   *			NOTE(review): presumably the caller must release the
   *			buffer - confirm against the implementation.
   */
  u16 *u16_strdup(const u16 *src);
  
  /**
78178bb0c   Rob Clark   lib: add some utf...
216
217
218
219
220
   * utf16_to_utf8() - Convert a utf16 string to utf8
   *
   * Converts 'size' characters of the utf16 string 'src' to utf8
   * written to the 'dest' buffer.
   *
984f251fe   Heinrich Schuchardt   efi_loader: MAX_U...
221
   * NOTE that a single utf16 character can generate up to 3 utf8
78178bb0c   Rob Clark   lib: add some utf...
222
223
224
225
226
227
228
229
230
231
   * characters.  See MAX_UTF8_PER_UTF16.
   *
   * @dest:	the destination buffer to write the utf8 characters
   * @src:	the source utf16 string
   * @size:	the number of utf16 characters to convert
   * Return:	the pointer to the first unwritten byte in 'dest'
   */
  uint8_t *utf16_to_utf8(uint8_t *dest, const uint16_t *src, size_t size);
  
  #endif /* __CHARSET_H_ */