LibreOffice
LibreOffice 7.1 SDK C/C++ API Reference
character.hxx
Go to the documentation of this file.
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  *
9  * This file incorporates work covered by the following license notice:
10  *
11  * Licensed to the Apache Software Foundation (ASF) under one or more
12  * contributor license agreements. See the NOTICE file distributed
13  * with this work for additional information regarding copyright
14  * ownership. The ASF licenses this file to you under the Apache
15  * License, Version 2.0 (the "License"); you may not use this file
16  * except in compliance with the License. You may obtain a copy of
17  * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18  */
19 
20 #ifndef INCLUDED_RTL_CHARACTER_HXX
21 #define INCLUDED_RTL_CHARACTER_HXX
22 
23 #include "sal/config.h"
24 
25 #include <cassert>
26 #include <cstddef>
27 
28 #include "sal/types.h"
29 
30 namespace rtl
31 {
40 inline bool isUnicodeCodePoint(sal_uInt32 code) { return code <= 0x10FFFF; }
41 
50 inline bool isAscii(sal_uInt32 code)
51 {
52  assert(isUnicodeCodePoint(code));
53  return code <= 0x7F;
54 }
55 
56 #if defined LIBO_INTERNAL_ONLY
57 bool isAscii(char) = delete;
58 bool isAscii(signed char) = delete;
59 template <typename T> inline bool isAscii(T code) { return isAscii(sal_uInt32(code)); }
60 #endif
61 
71 inline bool isAsciiLowerCase(sal_uInt32 code)
72 {
73  assert(isUnicodeCodePoint(code));
74  return code >= 'a' && code <= 'z';
75 }
76 
77 #if defined LIBO_INTERNAL_ONLY
78 bool isAsciiLowerCase(char) = delete;
79 bool isAsciiLowerCase(signed char) = delete;
80 template <typename T> inline bool isAsciiLowerCase(T code)
81 {
82  return isAsciiLowerCase(sal_uInt32(code));
83 }
84 #endif
85 
95 inline bool isAsciiUpperCase(sal_uInt32 code)
96 {
97  assert(isUnicodeCodePoint(code));
98  return code >= 'A' && code <= 'Z';
99 }
100 
101 #if defined LIBO_INTERNAL_ONLY
102 bool isAsciiUpperCase(char) = delete;
103 bool isAsciiUpperCase(signed char) = delete;
104 template <typename T> inline bool isAsciiUpperCase(T code)
105 {
106  return isAsciiUpperCase(sal_uInt32(code));
107 }
108 #endif
109 
119 inline bool isAsciiAlpha(sal_uInt32 code)
120 {
121  assert(isUnicodeCodePoint(code));
122  return isAsciiLowerCase(code) || isAsciiUpperCase(code);
123 }
124 
125 #if defined LIBO_INTERNAL_ONLY
126 bool isAsciiAlpha(char) = delete;
127 bool isAsciiAlpha(signed char) = delete;
128 template <typename T> inline bool isAsciiAlpha(T code) { return isAsciiAlpha(sal_uInt32(code)); }
129 #endif
130 
140 inline bool isAsciiDigit(sal_uInt32 code)
141 {
142  assert(isUnicodeCodePoint(code));
143  return code >= '0' && code <= '9';
144 }
145 
146 #if defined LIBO_INTERNAL_ONLY
147 bool isAsciiDigit(char) = delete;
148 bool isAsciiDigit(signed char) = delete;
149 template <typename T> inline bool isAsciiDigit(T code) { return isAsciiDigit(sal_uInt32(code)); }
150 #endif
151 
161 inline bool isAsciiAlphanumeric(sal_uInt32 code)
162 {
163  assert(isUnicodeCodePoint(code));
164  return isAsciiDigit(code) || isAsciiAlpha(code);
165 }
166 
167 #if defined LIBO_INTERNAL_ONLY
168 bool isAsciiAlphanumeric(char) = delete;
169 bool isAsciiAlphanumeric(signed char) = delete;
170 template <typename T> inline bool isAsciiAlphanumeric(T code)
171 {
172  return isAsciiAlphanumeric(sal_uInt32(code));
173 }
174 #endif
175 
185 inline bool isAsciiCanonicHexDigit(sal_uInt32 code)
186 {
187  assert(isUnicodeCodePoint(code));
188  return isAsciiDigit(code) || (code >= 'A' && code <= 'F');
189 }
190 
191 #if defined LIBO_INTERNAL_ONLY
192 bool isAsciiCanonicHexDigit(char) = delete;
193 bool isAsciiCanonicHexDigit(signed char) = delete;
194 template <typename T> inline bool isAsciiCanonicHexDigit(T code)
195 {
196  return isAsciiCanonicHexDigit(sal_uInt32(code));
197 }
198 #endif
199 
209 inline bool isAsciiHexDigit(sal_uInt32 code)
210 {
211  assert(isUnicodeCodePoint(code));
212  return isAsciiCanonicHexDigit(code) || (code >= 'a' && code <= 'f');
213 }
214 
215 #if defined LIBO_INTERNAL_ONLY
216 bool isAsciiHexDigit(char) = delete;
217 bool isAsciiHexDigit(signed char) = delete;
218 template <typename T> inline bool isAsciiHexDigit(T code)
219 {
220  return isAsciiHexDigit(sal_uInt32(code));
221 }
222 #endif
223 
232 inline bool isAsciiOctalDigit(sal_uInt32 code)
233 {
234  assert(isUnicodeCodePoint(code));
235  return code >= '0' && code <= '7';
236 }
237 
238 #if defined LIBO_INTERNAL_ONLY
239 bool isAsciiOctalDigit(char) = delete;
240 bool isAsciiOctalDigit(signed char) = delete;
241 template <typename T> inline bool isAsciiOctalDigit(T code)
242 {
243  return isAsciiOctalDigit(sal_uInt32(code));
244 }
245 #endif
246 
256 inline bool isAsciiWhiteSpace(sal_uInt32 code)
257 {
258  assert(isUnicodeCodePoint(code));
259  return code == ' ' || code == '\f' || code == '\n' || code == '\r' || code == '\t'
260  || code == '\v';
261 }
262 
263 #if defined LIBO_INTERNAL_ONLY
264 bool isAsciiWhiteSpace(char) = delete;
265 bool isAsciiWhiteSpace(signed char) = delete;
266 template <typename T> inline bool isAsciiWhiteSpace(T code)
267 {
268  return isAsciiWhiteSpace(sal_uInt32(code));
269 }
270 #endif
271 
280 inline sal_uInt32 toAsciiUpperCase(sal_uInt32 code)
281 {
282  assert(isUnicodeCodePoint(code));
283  return isAsciiLowerCase(code) ? code - 32 : code;
284 }
285 
286 #if defined LIBO_INTERNAL_ONLY
287 sal_uInt32 toAsciiUpperCase(char) = delete;
288 sal_uInt32 toAsciiUpperCase(signed char) = delete;
289 template <typename T> inline sal_uInt32 toAsciiUpperCase(T code)
290 {
291  return toAsciiUpperCase(sal_uInt32(code));
292 }
293 #endif
294 
303 inline sal_uInt32 toAsciiLowerCase(sal_uInt32 code)
304 {
305  assert(isUnicodeCodePoint(code));
306  return isAsciiUpperCase(code) ? code + 32 : code;
307 }
308 
309 #if defined LIBO_INTERNAL_ONLY
310 sal_uInt32 toAsciiLowerCase(char) = delete;
311 sal_uInt32 toAsciiLowerCase(signed char) = delete;
312 template <typename T> inline sal_uInt32 toAsciiLowerCase(T code)
313 {
314  return toAsciiLowerCase(sal_uInt32(code));
315 }
316 #endif
317 
330 inline sal_Int32 compareIgnoreAsciiCase(sal_uInt32 code1, sal_uInt32 code2)
331 {
332  assert(isUnicodeCodePoint(code1));
333  assert(isUnicodeCodePoint(code2));
334  return static_cast<sal_Int32>(toAsciiLowerCase(code1))
335  - static_cast<sal_Int32>(toAsciiLowerCase(code2));
336 }
337 
339 namespace detail
340 {
341 sal_uInt32 const surrogatesHighFirst = 0xD800;
342 sal_uInt32 const surrogatesHighLast = 0xDBFF;
343 sal_uInt32 const surrogatesLowFirst = 0xDC00;
344 sal_uInt32 const surrogatesLowLast = 0xDFFF;
345 }
347 
356 inline bool isSurrogate(sal_uInt32 code)
357 {
358  assert(isUnicodeCodePoint(code));
359  return code >= detail::surrogatesHighFirst && code <= detail::surrogatesLowLast;
360 }
361 
370 inline bool isHighSurrogate(sal_uInt32 code)
371 {
372  assert(isUnicodeCodePoint(code));
373  return code >= detail::surrogatesHighFirst && code <= detail::surrogatesHighLast;
374 }
375 
384 inline bool isLowSurrogate(sal_uInt32 code)
385 {
386  assert(isUnicodeCodePoint(code));
387  return code >= detail::surrogatesLowFirst && code <= detail::surrogatesLowLast;
388 }
389 
398 inline sal_Unicode getHighSurrogate(sal_uInt32 code)
399 {
400  assert(isUnicodeCodePoint(code));
401  assert(code >= 0x10000);
402  return static_cast<sal_Unicode>(((code - 0x10000) >> 10) | detail::surrogatesHighFirst);
403 }
404 
413 inline sal_Unicode getLowSurrogate(sal_uInt32 code)
414 {
415  assert(isUnicodeCodePoint(code));
416  assert(code >= 0x10000);
417  return static_cast<sal_Unicode>(((code - 0x10000) & 0x3FF) | detail::surrogatesLowFirst);
418 }
419 
430 inline sal_uInt32 combineSurrogates(sal_uInt32 high, sal_uInt32 low)
431 {
432  assert(isHighSurrogate(high));
433  assert(isLowSurrogate(low));
434  return ((high - detail::surrogatesHighFirst) << 10) + (low - detail::surrogatesLowFirst)
435  + 0x10000;
436 }
437 
450 inline std::size_t splitSurrogates(sal_uInt32 code, sal_Unicode* output)
451 {
452  assert(isUnicodeCodePoint(code));
453  assert(output != NULL);
454  if (code < 0x10000)
455  {
456  output[0] = code;
457  return 1;
458  }
459  else
460  {
461  output[0] = getHighSurrogate(code);
462  output[1] = getLowSurrogate(code);
463  return 2;
464  }
465 }
466 
475 inline bool isUnicodeScalarValue(sal_uInt32 code)
476 {
477  return isUnicodeCodePoint(code) && !isSurrogate(code);
478 }
479 }
480 
481 #endif
482 
483 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
sal_uInt16 sal_Unicode
Definition: types.h:119
Definition: bootstrap.hxx:30
bool isAsciiDigit(sal_uInt32 code)
Check for ASCII digit character.
Definition: character.hxx:140
bool isAsciiCanonicHexDigit(sal_uInt32 code)
Check for ASCII canonic hexadecimal digit character.
Definition: character.hxx:185
bool isAsciiAlpha(sal_uInt32 code)
Check for ASCII alphabetic character.
Definition: character.hxx:119
bool isLowSurrogate(sal_uInt32 code)
Check for low surrogate.
Definition: character.hxx:384
bool isAscii(sal_uInt32 code)
Check for ASCII character.
Definition: character.hxx:50
bool isAsciiWhiteSpace(sal_uInt32 code)
Check for ASCII white space character.
Definition: character.hxx:256
std::size_t splitSurrogates(sal_uInt32 code, sal_Unicode *output)
Split a Unicode code point into UTF-16 code units.
Definition: character.hxx:450
bool isAsciiLowerCase(sal_uInt32 code)
Check for ASCII lower case character.
Definition: character.hxx:71
bool isHighSurrogate(sal_uInt32 code)
Check for high surrogate.
Definition: character.hxx:370
bool isAsciiHexDigit(sal_uInt32 code)
Check for ASCII hexadecimal digit character.
Definition: character.hxx:209
bool isAsciiAlphanumeric(sal_uInt32 code)
Check for ASCII alphanumeric character.
Definition: character.hxx:161
bool isAsciiOctalDigit(sal_uInt32 code)
Check for ASCII octal digit character.
Definition: character.hxx:232
bool isAsciiUpperCase(sal_uInt32 code)
Check for ASCII upper case character.
Definition: character.hxx:95
sal_Int32 compareIgnoreAsciiCase(sal_uInt32 code1, sal_uInt32 code2)
Compare two characters ignoring ASCII case.
Definition: character.hxx:330
sal_Unicode getLowSurrogate(sal_uInt32 code)
Get low surrogate half of a non-BMP Unicode code point.
Definition: character.hxx:413
bool isUnicodeScalarValue(sal_uInt32 code)
Check for Unicode scalar value.
Definition: character.hxx:475
sal_Unicode getHighSurrogate(sal_uInt32 code)
Get high surrogate half of a non-BMP Unicode code point.
Definition: character.hxx:398
bool isUnicodeCodePoint(sal_uInt32 code)
Check for Unicode code point.
Definition: character.hxx:40
sal_uInt32 toAsciiLowerCase(sal_uInt32 code)
Convert a character, if ASCII, to lower case.
Definition: character.hxx:303
sal_uInt32 combineSurrogates(sal_uInt32 high, sal_uInt32 low)
Combine surrogates to form a code point.
Definition: character.hxx:430
bool isSurrogate(sal_uInt32 code)
Check for surrogate.
Definition: character.hxx:356
sal_uInt32 toAsciiUpperCase(sal_uInt32 code)
Convert a character, if ASCII, to upper case.
Definition: character.hxx:280