safec  3.2
Safe C Library - ISO TR24731 Bounds Checking Interface
wcsnorm_s.c File Reference
#include "safe_str_lib.h"
#include "unw16ifcan.h"
#include "unw16ifcmb.h"
#include "unw16ifcmp.h"
#include "unw16ifexc.h"
#include "hangul.h"
+ Include dependency graph for wcsnorm_s.c:

Data Structures

struct  UNWIF_cc
 

Macros

#define _UNICODE_MAX   0x10ffff
 
#define CC_SEQ_SIZE   10
 
#define CC_SEQ_STEP   5
 

Functions

bool isExclusion (uint32_t uv)
 
bool isSingleton (uint32_t uv)
 
bool isNonStDecomp (uint32_t uv)
 
bool isComp2nd (uint32_t uv)
 
static int _decomp_canonical_s (wchar_t *dest, rsize_t dmax, uint32_t cp)
 
static int _decomp_hangul_s (wchar_t *dest, rsize_t dmax, uint32_t cp)
 
EXPORT int _decomp_s (wchar_t *restrict dest, rsize_t dmax, const uint32_t cp, const bool iscompat)
 
static int _compare_cc (const void *a, const void *b)
 
static uint32_t _composite_cp (uint32_t cp, uint32_t cp2)
 
static uint8_t _combin_class (uint32_t cp)
 
EXPORT errno_t wcsnorm_decompose_s (wchar_t *restrict dest, rsize_t dmax, wchar_t *restrict src, rsize_t *restrict lenp, bool iscompat)
 Converts the wide string to the canonical NFD normalization, as defined in the Unicode standard, currently version 10.0. More...
 
EXPORT errno_t wcsnorm_reorder_s (wchar_t *restrict dest, rsize_t dmax, wchar_t *restrict p, rsize_t len)
 Reorder all decomposed sequences in a wide string to NFD, as defined in the Unicode standard, currently version 10.0. More...
 
EXPORT errno_t wcsnorm_compose_s (wchar_t *restrict dest, rsize_t dmax, wchar_t *restrict p, rsize_t *restrict lenp, bool iscontig)
 Combine all decomposed sequences in a wide string to NFC, as defined in the Unicode standard, currently version 10.0. More...
 
EXPORT errno_t wcsnorm_s (wchar_t *restrict dest, rsize_t dmax, wchar_t *restrict src, wcsnorm_mode_t mode, rsize_t *restrict lenp)
 Converts the wide string to the canonical NFC or NFD normalization, as defined in the Unicode standard, currently version 10.0. More...
 

Macro Definition Documentation

◆ _UNICODE_MAX

#define _UNICODE_MAX   0x10ffff

◆ CC_SEQ_SIZE

#define CC_SEQ_SIZE   10

◆ CC_SEQ_STEP

#define CC_SEQ_STEP   5

Function Documentation

◆ isExclusion()

bool isExclusion ( uint32_t  uv)

◆ isSingleton()

bool isSingleton ( uint32_t  uv)

◆ isNonStDecomp()

bool isNonStDecomp ( uint32_t  uv)

◆ isComp2nd()

bool isComp2nd ( uint32_t  uv)

◆ _decomp_canonical_s()

static int _decomp_canonical_s ( wchar_t *  dest,
rsize_t  dmax,
uint32_t  cp 
)
static

◆ _decomp_hangul_s()

static int _decomp_hangul_s ( wchar_t *  dest,
rsize_t  dmax,
uint32_t  cp 
)
static

◆ _decomp_s()

EXPORT int _decomp_s ( wchar_t *restrict  dest,
rsize_t  dmax,
const uint32_t  cp,
const bool  iscompat 
)

◆ _compare_cc()

static int _compare_cc ( const void *  a,
const void *  b 
)
static

◆ _composite_cp()

static uint32_t _composite_cp ( uint32_t  cp,
uint32_t  cp2 
)
static

◆ _combin_class()

static uint8_t _combin_class ( uint32_t  cp)
static

◆ wcsnorm_decompose_s()

EXPORT errno_t wcsnorm_decompose_s ( wchar_t *restrict  dest,
rsize_t  dmax,
wchar_t *restrict  src,
rsize_t *restrict  lenp,
bool  iscompat 
)

Converts the wide string to the canonical NFD normalization, as defined in the Unicode standard, currently version 10.0.

The conversion stops at the first null or after dmax characters.

Composed characters are checked for the left-hand side of the Decomposition_Mapping Unicode property, which means the codepoint will be normalized if the sequence is composed. This is equivalent to all 1963 combining mark characters, plus some remaining 869 non-mark and non-hangul normalizables. Hangul has some special normalization logic.

This function is used by wcsnorm_s() to do the argument checking, overlap checking and to do the first of three passes for NFC.

Parameters
[out]  dest  wide string to hold the result
[in]  dmax  maximum result buffer size
[in]  src  wide string
[out]  lenp  pointer to length of the result, may be NULL
[in]  iscompat  do NFKD, and not NFD (with --enable-norm-compat)
Precondition
dest and src shall not be null pointers.
dmax shall not equal zero and big enough for dest.
dmax shall not be greater than RSIZE_MAX_WSTR.
Returns
If there is a runtime-constraint violation, then if dest is not a null pointer and dmax is greater than zero and not greater than RSIZE_MAX_WSTR, then wcsnorm_decompose_s nulls dest.
Return values
EOK  on success
ESNULLP  when dest or src is a NULL pointer
ESZEROL  when dmax = 0
ESLEMIN  when dmax < 5, or dmax < 19 with a compat mode
ESLEMAX  when dmax > RSIZE_MAX_WSTR
ESOVRLP  when buffers overlap
ESNOSPC  when dmax is too small for the result buffer
EOF  on some normalization error
See also
wcsfc_s(), wcsnorm_s(), wcsnorm_compose_s(), wcsnorm_reorder_s(), ICU, gnulib/libunistring, utf8proc

◆ wcsnorm_reorder_s()

EXPORT errno_t wcsnorm_reorder_s ( wchar_t *restrict  dest,
rsize_t  dmax,
wchar_t *restrict  p,
rsize_t  len 
)

Reorder all decomposed sequences in a wide string to NFD, as defined in the Unicode standard, currently version 10.0.

The conversion stops at the first null or after dmax characters.

Parameters
[out]  dest  wide string to hold the result
[in]  dmax  maximum result buffer size
[in]  p  wide string to be converted
[in]  len  length of p
Precondition
dest and p shall not be null pointers.
dmax shall not equal zero and big enough for dest.
dmax shall not be greater than RSIZE_MAX_WSTR.
Returns
If there is a runtime-constraint violation, then if dest is not a null pointer and dmax is greater than zero and not greater than RSIZE_MAX_WSTR, then wcsnorm_reorder_s nulls dest.
Return values
EOK  on success
ESNOSPC  when dmax is too small for the result buffer
EOF  on some normalization error
See also
wcsnorm_s(), wcsnorm_decompose_s(), ICU, gnulib/libunistring, utf8proc

◆ wcsnorm_compose_s()

EXPORT errno_t wcsnorm_compose_s ( wchar_t *restrict  dest,
rsize_t  dmax,
wchar_t *restrict  p,
rsize_t *restrict  lenp,
bool  iscontig 
)

Combine all decomposed sequences in a wide string to NFC, as defined in the Unicode standard, currently version 10.0.

The conversion stops at the first null or after dmax characters.

Parameters
[out]  dest  wide string to hold the result
[in]  dmax  maximum result buffer size
[in]  p  wide string to be converted
[out]  lenp  pointer to the length of p and the result length
[in]  iscontig  if true, the result will only be a fast FCC
Precondition
dest, p and lenp shall not be null pointers.
dmax shall not equal zero and big enough for dest.
dmax shall not be greater than RSIZE_MAX_WSTR.
Returns
If there is a runtime-constraint violation, then if dest is not a null pointer and dmax is greater than zero and not greater than RSIZE_MAX_WSTR, then wcsnorm_compose_s nulls dest.
Return values
EOK  on success
ESNOSPC  when dmax is too small for the result buffer
EOF  on some normalization error
See also
wcsnorm_s(), wcsnorm_decompose_s(), ICU, gnulib/libunistring, utf8proc

◆ wcsnorm_s()

EXPORT errno_t wcsnorm_s ( wchar_t *restrict  dest,
rsize_t  dmax,
wchar_t *restrict  src,
wcsnorm_mode_t  mode,
rsize_t *restrict  lenp 
)

Converts the wide string to the canonical NFC or NFD normalization, as defined in the Unicode standard, currently version 10.0.

The conversion stops at the first null or after dmax characters.

Decomposed characters are checked for the left-hand side and then the right-hand side of the Decomposition_Mapping Unicode property, which means the codepoint will be normalized if the sequence is composed or decomposed (NFD or NFKD). This is equivalent to all 1963 combining mark characters, plus some remaining 869 non-mark and non-hangul normalizables. Hangul has some special normalization logic.

The compat tables for NFKC or NFKD are too large for a libc, and mostly unused. By default we only provide the smaller canonical conversions, but the compat modes can be enabled with --enable-norm-compat. The compat modes also do not round-trip.

Parameters
[out]  dest  wide string to hold the result
[in]  dmax  maximum length of string
[in]  src  wide string
[in]  mode  convert to NFC or just NFD; experimentally to the fast modes FCD or FCC; optionally to the compat modes NFKD or NFKC with --enable-norm-compat
See also
enum wcsnorm_mode.
Parameters
[out]  lenp  pointer to length of the result, may be NULL
Precondition
dest and src shall not be null pointers.
dmax shall not equal zero and big enough for dest.
dmax shall not be greater than RSIZE_MAX_WSTR.
Returns
If there is a runtime-constraint violation, then if dest is not a null pointer and dmax is greater than zero and not greater than RSIZE_MAX_WSTR, then wcsnorm_s nulls dest.
Return values
EOK  on success
ESNULLP  when dest or src is a NULL pointer
ESZEROL  when dmax = 0
ESLEMIN  when dmax < 5
ESLEMAX  when dmax > RSIZE_MAX_WSTR
ESOVRLP  when buffers overlap
ESNOSPC  when dmax is too small for the result buffer
EOF  on any other normalization error
See also
wcsfc_s(), ICU, gnulib/libunistring, utf8proc