-
-
Notifications
You must be signed in to change notification settings - Fork 754
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
26 changed files
with
1,608 additions
and
527 deletions.
There are no files selected for viewing
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,6 @@ | ||
// charutil.c : implementation of various character/string utility functions. | ||
// | ||
// (c) Ulf Frisk, 2021-2022 | ||
// (c) Ulf Frisk, 2021-2023 | ||
// Author: Ulf Frisk, [email protected] | ||
// | ||
#include "charutil.h" | ||
|
@@ -37,6 +37,19 @@ BOOL CharUtil_IsAnsiW(_In_ LPCWSTR wsz) | |
} | ||
} | ||
|
||
/*
* Check whether a string consists only of 7-bit characters that are marked
* as allowed in the CHARUTIL_ANSIFILENAME_ALLOW lookup table.
* The scan gives up (returns FALSE) once MAX_PATH - 1 non-null characters
* have been read without reaching the null terminator.
* -- sz = the null-terminated string to check.
* -- return = TRUE if the terminator is reached without any rejected char.
*/
BOOL CharUtil_IsAnsiFsA(_In_ LPCSTR sz)
{
    UCHAR ch;
    DWORD cchRead;
    for(cchRead = 0; ; ) {
        ch = sz[cchRead];
        cchRead++;
        if(!ch) { return TRUE; }                        // end of string: all chars ok
        if((ch > 127) || (CHARUTIL_ANSIFILENAME_ALLOW[ch] == '0')) {
            return FALSE;                               // non 7-bit or table says '0'
        }
        if(cchRead > MAX_PATH - 2) { return FALSE; }    // too long
    }
}
|
||
/* | ||
* Convert Ascii (0-255) or Wide (16-bit LE) string into a UTF-8 string. | ||
* Function support sz/wsz == pbBuffer - sz/wsz will then become overwritten. | ||
|
@@ -699,6 +712,108 @@ BOOL CharUtil_WtoJ(_In_opt_ LPWSTR wsz, _In_ DWORD cch, _Maybenull_ _Writable_by | |
return FALSE; | ||
} | ||
|
||
/* | ||
* Convert UTF-8 string into a CSV compatible string. | ||
* If the source string contains a comma(,) space( ) or doublequote("), or if | ||
* it is empty, it is treated as a CSV string and put into double quotes at | ||
* start/end. Each doublequote in the source is emitted twice. Bytes below | ||
* 0x20 are replaced with a question mark(?). | ||
* The function supports usz == pbBuffer - usz will then be overwritten. | ||
* CALLER LOCALFREE (if *pvsz != pbBuffer): *pvsz | ||
* -- usz = the string to convert (NULL is treated as the empty string). | ||
* -- cch = -1 for null-terminated string; or max number of chars (excl. null). | ||
*          values above CHARUTIL_CONVERT_MAXSIZE are capped to that value. | ||
* -- pbBuffer = optional buffer to place the result in. | ||
* -- cbBuffer = byte length of pbBuffer. | ||
* -- pvsz = if set to null: function calculates length only and returns TRUE | ||
*          (unless CHARUTIL_FLAG_STR_BUFONLY is set in flags). Otherwise | ||
*          receives the result utf-8 string, either as (*pvsz == pbBuffer) | ||
*          or as a LocalAlloc'ed buffer that the caller is responsible to free. | ||
* -- pcbv = byte length (including terminating null) of utf-8 string. | ||
* -- flags = CHARUTIL_FLAG_NONE, CHARUTIL_FLAG_ALLOC or CHARUTIL_FLAG_TRUNCATE | ||
*          (the code also tests CHARUTIL_FLAG_STR_BUFONLY and | ||
*          CHARUTIL_FLAG_TRUNCATE_ONFAIL_NULLSTR). | ||
* -- return = TRUE on success, FALSE on invalid utf-8, insufficient buffer | ||
*          space or failed allocation. | ||
*/ | ||
_Success_(return) | ||
BOOL CharUtil_UtoCSV(_In_opt_ LPSTR usz, _In_ DWORD cch, _Maybenull_ _Writable_bytes_(cbBuffer) PBYTE pbBuffer, _In_ DWORD cbBuffer, _Out_opt_ LPSTR *pvsz, _Out_opt_ PDWORD pcbv, _In_ DWORD flags) | ||
{ | ||
UCHAR c; | ||
LPSTR vsz; | ||
DWORD iu, iv, n, cbu = 0, cbv = 0; | ||
BOOL fCSV = FALSE; | ||
BOOL fTruncate = flags & CHARUTIL_FLAG_TRUNCATE; | ||
if(pcbv) { *pcbv = 0; } | ||
if(pvsz) { *pvsz = NULL; } | ||
if(!usz) { usz = ""; } | ||
if(cch > CHARUTIL_CONVERT_MAXSIZE) { cch = CHARUTIL_CONVERT_MAXSIZE; } | ||
// 1: csv byte-length: | ||
// cbu counts consumed source bytes, cbv counts result bytes. | ||
// an empty source string is emitted as two double quotes: | ||
if(usz[0] == '\0') { | ||
fCSV = TRUE; | ||
cbv += 2; | ||
} | ||
// truncation requires a caller buffer and cannot be combined with alloc: | ||
if(fTruncate && (!cbBuffer || (flags & CHARUTIL_FLAG_ALLOC))) { goto fail; } | ||
while((cbu < cch) && (c = usz[cbu])) { | ||
if(c & 0x80) { | ||
// utf-8 char: | ||
// n = sequence length derived from the lead byte (0 = invalid lead byte) | ||
n = 0; | ||
if((c & 0xe0) == 0xc0) { n = 2; } | ||
if((c & 0xf0) == 0xe0) { n = 3; } | ||
if((c & 0xf8) == 0xf0) { n = 4; } | ||
if(!n) { goto fail; } // invalid char-encoding | ||
// stop (without failing) if the sequence would exceed cch or the buffer: | ||
if(cbu + n > cch) { break; } | ||
if(fTruncate && (cbv + n >= cbBuffer)) { break; } | ||
if((n > 1) && ((usz[cbu + 1] & 0xc0) != 0x80)) { goto fail; } // invalid char-encoding | ||
if((n > 2) && ((usz[cbu + 2] & 0xc0) != 0x80)) { goto fail; } // invalid char-encoding | ||
if((n > 3) && ((usz[cbu + 3] & 0xc0) != 0x80)) { goto fail; } // invalid char-encoding | ||
cbu += n; | ||
cbv += n; | ||
} else if(c == '"' || c == ' ' || c == ',') { | ||
// csv-triggering char: a double quote takes two result bytes, and the | ||
// first such char also accounts for the two enclosing double quotes. | ||
n = (c == '"') ? 2 : 1; | ||
if(!fCSV) { n += 2; } | ||
if(fTruncate && (cbv + n >= cbBuffer)) { break; } | ||
fCSV = TRUE; | ||
cbu += 1; | ||
cbv += n; | ||
} else { | ||
if(fTruncate && (cbv + 1 >= cbBuffer)) { break; } | ||
cbu += 1; | ||
cbv += 1; | ||
} | ||
} | ||
// account for the terminating null in both counters: | ||
cbu++; | ||
cbv++; | ||
if(pcbv) { *pcbv = cbv; } | ||
// 2: return on length-request or alloc-fail | ||
if(!pvsz) { | ||
if(!(flags & CHARUTIL_FLAG_STR_BUFONLY)) { return TRUE; } // success: length request | ||
if(flags & CHARUTIL_FLAG_ALLOC) { return FALSE; } | ||
} | ||
if(!(flags & CHARUTIL_FLAG_ALLOC) && (!pbBuffer || (cbBuffer < cbv))) { goto fail; } // fail: insufficient buffer space | ||
// the caller buffer is used whenever it is large enough, even with alloc flag: | ||
vsz = (pbBuffer && (cbBuffer >= cbv)) ? pbBuffer : LocalAlloc(0, cbv); | ||
if(!vsz) { goto fail; } // fail: failed buffer space allocation | ||
// 3: populate with CSV UTF-8 string | ||
// the result is filled from the last byte towards the first; presumably this | ||
// is what keeps unread source bytes intact when usz == pbBuffer. | ||
iu = cbu - 2; iv = cbv - 2; | ||
if(fCSV) { vsz[iv--] = '"'; } | ||
// iv is unsigned: when it is decremented below zero it wraps to a large | ||
// value which ends the loop for the non-csv case. | ||
while(iv < 0x7fffffff) { | ||
if(!iv && fCSV) { | ||
vsz[0] = '"'; | ||
break; | ||
} | ||
c = usz[iu--]; | ||
if(c == '"') { | ||
vsz[iv--] = '"'; | ||
} | ||
if(c < 0x20) { | ||
c = '?'; | ||
} | ||
vsz[iv--] = c; | ||
} | ||
vsz[cbv - 1] = 0; | ||
if(pvsz) { *pvsz = vsz; } | ||
return TRUE; | ||
fail: | ||
// the xor-test below is true only if flags is exactly equal to | ||
// CHARUTIL_FLAG_TRUNCATE_ONFAIL_NULLSTR; an empty string is then returned | ||
// in pbBuffer. NOTE(review): confirm exact-match (not bit-test) is intended. | ||
if(!(flags ^ CHARUTIL_FLAG_TRUNCATE_ONFAIL_NULLSTR) && pbBuffer && cbBuffer) { | ||
if(pvsz) { *pvsz = (LPSTR)pbBuffer; } | ||
if(pcbv) { *pcbv = 1; } | ||
pbBuffer[0] = 0; | ||
} | ||
return FALSE; | ||
} | ||
|
||
|
||
|
||
|
@@ -1027,7 +1142,7 @@ VOID CharUtil_ReplaceAllA(_Inout_ LPSTR sz, _In_ CHAR chOld, _In_ CHAR chNew) | |
* -- usz = utf-8 or ascii string. | ||
* -- uszPath = buffer to receive result. | ||
* -- cbuPath = byte length of uszPath buffer | ||
* -- return | ||
* -- return = last part (i.e. file name) of usz. | ||
*/ | ||
LPSTR CharUtil_PathSplitLastEx(_In_ LPSTR usz, _Out_writes_(cbuPath) LPSTR uszPath, _In_ DWORD cbuPath) | ||
{ | ||
|
@@ -1047,6 +1162,28 @@ LPSTR CharUtil_PathSplitLastEx(_In_ LPSTR usz, _Out_writes_(cbuPath) LPSTR uszPa | |
return uszPath + iSlash + 1; | ||
} | ||
|
||
/* | ||
* Split the string usz into two at the last (back)slash which is removed. | ||
* If no slash is found, the input string is not modified and NULL is returned. | ||
* NB! The input string is modified in place. | ||
* Ex: usz: XXX/YYY/ZZZ/AAA -> usz: XXX/YYY/ZZZ + return: AAA | ||
* -- usz = utf-8 or ascii string to be split/modified. | ||
* -- return = last part (i.e. file name) of usz. | ||
*/ | ||
LPSTR CharUtil_PathSplitLastInPlace(_Inout_ LPSTR usz) | ||
{ | ||
DWORD i = 0, iSlash = -1; | ||
CHAR ch = -1; | ||
while((ch = usz[i])) { | ||
if((ch == '\\') || (ch == '/')) { | ||
iSlash = i; | ||
} | ||
i++; | ||
} | ||
if(iSlash == (DWORD)-1) { return NULL; } | ||
usz[iSlash] = 0; | ||
return usz + iSlash + 1; | ||
} | ||
|
||
/* | ||
* Return the sub-string after the last (back)slash character in usz. | ||
|
@@ -1093,10 +1230,75 @@ LPSTR CharUtil_PathSplitNext(_In_ LPSTR usz) | |
} | ||
} | ||
|
||
/* | ||
* Split a string into two at the first character. | ||
* The 1st string is returned in the pusz1 caller-allocated buffer. The | ||
* remainder is returned as return data (is a sub-string of usz). If no | ||
* 2nd string is found null-terminator character is returned (NB! not as NULL). | ||
* -- usz = utf-8/ascii string to split. | ||
* -- ch = character to split at. | ||
* -- usz1 = buffer to receive result. | ||
* -- cbu1 = byte length of usz1 buffer | ||
* -- return = remainder of split string. | ||
*/ | ||
LPSTR CharUtil_SplitFirst(_In_ LPSTR usz, _In_ CHAR ch, _Out_writes_(cbu1) LPSTR usz1, _In_ DWORD cbu1) | ||
{ | ||
UCHAR c; | ||
DWORD i = 0; | ||
while((c = usz[i]) && (c != ch) && (i < cbu1 - 2)) { | ||
usz1[i++] = c; | ||
} | ||
usz1[i] = 0; | ||
return usz[i] ? &usz[i + 1] : ""; | ||
} | ||
|
||
/* | ||
* Split a string into a list of strings at the delimiter characters. | ||
* The function allocates neccessary memory for the result array and its values. | ||
* CALLER LocalFree: *ppuszArray | ||
* -- usz = utf-8/ascii string to split. | ||
* -- chDelimiter = character to split at. | ||
* -- pcArray = pointer to receive number of strings in result array. | ||
* -- ppuszArray = pointer to receive result array. | ||
* -- return = remainder of split string. | ||
*/ | ||
_Success_(return) | ||
BOOL CharUtil_SplitList(_In_opt_ LPSTR usz, _In_ CHAR chDelimiter, _Out_ PDWORD pcArray, _Out_ LPSTR **ppuszArray) | ||
{ | ||
UCHAR c; | ||
LPSTR *pszResult; | ||
DWORD cch = 0, cDelim = 1, cDelimResult = 0; | ||
*pcArray = 0; | ||
*ppuszArray = NULL; | ||
if(!usz) { return FALSE; } | ||
// count total length and # of delimiters: | ||
while((c = usz[cch])) { | ||
if(c == chDelimiter) { cDelim++; } | ||
cch++; | ||
} | ||
// allocate result array: | ||
if(!(pszResult = LocalAlloc(LMEM_ZEROINIT, cDelim * sizeof(LPSTR) + cch + 1))) { return FALSE; } | ||
memcpy(pszResult + cDelim, usz, cch); | ||
usz = (LPSTR)(pszResult + cDelim); | ||
// split string: | ||
pszResult[cDelimResult++] = usz; | ||
while((c = usz[0]) && (cDelimResult < cDelim)) { | ||
if(c == chDelimiter) { | ||
usz[0] = 0; | ||
pszResult[cDelimResult++] = usz + 1; | ||
} | ||
usz++; | ||
} | ||
// set out parameters: | ||
*ppuszArray = pszResult; | ||
*pcArray = cDelim; | ||
return TRUE; | ||
} | ||
|
||
/* | ||
* Split a "path" string into two at the first slash/backslash character. | ||
* The 1st string is returned in the pusz1 caller-allocated buffer. The | ||
* remainder is returned as return data (is a sub-string of wsz). If no | ||
* remainder is returned as return data (is a sub-string of usz). If no | ||
* 2nd string is found null-terminator character is returned (NB! not as NULL). | ||
* -- usz = utf-8/ascii string to split. | ||
* -- usz1 = buffer to receive result. | ||
|
@@ -1162,6 +1364,81 @@ QWORD CharUtil_HashPathFsW(_In_ LPCWSTR wszPath) | |
return CharUtil_HashPathFs_Internal(uszPath); | ||
} | ||
|
||
/* | ||
* Compare multiple strings with a CharUtil_Str* compare function. | ||
* If at least one comparison is TRUE return TRUE - otherwise FALSE. | ||
* -- pfnStrCmp | ||
* -- usz1 | ||
* -- fCaseInsensitive | ||
* -- cStr | ||
* -- | ||
* ... | ||
* -- return | ||
*/ | ||
BOOL CharUtil_StrCmpAny(_In_opt_ CHARUTIL_STRCMP_PFN pfnStrCmp, _In_opt_ LPSTR usz1, _In_ BOOL fCaseInsensitive, _In_ DWORD cStr, ...) | ||
{ | ||
va_list arglist; | ||
if(!pfnStrCmp) { return FALSE; } | ||
va_start(arglist, cStr); | ||
while(cStr) { | ||
if(pfnStrCmp(usz1, va_arg(arglist, LPSTR), fCaseInsensitive)) { | ||
va_end(arglist); | ||
return TRUE; | ||
} | ||
cStr--; | ||
} | ||
va_end(arglist); | ||
return FALSE; | ||
} | ||
|
||
/* | ||
* Compare multiple strings with a CharUtil_Str* compare function. | ||
* If at least one comparison is TRUE return TRUE - otherwise FALSE. | ||
* -- pfnStrCmp | ||
* -- usz1 | ||
* -- fCaseInsensitive | ||
* -- cStr | ||
* -- pStr | ||
* -- return | ||
*/ | ||
BOOL CharUtil_StrCmpAnyEx(_In_opt_ CHARUTIL_STRCMP_PFN pfnStrCmp, _In_opt_ LPSTR usz1, _In_ BOOL fCaseInsensitive, _In_ DWORD cStr, _In_ LPSTR *pStr) | ||
{ | ||
if(!pfnStrCmp) { return FALSE; } | ||
while(cStr) { | ||
if(pfnStrCmp(usz1, pStr[--cStr], fCaseInsensitive)) { | ||
return TRUE; | ||
} | ||
} | ||
return FALSE; | ||
} | ||
|
||
/* | ||
* Compare multiple strings with a CharUtil_Str* compare function. | ||
* If all comparisons are TRUE return TRUE - otherwise FALSE. | ||
* -- pfnStrCmp | ||
* -- usz1 | ||
* -- fCaseInsensitive | ||
* -- cStr | ||
* -- | ||
* ... | ||
* -- return | ||
*/ | ||
BOOL CharUtil_StrCmpAll(_In_opt_ CHARUTIL_STRCMP_PFN pfnStrCmp, _In_opt_ LPSTR usz1, _In_ BOOL fCaseInsensitive, _In_ DWORD cStr, ...) | ||
{ | ||
va_list arglist; | ||
if(!pfnStrCmp) { return FALSE; } | ||
va_start(arglist, cStr); | ||
while(cStr) { | ||
if(!pfnStrCmp(usz1, va_arg(arglist, LPSTR), fCaseInsensitive)) { | ||
va_end(arglist); | ||
return FALSE; | ||
} | ||
cStr--; | ||
} | ||
va_end(arglist); | ||
return TRUE; | ||
} | ||
|
||
/* | ||
* Checks if a string ends with a certain substring. | ||
* -- usz | ||
|
@@ -1181,6 +1458,42 @@ BOOL CharUtil_StrEndsWith(_In_opt_ LPSTR usz, _In_opt_ LPSTR uszEndsWith, _In_ B | |
(0 == strcmp(usz + cch - cchEndsWith, uszEndsWith)); | ||
} | ||
|
||
/* | ||
* Checks if a string starts with a certain substring. | ||
* -- usz | ||
* -- uszStartsWith | ||
* -- fCaseInsensitive | ||
* -- return | ||
*/ | ||
BOOL CharUtil_StrStartsWith(_In_opt_ LPSTR usz, _In_opt_ LPSTR uszStartsWith, _In_ BOOL fCaseInsensitive) | ||
{ | ||
if(!usz || !uszStartsWith) { return FALSE; } | ||
if(fCaseInsensitive) { | ||
return (0 == _strnicmp(usz, uszStartsWith, strlen(uszStartsWith))); | ||
} else { | ||
return (0 == strncmp(usz, uszStartsWith, strlen(uszStartsWith))); | ||
} | ||
} | ||
|
||
/* | ||
* Checks if a string equals another string. | ||
* -- usz1 | ||
* -- usz2 | ||
* -- fCaseInsensitive | ||
* -- return | ||
*/ | ||
BOOL CharUtil_StrEquals(_In_opt_ LPSTR usz, _In_opt_ LPSTR usz2, _In_ BOOL fCaseInsensitive) | ||
{ | ||
if(!usz || !usz2) { return FALSE; } | ||
if(fCaseInsensitive) { | ||
return (0 == _stricmp(usz, usz2)); | ||
} else { | ||
return (0 == strcmp(usz, usz2)); | ||
} | ||
} | ||
|
||
|
||
|
||
/* | ||
* Compare a wide-char string to a utf-8 string. | ||
* NB! only the first 2*MAX_PATH characters are compared. | ||
|
Oops, something went wrong.