diff --git a/lib/pure/includes/osenv.nim b/lib/pure/includes/osenv.nim index 4a776eb78756..e04e14cec1d3 100644 --- a/lib/pure/includes/osenv.nim +++ b/lib/pure/includes/osenv.nim @@ -42,14 +42,18 @@ when not defined(nimscript): else: - proc c_getenv(env: cstring): cstring {. - importc: "getenv", header: "".} when defined(windows): proc c_putenv(envstring: cstring): cint {.importc: "_putenv", header: "".} from std/private/win_setenv import setEnvImpl + proc c_wgetenv(varname: WideCString): WideCString {.importc: "_wgetenv", + header: "".} + proc getEnvImpl(env: cstring): WideCString = c_wgetenv(env.newWideCString) else: + proc c_getenv(env: cstring): cstring {. + importc: "getenv", header: "".} proc c_setenv(envname: cstring, envval: cstring, overwrite: cint): cint {.importc: "setenv", header: "".} - proc c_unsetenv(env: cstring): cint {.importc: "unsetenv", header: "".} + proc c_unsetenv(env: cstring): cint {.importc: "unsetenv", header: "".} + proc getEnvImpl(env: cstring): cstring = c_getenv(env) proc getEnv*(key: string, default = ""): string {.tags: [ReadEnvEffect].} = ## Returns the value of the `environment variable`:idx: named `key`. @@ -67,7 +71,7 @@ when not defined(nimscript): assert getEnv("unknownEnv") == "" assert getEnv("unknownEnv", "doesn't exist") == "doesn't exist" - let env = c_getenv(key) + let env = getEnvImpl(key) if env == nil: return default result = $env @@ -83,7 +87,7 @@ when not defined(nimscript): runnableExamples: assert not existsEnv("unknownEnv") - return c_getenv(key) != nil + return getEnvImpl(key) != nil proc putEnv*(key, val: string) {.tags: [WriteEnvEffect].} = ## Sets the value of the `environment variable`:idx: named `key` to `val`. diff --git a/lib/std/envvars.nim b/lib/std/envvars.nim index 5b135cbd3c08..d7706c17d1db 100644 --- a/lib/std/envvars.nim +++ b/lib/std/envvars.nim @@ -57,15 +57,19 @@ when not defined(nimscript): else: - proc c_getenv(env: cstring): cstring {. - importc: "getenv", header: "".} when defined(windows): proc c_putenv(envstring: cstring): cint {.importc: "_putenv", header: "".} from std/private/win_setenv import setEnvImpl import winlean + proc c_wgetenv(varname: WideCString): WideCString {.importc: "_wgetenv", + header: "".} + proc getEnvImpl(env: cstring): WideCString = c_wgetenv(env.newWideCString) else: + proc c_getenv(env: cstring): cstring {. + importc: "getenv", header: "".} proc c_setenv(envname: cstring, envval: cstring, overwrite: cint): cint {.importc: "setenv", header: "".} proc c_unsetenv(env: cstring): cint {.importc: "unsetenv", header: "".} + proc getEnvImpl(env: cstring): cstring = c_getenv(env) proc getEnv*(key: string, default = ""): string {.tags: [ReadEnvEffect].} = ## Returns the value of the `environment variable`:idx: named `key`. @@ -83,7 +87,7 @@ when not defined(nimscript): assert getEnv("unknownEnv") == "" assert getEnv("unknownEnv", "doesn't exist") == "doesn't exist" - let env = c_getenv(key) + let env = getEnvImpl(key) if env == nil: return default result = $env @@ -99,7 +103,7 @@ when not defined(nimscript): runnableExamples: assert not existsEnv("unknownEnv") - return c_getenv(key) != nil + return getEnvImpl(key) != nil proc putEnv*(key, val: string) {.tags: [WriteEnvEffect].} = ## Sets the value of the `environment variable`:idx: named `key` to `val`. diff --git a/lib/std/private/win_setenv.nim b/lib/std/private/win_setenv.nim index 0dfe0ed46dc0..89bb0421fd9e 100644 --- a/lib/std/private/win_setenv.nim +++ b/lib/std/private/win_setenv.nim @@ -25,27 +25,30 @@ when not defined(windows): discard else: type wchar_t {.importc: "wchar_t".} = int16 - proc setEnvironmentVariableA*(lpName, lpValue: cstring): int32 {. - stdcall, dynlib: "kernel32", importc: "SetEnvironmentVariableA", sideEffect.} + proc setEnvironmentVariableW*(lpName, lpValue: WideCString): int32 {. + stdcall, dynlib: "kernel32", importc: "SetEnvironmentVariableW", sideEffect.} # same as winlean.setEnvironmentVariableA - proc c_getenv(env: cstring): cstring {.importc: "getenv", header: "".} - proc c_putenv(envstring: cstring): cint {.importc: "_putenv", header: "".} - proc c_wgetenv(varname: ptr wchar_t): ptr wchar_t {.importc: "_wgetenv", header: "".} + proc c_getenv(varname: cstring): cstring {.importc: "getenv", header: "".} + proc c_wputenv(envstring: WideCString): cint {.importc: "_wputenv", header: "".} + proc c_wgetenv(varname: WideCString): WideCString {.importc: "_wgetenv", header: "".} var errno {.importc, header: "".}: cint - var gWenviron {.importc: "_wenviron".}: ptr ptr wchar_t + var genviron {.importc: "_environ".}: ptr ptr char # xxx `ptr UncheckedArray[WideCString]` did not work - proc mbstowcs(wcstr: ptr wchar_t, mbstr: cstring, count: csize_t): csize_t {.importc: "mbstowcs", header: "".} + proc wcstombs(wcstr: ptr char, mbstr: WideCString, count: csize_t): csize_t {.importc, header: "".} # xxx cint vs errno_t? proc setEnvImpl*(name: string, value: string, overwrite: cint): cint = const EINVAL = cint(22) - if overwrite == 0 and c_getenv(cstring(name)) != nil: return 0 + let wideName = newWideCString(name) + if overwrite == 0 and c_wgetenv(wideName) != nil: + return 0 + if value != "": let envstring = name & "=" & value - let e = c_putenv(cstring(envstring)) + let e = c_wputenv(newWideCString(envstring)) if e != 0: errno = EINVAL return -1 @@ -57,40 +60,44 @@ else: so we have to do these terrible things. ]# let envstring = name & "= " - if c_putenv(cstring(envstring)) != 0: + if c_wputenv(newWideCString(envstring)) != 0: errno = EINVAL return -1 # Here lies the documentation we blatently ignore to make this work. - var s = c_getenv(cstring(name)) - s[0] = '\0' + var s = c_wgetenv(wideName) + s[0] = Utf16Char('\0') #[ This would result in a double null termination, which normally signifies the end of the environment variable list, so we stick a completely empty environment variable into the list instead. ]# - s = c_getenv(cstring(name)) - s[1] = '=' + s = c_wgetenv(wideName) + s[1] = Utf16Char('=') #[ - If gWenviron is null, the wide environment has not been initialized + If genviron is null, the MBCS environment has not been initialized yet, and we don't need to try to update it. We have to do this otherwise - we'd be forcing the initialization and maintenance of the wide environment + we'd be forcing the initialization and maintenance of the MBCS environment even though it's never actually used in most programs. ]# - if gWenviron != nil: - # var buf: array[MAX_ENV + 1, WideCString] - let requiredSize = mbstowcs(nil, cstring(name), 0).int - var buf = newSeq[Utf16Char](requiredSize + 1) - let buf2 = cast[ptr wchar_t](buf[0].addr) - if mbstowcs(buf2, cstring(name), csize_t(requiredSize + 1)) == csize_t(high(uint)): - errno = EINVAL - return -1 - var ptrToEnv = cast[WideCString](c_wgetenv(buf2)) - ptrToEnv[0] = '\0'.Utf16Char - ptrToEnv = cast[WideCString](c_wgetenv(buf2)) - ptrToEnv[1] = '='.Utf16Char + if genviron != nil: + + # wcstombs returns `high(csize_t)` if any characters cannot be represented + # in the current codepage. Skip updating MBCS environment in this case. + # For some reason, second `wcstombs` can find non-convertible characters + # that the first `wcstombs` cannot. + let requiredSizeS = wcstombs(nil, wideName, 0) + if requiredSizeS != high(csize_t): + let requiredSize = requiredSizeS.int + var buf = newSeq[char](requiredSize + 1) + let buf2 = buf[0].addr + if wcstombs(buf2, wideName, csize_t(requiredSize + 1)) != high(csize_t): + var ptrToEnv = c_getenv(buf2) + ptrToEnv[0] = '\0' + ptrToEnv = c_getenv(buf2) + ptrToEnv[1] = '=' # And now, we have to update the outer environment to have a proper empty value. - if setEnvironmentVariableA(cstring(name), cstring(value)) == 0: + if setEnvironmentVariableW(wideName, value.newWideCString) == 0: errno = EINVAL return -1 return 0 diff --git a/tests/destructor/tnewruntime_misc.nim b/tests/destructor/tnewruntime_misc.nim index ac061f2f7a1b..21c70557dbae 100644 --- a/tests/destructor/tnewruntime_misc.nim +++ b/tests/destructor/tnewruntime_misc.nim @@ -137,7 +137,16 @@ proc xx(xml: string): MyObject = discard xx("test") -echo getAllocStats() - s1 + +# Windows has 1 extra allocation in `getEnv` - there it allocates parameter to +# `_wgetenv` (WideCString). Therefore subtract by 1 to match other OSes' +# allocation. +when defined(windows): + import std/importutils + privateAccess(AllocStats) + echo getAllocStats() - s1 - AllocStats(allocCount: 1, deallocCount: 1) +else: + echo getAllocStats() - s1 # bug #13457 var s = "abcde" diff --git a/tests/stdlib/tenvvars.nim b/tests/stdlib/tenvvars.nim index 406aa3230b78..b39ce5f72a30 100644 --- a/tests/stdlib/tenvvars.nim +++ b/tests/stdlib/tenvvars.nim @@ -8,9 +8,12 @@ import std/envvars from std/sequtils import toSeq import stdtest/testutils +# "LATIN CAPITAL LETTER AE" in UTF-8 (0xc386) +const unicodeUtf8 = "\xc3\x86" + template main = block: # delEnv, existsEnv, getEnv, envPairs - for val in ["val", ""]: # ensures empty val works too + for val in ["val", "", unicodeUtf8]: # ensures empty val works too const key = "NIM_TESTS_TOSENV_KEY" doAssert not existsEnv(key) @@ -44,9 +47,12 @@ template main = main() +when defined(windows): + proc c_wgetenv(env: WideCString): WideCString {.importc: "_wgetenv", header: "".} +proc c_getenv(env: cstring): cstring {.importc: "getenv", header: "".} + when not defined(js) and not defined(nimscript): block: # bug #18533 - proc c_getenv(env: cstring): cstring {.importc: "getenv", header: "".} var thr: Thread[void] proc threadFunc {.thread.} = putEnv("foo", "fooVal2") @@ -54,6 +60,97 @@ when not defined(js) and not defined(nimscript): doAssert getEnv("foo") == "fooVal1" createThread(thr, threadFunc) joinThreads(thr) - doAssert getEnv("foo") == $c_getenv("foo") + when defined(windows): + doAssert getEnv("foo") == $c_wgetenv("foo".newWideCString) + else: + doAssert getEnv("foo") == $c_getenv("foo".cstring) doAssertRaises(OSError): delEnv("foo=bar") + +when defined(windows): + import std/encodings + + proc c_putenv(env: cstring): int32 {.importc: "putenv", header: "".} + proc c_wputenv(env: WideCString): int32 {.importc: "_wputenv", header: "".} + + block: # Bug #20083 + # These test that `getEnv`, `putEnv` and `existsEnv` handle Unicode + # characters correctly. This means that module X in the process calling the + # CRT environment variable API will get the correct string. Raw CRT API + # calls below represent module X. + + # Getting an env. var. with unicode characters returns the correct UTF-8 + # encoded string. + block: + const envName = "twin_envvars1" + doAssert c_wputenv(newWideCString(envName & "=" & unicodeUtf8)) == 0 + doAssert existsEnv(envName) + doAssert getEnv(envName) == unicodeUtf8 + + # Putting an env. var. with unicode characters gives the correct UTF-16 + # encoded string from low-level routine. + block: + const envName = "twin_envvars2" + putEnv(envName, unicodeUtf8) + doAssert $c_wgetenv(envName.newWideCString) == unicodeUtf8 + + # Env. name containing Unicode characters is retrieved correctly + block: + const envName = unicodeUtf8 & "1" + doAssert c_wputenv(newWideCString(envName & "=" & unicodeUtf8)) == 0 + doAssert existsEnv(envName) + doAssert getEnv(envName) == unicodeUtf8 + + # Env. name containing Unicode characters is set correctly + block: + const envName = unicodeUtf8 & "2" + putEnv(envName, unicodeUtf8) + doAssert existsEnv(envName) + doAssert $c_wgetenv(envName.newWideCString) == unicodeUtf8 + + # Env. name containing Unicode characters and empty value is set correctly + block: + const envName = unicodeUtf8 & "3" + putEnv(envName, "") + doAssert existsEnv(envName) + doAssert $c_wgetenv(envName.newWideCString) == "" + + # It's hard to test on Windows code pages, because there is no "change + # a process' locale" API. + if getCurrentEncoding(true) == "windows-1252": + const + unicodeAnsi = "\xc6" # `unicodeUtf8` in `windows-1252` encoding + + # Test that env. var. ANSI API has correct encoding + block: + const + envName = unicodeUtf8 & "4" + envNameAnsi = unicodeAnsi & "4" + putEnv(envName, unicodeUtf8) + doAssert $c_getenv(envNameAnsi.cstring) == unicodeAnsi + + block: + const + envName = unicodeUtf8 & "5" + envNameAnsi = unicodeAnsi & "5" + doAssert c_putenv((envNameAnsi & "=" & unicodeAnsi).cstring) == 0 + doAssert getEnv(envName) == unicodeUtf8 + + # Env. name containing Unicode characters and empty value is set correctly; + # and, if env. name. characters cannot be represented in codepage, don't + # raise an error. + # + # `win_setenv.nim` converts UTF-16 to ANSI when setting empty env. var. The + # windows-1250 locale has no representation of `abreveUtf8` below, so the + # conversion will fail, but this must not be fatal. It is expected that the + # routine ignores updating MBCS environment (`environ` global) and carries + # on. + block: + const + # "LATIN SMALL LETTER A WITH BREVE" in UTF-8 + abreveUtf8 = "\xc4\x83" + envName = abreveUtf8 & "6" + putEnv(envName, "") + doAssert existsEnv(envName) + doAssert $c_wgetenv(envName.newWideCString) == "" + doAssert getEnv(envName) == "" diff --git a/tests/stdlib/tosenv.nim b/tests/stdlib/tosenv.nim index 0a50031a116d..310bfe202a45 100644 --- a/tests/stdlib/tosenv.nim +++ b/tests/stdlib/tosenv.nim @@ -8,9 +8,12 @@ import std/os from std/sequtils import toSeq import stdtest/testutils +# "LATIN CAPITAL LETTER AE" in UTF-8 (0xc386) +const unicodeUtf8 = "\xc3\x86" + template main = block: # delEnv, existsEnv, getEnv, envPairs - for val in ["val", ""]: # ensures empty val works too + for val in ["val", "", unicodeUtf8]: # ensures empty val works too const key = "NIM_TESTS_TOSENV_KEY" doAssert not existsEnv(key) @@ -45,9 +48,12 @@ template main = static: main() main() +when defined(windows): + proc c_wgetenv(env: WideCString): WideCString {.importc: "_wgetenv", header: "".} +proc c_getenv(env: cstring): cstring {.importc: "getenv", header: "".} + when not defined(js) and not defined(nimscript): block: # bug #18533 - proc c_getenv(env: cstring): cstring {.importc: "getenv", header: "".} var thr: Thread[void] proc threadFunc {.thread.} = putEnv("foo", "fooVal2") @@ -55,6 +61,97 @@ when not defined(js) and not defined(nimscript): doAssert getEnv("foo") == "fooVal1" createThread(thr, threadFunc) joinThreads(thr) - doAssert getEnv("foo") == $c_getenv("foo") + when defined(windows): + doAssert getEnv("foo") == $c_wgetenv("foo".newWideCString) + else: + doAssert getEnv("foo") == $c_getenv("foo".cstring) doAssertRaises(OSError): delEnv("foo=bar") + +when defined(windows) and not defined(nimscript): + import std/encodings + + proc c_putenv(env: cstring): int32 {.importc: "putenv", header: "".} + proc c_wputenv(env: WideCString): int32 {.importc: "_wputenv", header: "".} + + block: # Bug #20083 + # These test that `getEnv`, `putEnv` and `existsEnv` handle Unicode + # characters correctly. This means that module X in the process calling the + # CRT environment variable API will get the correct string. Raw CRT API + # calls below represent module X. + + # Getting an env. var. with unicode characters returns the correct UTF-8 + # encoded string. + block: + const envName = "twin_envvars1" + doAssert c_wputenv(newWideCString(envName & "=" & unicodeUtf8)) == 0 + doAssert existsEnv(envName) + doAssert getEnv(envName) == unicodeUtf8 + + # Putting an env. var. with unicode characters gives the correct UTF-16 + # encoded string from low-level routine. + block: + const envName = "twin_envvars2" + putEnv(envName, unicodeUtf8) + doAssert $c_wgetenv(envName.newWideCString) == unicodeUtf8 + + # Env. name containing Unicode characters is retrieved correctly + block: + const envName = unicodeUtf8 & "1" + doAssert c_wputenv(newWideCString(envName & "=" & unicodeUtf8)) == 0 + doAssert existsEnv(envName) + doAssert getEnv(envName) == unicodeUtf8 + + # Env. name containing Unicode characters is set correctly + block: + const envName = unicodeUtf8 & "2" + putEnv(envName, unicodeUtf8) + doAssert existsEnv(envName) + doAssert $c_wgetenv(envName.newWideCString) == unicodeUtf8 + + # Env. name containing Unicode characters and empty value is set correctly + block: + const envName = unicodeUtf8 & "3" + putEnv(envName, "") + doAssert existsEnv(envName) + doAssert $c_wgetenv(envName.newWideCString) == "" + + # It's hard to test on Windows code pages, because there is no "change + # a process' locale" API. + if getCurrentEncoding(true) == "windows-1252": + const + unicodeAnsi = "\xc6" # `unicodeUtf8` in `windows-1252` encoding + + # Test that env. var. ANSI API has correct encoding + block: + const + envName = unicodeUtf8 & "4" + envNameAnsi = unicodeAnsi & "4" + putEnv(envName, unicodeUtf8) + doAssert $c_getenv(envNameAnsi.cstring) == unicodeAnsi + + block: + const + envName = unicodeUtf8 & "5" + envNameAnsi = unicodeAnsi & "5" + doAssert c_putenv((envNameAnsi & "=" & unicodeAnsi).cstring) == 0 + doAssert getEnv(envName) == unicodeUtf8 + + # Env. name containing Unicode characters and empty value is set correctly; + # and, if env. name. characters cannot be represented in codepage, don't + # raise an error. + # + # `win_setenv.nim` converts UTF-16 to ANSI when setting empty env. var. The + # windows-1250 locale has no representation of `abreveUtf8` below, so the + # conversion will fail, but this must not be fatal. It is expected that the + # routine ignores updating MBCS environment (`environ` global) and carries + # on. + block: + const + # "LATIN SMALL LETTER A WITH BREVE" in UTF-8 + abreveUtf8 = "\xc4\x83" + envName = abreveUtf8 & "6" + putEnv(envName, "") + doAssert existsEnv(envName) + doAssert $c_wgetenv(envName.newWideCString) == "" + doAssert getEnv(envName) == ""