Skip to content

Commit

Permalink
Add use of Windows Wide CRT API for env. vars (#20084)
Browse files Browse the repository at this point in the history
* Add use of Windows Wide CRT API for env. vars

Replaces use of CRT API `getenv` and `putenv` with respectively
`_wgetenv` and `_wputenv`. Motivation is to reliably convert environment
variables to UTF-8, and the wide API is best there, because it's
reliably UTF-16.

Changed the hack in `lib/std/private/win_setenv.nim` by switching the
order of the Unicode and MBCS environment update; Unicode first, MBCS
second. Because `_wgetenv`/`_wputenv` is now used, the Unicode
environment will be initialized, so it should always be updated.

Stop updating MBCS environment with the name of `getEnv`. It's not
necessarily true that MBCS encoding and the `string` encoding is the
same. Instead convert UTF-16 to current Windows code page with
`wcstombs`, and use that string to update MBCS.

Fixes regression in `6b3c77e` that caused `std/envvars.getEnv` or
`std/os.getEnv` on Windows to return non-UTF-8 encoded strings.

Add tests that test environment variables with Unicode characters in
their name or value.

* Fix test issues

Fixes

* `nim cpp` didn't compile the tests
* Nimscript import of `tosenv.nim` from `test_nimscript.nims` failed
  with "cannot importc"

* Fix missing error check on `wcstombs`

* Fix ANSI testing errors

* Separate ANSI-related testing to their own tests, and only executing
  them if running process has a specific code page
  * Setting locale with `setlocale` was not reliable and didn't work on
    certain machines
* Add handling of a "no character representation" error in second
  `wcstombs` call

* tests/newruntime_misc: Increment allocCount

Increments overall allocations in `tnewruntime_misc` test. This is
because `getEnv` now does an additional allocation: allocation of the
UTF-16 string used as parameter to `c_wgetenv`.

* Revert "tests/newruntime_misc: Increment allocCount"

This reverts commit 4d4fe8b.

* tests/newruntime_misc: Increment allocCount on Windows

Increments overall allocations in `tnewruntime_misc` test for Windows.
This is because `getEnv` on Windows now does an additional allocation:
allocation of the UTF-16 string used as parameter to `c_wgetenv`.

* Refactor, adding suggestions from code review

Co-authored-by: Clay Sweetser <[email protected]>

* Document, adding suggestions

Co-authored-by: Clay Sweetser <[email protected]>

Co-authored-by: ringabout <[email protected]>
Co-authored-by: Clay Sweetser <[email protected]>
  • Loading branch information
3 people authored Aug 20, 2022
1 parent 641381e commit f4bbf3b
Show file tree
Hide file tree
Showing 6 changed files with 263 additions and 45 deletions.
14 changes: 9 additions & 5 deletions lib/pure/includes/osenv.nim
Original file line number Diff line number Diff line change
Expand Up @@ -42,14 +42,18 @@ when not defined(nimscript):

else:

proc c_getenv(env: cstring): cstring {.
importc: "getenv", header: "<stdlib.h>".}
when defined(windows):
proc c_putenv(envstring: cstring): cint {.importc: "_putenv", header: "<stdlib.h>".}
from std/private/win_setenv import setEnvImpl
proc c_wgetenv(varname: WideCString): WideCString {.importc: "_wgetenv",
header: "<stdlib.h>".}
proc getEnvImpl(env: cstring): WideCString = c_wgetenv(env.newWideCString)
else:
proc c_getenv(env: cstring): cstring {.
importc: "getenv", header: "<stdlib.h>".}
proc c_setenv(envname: cstring, envval: cstring, overwrite: cint): cint {.importc: "setenv", header: "<stdlib.h>".}
proc c_unsetenv(env: cstring): cint {.importc: "unsetenv", header: "<stdlib.h>".}
proc c_unsetenv(env: cstring): cint {.importc: "unsetenv", header: "<stdlib.h>".}
proc getEnvImpl(env: cstring): cstring = c_getenv(env)

proc getEnv*(key: string, default = ""): string {.tags: [ReadEnvEffect].} =
## Returns the value of the `environment variable`:idx: named `key`.
Expand All @@ -67,7 +71,7 @@ when not defined(nimscript):
assert getEnv("unknownEnv") == ""
assert getEnv("unknownEnv", "doesn't exist") == "doesn't exist"

let env = c_getenv(key)
let env = getEnvImpl(key)
if env == nil: return default
result = $env

Expand All @@ -83,7 +87,7 @@ when not defined(nimscript):
runnableExamples:
assert not existsEnv("unknownEnv")

return c_getenv(key) != nil
return getEnvImpl(key) != nil

proc putEnv*(key, val: string) {.tags: [WriteEnvEffect].} =
## Sets the value of the `environment variable`:idx: named `key` to `val`.
Expand Down
12 changes: 8 additions & 4 deletions lib/std/envvars.nim
Original file line number Diff line number Diff line change
Expand Up @@ -57,15 +57,19 @@ when not defined(nimscript):

else:

proc c_getenv(env: cstring): cstring {.
importc: "getenv", header: "<stdlib.h>".}
when defined(windows):
proc c_putenv(envstring: cstring): cint {.importc: "_putenv", header: "<stdlib.h>".}
from std/private/win_setenv import setEnvImpl
import winlean
proc c_wgetenv(varname: WideCString): WideCString {.importc: "_wgetenv",
header: "<stdlib.h>".}
proc getEnvImpl(env: cstring): WideCString = c_wgetenv(env.newWideCString)
else:
proc c_getenv(env: cstring): cstring {.
importc: "getenv", header: "<stdlib.h>".}
proc c_setenv(envname: cstring, envval: cstring, overwrite: cint): cint {.importc: "setenv", header: "<stdlib.h>".}
proc c_unsetenv(env: cstring): cint {.importc: "unsetenv", header: "<stdlib.h>".}
proc getEnvImpl(env: cstring): cstring = c_getenv(env)

proc getEnv*(key: string, default = ""): string {.tags: [ReadEnvEffect].} =
## Returns the value of the `environment variable`:idx: named `key`.
Expand All @@ -83,7 +87,7 @@ when not defined(nimscript):
assert getEnv("unknownEnv") == ""
assert getEnv("unknownEnv", "doesn't exist") == "doesn't exist"

let env = c_getenv(key)
let env = getEnvImpl(key)
if env == nil: return default
result = $env

Expand All @@ -99,7 +103,7 @@ when not defined(nimscript):
runnableExamples:
assert not existsEnv("unknownEnv")

return c_getenv(key) != nil
return getEnvImpl(key) != nil

proc putEnv*(key, val: string) {.tags: [WriteEnvEffect].} =
## Sets the value of the `environment variable`:idx: named `key` to `val`.
Expand Down
65 changes: 36 additions & 29 deletions lib/std/private/win_setenv.nim
Original file line number Diff line number Diff line change
Expand Up @@ -25,27 +25,30 @@ when not defined(windows): discard
else:
type wchar_t {.importc: "wchar_t".} = int16

proc setEnvironmentVariableA*(lpName, lpValue: cstring): int32 {.
stdcall, dynlib: "kernel32", importc: "SetEnvironmentVariableA", sideEffect.}
proc setEnvironmentVariableW*(lpName, lpValue: WideCString): int32 {.
stdcall, dynlib: "kernel32", importc: "SetEnvironmentVariableW", sideEffect.}
# same as winlean.setEnvironmentVariableA

proc c_getenv(env: cstring): cstring {.importc: "getenv", header: "<stdlib.h>".}
proc c_putenv(envstring: cstring): cint {.importc: "_putenv", header: "<stdlib.h>".}
proc c_wgetenv(varname: ptr wchar_t): ptr wchar_t {.importc: "_wgetenv", header: "<stdlib.h>".}
proc c_getenv(varname: cstring): cstring {.importc: "getenv", header: "<stdlib.h>".}
proc c_wputenv(envstring: WideCString): cint {.importc: "_wputenv", header: "<stdlib.h>".}
proc c_wgetenv(varname: WideCString): WideCString {.importc: "_wgetenv", header: "<stdlib.h>".}

var errno {.importc, header: "<errno.h>".}: cint
var gWenviron {.importc: "_wenviron".}: ptr ptr wchar_t
var genviron {.importc: "_environ".}: ptr ptr char
# xxx `ptr UncheckedArray[WideCString]` did not work

proc mbstowcs(wcstr: ptr wchar_t, mbstr: cstring, count: csize_t): csize_t {.importc: "mbstowcs", header: "<stdlib.h>".}
proc wcstombs(wcstr: ptr char, mbstr: WideCString, count: csize_t): csize_t {.importc, header: "<stdlib.h>".}
# xxx cint vs errno_t?

proc setEnvImpl*(name: string, value: string, overwrite: cint): cint =
const EINVAL = cint(22)
if overwrite == 0 and c_getenv(cstring(name)) != nil: return 0
let wideName = newWideCString(name)
if overwrite == 0 and c_wgetenv(wideName) != nil:
return 0

if value != "":
let envstring = name & "=" & value
let e = c_putenv(cstring(envstring))
let e = c_wputenv(newWideCString(envstring))
if e != 0:
errno = EINVAL
return -1
Expand All @@ -57,40 +60,44 @@ else:
so we have to do these terrible things.
]#
let envstring = name & "= "
if c_putenv(cstring(envstring)) != 0:
if c_wputenv(newWideCString(envstring)) != 0:
errno = EINVAL
return -1
# Here lies the documentation we blatently ignore to make this work.
var s = c_getenv(cstring(name))
s[0] = '\0'
var s = c_wgetenv(wideName)
s[0] = Utf16Char('\0')
#[
This would result in a double null termination, which normally signifies the
end of the environment variable list, so we stick a completely empty
environment variable into the list instead.
]#
s = c_getenv(cstring(name))
s[1] = '='
s = c_wgetenv(wideName)
s[1] = Utf16Char('=')
#[
If gWenviron is null, the wide environment has not been initialized
If genviron is null, the MBCS environment has not been initialized
yet, and we don't need to try to update it. We have to do this otherwise
we'd be forcing the initialization and maintenance of the wide environment
we'd be forcing the initialization and maintenance of the MBCS environment
even though it's never actually used in most programs.
]#
if gWenviron != nil:
# var buf: array[MAX_ENV + 1, WideCString]
let requiredSize = mbstowcs(nil, cstring(name), 0).int
var buf = newSeq[Utf16Char](requiredSize + 1)
let buf2 = cast[ptr wchar_t](buf[0].addr)
if mbstowcs(buf2, cstring(name), csize_t(requiredSize + 1)) == csize_t(high(uint)):
errno = EINVAL
return -1
var ptrToEnv = cast[WideCString](c_wgetenv(buf2))
ptrToEnv[0] = '\0'.Utf16Char
ptrToEnv = cast[WideCString](c_wgetenv(buf2))
ptrToEnv[1] = '='.Utf16Char
if genviron != nil:

# wcstombs returns `high(csize_t)` if any characters cannot be represented
# in the current codepage. Skip updating MBCS environment in this case.
# For some reason, second `wcstombs` can find non-convertible characters
# that the first `wcstombs` cannot.
let requiredSizeS = wcstombs(nil, wideName, 0)
if requiredSizeS != high(csize_t):
let requiredSize = requiredSizeS.int
var buf = newSeq[char](requiredSize + 1)
let buf2 = buf[0].addr
if wcstombs(buf2, wideName, csize_t(requiredSize + 1)) != high(csize_t):
var ptrToEnv = c_getenv(buf2)
ptrToEnv[0] = '\0'
ptrToEnv = c_getenv(buf2)
ptrToEnv[1] = '='

# And now, we have to update the outer environment to have a proper empty value.
if setEnvironmentVariableA(cstring(name), cstring(value)) == 0:
if setEnvironmentVariableW(wideName, value.newWideCString) == 0:
errno = EINVAL
return -1
return 0
11 changes: 10 additions & 1 deletion tests/destructor/tnewruntime_misc.nim
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,16 @@ proc xx(xml: string): MyObject =


discard xx("test")
echo getAllocStats() - s1

# Windows has 1 extra allocation in `getEnv` - there it allocates parameter to
# `_wgetenv` (WideCString). Therefore subtract by 1 to match other OSes'
# allocation.
when defined(windows):
import std/importutils
privateAccess(AllocStats)
echo getAllocStats() - s1 - AllocStats(allocCount: 1, deallocCount: 1)
else:
echo getAllocStats() - s1

# bug #13457
var s = "abcde"
Expand Down
103 changes: 100 additions & 3 deletions tests/stdlib/tenvvars.nim
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,12 @@ import std/envvars
from std/sequtils import toSeq
import stdtest/testutils

# "LATIN CAPITAL LETTER AE" in UTF-8 (0xc386)
const unicodeUtf8 = "\xc3\x86"

template main =
block: # delEnv, existsEnv, getEnv, envPairs
for val in ["val", ""]: # ensures empty val works too
for val in ["val", "", unicodeUtf8]: # ensures empty val works too
const key = "NIM_TESTS_TOSENV_KEY"
doAssert not existsEnv(key)

Expand Down Expand Up @@ -44,16 +47,110 @@ template main =

main()

when defined(windows):
proc c_wgetenv(env: WideCString): WideCString {.importc: "_wgetenv", header: "<stdlib.h>".}
proc c_getenv(env: cstring): cstring {.importc: "getenv", header: "<stdlib.h>".}

when not defined(js) and not defined(nimscript):
block: # bug #18533
proc c_getenv(env: cstring): cstring {.importc: "getenv", header: "<stdlib.h>".}
var thr: Thread[void]
proc threadFunc {.thread.} = putEnv("foo", "fooVal2")

putEnv("foo", "fooVal1")
doAssert getEnv("foo") == "fooVal1"
createThread(thr, threadFunc)
joinThreads(thr)
doAssert getEnv("foo") == $c_getenv("foo")
when defined(windows):
doAssert getEnv("foo") == $c_wgetenv("foo".newWideCString)
else:
doAssert getEnv("foo") == $c_getenv("foo".cstring)

doAssertRaises(OSError): delEnv("foo=bar")

when defined(windows):
import std/encodings

proc c_putenv(env: cstring): int32 {.importc: "putenv", header: "<stdlib.h>".}
proc c_wputenv(env: WideCString): int32 {.importc: "_wputenv", header: "<stdlib.h>".}

block: # Bug #20083
# These test that `getEnv`, `putEnv` and `existsEnv` handle Unicode
# characters correctly. This means that module X in the process calling the
# CRT environment variable API will get the correct string. Raw CRT API
# calls below represent module X.

# Getting an env. var. with unicode characters returns the correct UTF-8
# encoded string.
block:
const envName = "twin_envvars1"
doAssert c_wputenv(newWideCString(envName & "=" & unicodeUtf8)) == 0
doAssert existsEnv(envName)
doAssert getEnv(envName) == unicodeUtf8

# Putting an env. var. with unicode characters gives the correct UTF-16
# encoded string from low-level routine.
block:
const envName = "twin_envvars2"
putEnv(envName, unicodeUtf8)
doAssert $c_wgetenv(envName.newWideCString) == unicodeUtf8

# Env. name containing Unicode characters is retrieved correctly
block:
const envName = unicodeUtf8 & "1"
doAssert c_wputenv(newWideCString(envName & "=" & unicodeUtf8)) == 0
doAssert existsEnv(envName)
doAssert getEnv(envName) == unicodeUtf8

# Env. name containing Unicode characters is set correctly
block:
const envName = unicodeUtf8 & "2"
putEnv(envName, unicodeUtf8)
doAssert existsEnv(envName)
doAssert $c_wgetenv(envName.newWideCString) == unicodeUtf8

# Env. name containing Unicode characters and empty value is set correctly
block:
const envName = unicodeUtf8 & "3"
putEnv(envName, "")
doAssert existsEnv(envName)
doAssert $c_wgetenv(envName.newWideCString) == ""

# It's hard to test on Windows code pages, because there is no "change
# a process' locale" API.
if getCurrentEncoding(true) == "windows-1252":
const
unicodeAnsi = "\xc6" # `unicodeUtf8` in `windows-1252` encoding

# Test that env. var. ANSI API has correct encoding
block:
const
envName = unicodeUtf8 & "4"
envNameAnsi = unicodeAnsi & "4"
putEnv(envName, unicodeUtf8)
doAssert $c_getenv(envNameAnsi.cstring) == unicodeAnsi

block:
const
envName = unicodeUtf8 & "5"
envNameAnsi = unicodeAnsi & "5"
doAssert c_putenv((envNameAnsi & "=" & unicodeAnsi).cstring) == 0
doAssert getEnv(envName) == unicodeUtf8

# Env. name containing Unicode characters and empty value is set correctly;
# and, if env. name. characters cannot be represented in codepage, don't
# raise an error.
#
# `win_setenv.nim` converts UTF-16 to ANSI when setting empty env. var. The
# windows-1250 locale has no representation of `abreveUtf8` below, so the
# conversion will fail, but this must not be fatal. It is expected that the
# routine ignores updating MBCS environment (`environ` global) and carries
# on.
block:
const
# "LATIN SMALL LETTER A WITH BREVE" in UTF-8
abreveUtf8 = "\xc4\x83"
envName = abreveUtf8 & "6"
putEnv(envName, "")
doAssert existsEnv(envName)
doAssert $c_wgetenv(envName.newWideCString) == ""
doAssert getEnv(envName) == ""
Loading

0 comments on commit f4bbf3b

Please sign in to comment.