Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Preventing cutting unicode characters in half #77

Merged
merged 9 commits into from
Jan 20, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
189 changes: 57 additions & 132 deletions .semaphore/semaphore.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,15 +27,16 @@ blocks:
- make test

- name: "Shell Executor E2E tests"
dependencies:
- "Tests"
dependencies: []
task:
env_vars:
- name: GO111MODULE
value: "on"

prologue:
commands:
- sudo rm /etc/docker/daemon.json
- sudo service docker restart
- sem-version go 1.11
- checkout
- go version
Expand All @@ -47,49 +48,27 @@ blocks:
- docker exec -ti agent cat /tmp/agent_log

jobs:
- name: Hello World
commands:
- make e2e TEST=shell/hello_world

- name: Env Vars
commands:
- make e2e TEST=shell/env_vars

- name: Failed Job
commands:
- make e2e TEST=shell/failed_job

- name: Job Stopping
commands:
- make e2e TEST=shell/job_stopping

- name: File Injection
commands:
- make e2e TEST=shell/file_injection

- name: File Injection Broken File Mode
commands:
- make e2e TEST=shell/file_injection_broken_file_mode

- name: Stty Restoration
commands:
- make e2e TEST=shell/stty_restoration

- name: Epilogue On Pass
commands:
- make e2e TEST=shell/epilogue_on_pass

- name: Epilogue On Fail
commands:
- make e2e TEST=shell/epilogue_on_fail

- name: SSH Jump Point
commands:
- make e2e TEST=shell/ssh_jump_points

- name: "Docker Executor E2E tests"
dependencies:
- "Tests"
- name: Shell
commands:
- "make e2e TEST=shell/$TEST"
matrix:
- env_var: TEST
values:
- env_vars
- failed_job
- job_stopping
- file_injection
- file_injection_broken_file_mode
- stty_restoration
- epilogue_on_pass
- epilogue_on_fail
- ssh_jump_points
- unicode
- unknown_command
- broken_unicode

- name: "Docker Executor E2E"
dependencies: []
task:
secrets:
- name: aws-ecr-payground
Expand All @@ -103,6 +82,8 @@ blocks:

prologue:
commands:
- sudo rm /etc/docker/daemon.json
- sudo service docker restart
- sem-version go 1.11
- checkout
- go version
Expand All @@ -114,93 +95,37 @@ blocks:
- docker exec -ti agent cat /tmp/agent_log

jobs:
- name: Hello World
commands:
- make e2e TEST=docker/hello_world

- name: Env Vars
commands:
- make e2e TEST=docker/env_vars

- name: Failed Job
commands:
- make e2e TEST=docker/failed_job

- name: Job Stopping
commands:
- make e2e TEST=docker/job_stopping

- name: File Injection
commands:
- make e2e TEST=docker/file_injection

- name: File Injection Broken File Mode
commands:
- make e2e TEST=docker/file_injection_broken_file_mode

- name: Stty Restoration
commands:
- make e2e TEST=docker/stty_restoration

- name: Epilogue On Pass
commands:
- make e2e TEST=docker/epilogue_on_pass

- name: Epilogue On Fail
commands:
- make e2e TEST=docker/epilogue_on_fail

- name: Docker In Docker
commands:
- make e2e TEST=docker/docker_in_docker

- name: Container Env Vars
commands:
- make e2e TEST=docker/container_env_vars

- name: DockerHub Login
commands:
- make e2e TEST=docker/dockerhub_private_image

- name: DockerRegistry Login
commands:
- make e2e TEST=docker/docker_registry_private_image

- name: ECR Login
commands:
- make e2e TEST=docker/docker_private_image_ecr

- name: GCR Login
commands:
- make e2e TEST=docker/docker_private_image_gcr

- name: DockerHub Login Failed
commands:
- make e2e TEST=docker/dockerhub_private_image_bad_creds

- name: DockerRegistry Login Failed
commands:
- make e2e TEST=docker/docker_registry_private_image_bad_creds

- name: ECR Login Failed
commands:
- make e2e TEST=docker/docker_private_image_ecr_bad_creds

- name: GCR Login Failed
commands:
- make e2e TEST=docker/docker_private_image_gcr_bad_creds

- name: SSH Jump Point
commands:
- make e2e TEST=docker/ssh_jump_points

- name: "No Bash"
commands:
- make e2e TEST=docker/no_bash

- name: "Container custom name"
commands:
- make e2e TEST=docker/container_custom_name
- name: Docker
commands:
- "make e2e TEST=docker/$TEST"
matrix:
- env_var: TEST
values:
- hello_world
- env_vars
- failed_job
- job_stopping
- file_injection
- file_injection_broken_file_mode
- stty_restoration
- epilogue_on_pass
- epilogue_on_fail
- docker_in_docker
- container_env_vars
- dockerhub_private_image
- docker_registry_private_image
- docker_private_image_ecr
- docker_private_image_gcr
- dockerhub_private_image_bad_creds
- docker_registry_private_image_bad_creds
- docker_private_image_ecr_bad_creds
- docker_private_image_gcr_bad_creds
- ssh_jump_points
- no_bash
- container_custom_name
- unicode
- unknown_command
- broken_unicode

promotions:
- name: Release
Expand Down
61 changes: 53 additions & 8 deletions pkg/shell/process.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ func NewProcess(cmd string, tempStoragePath string, shell io.Writer, tty *os.Fil
panic("Invalid TTY")
}

commandEndRegex := regexp.MustCompile(endMark + " " + `(\d)` + "[\r\n]+")
commandEndRegex := regexp.MustCompile(endMark + " " + `(\d+)` + "[\r\n]+")

return &Process{
Command: cmd,
Expand All @@ -74,16 +74,61 @@ func (p *Process) OnStdout(callback func(string)) {
}

func (p *Process) StreamToStdout() {
for len(p.outputBuffer) > 100 || time.Now().Sub(p.lastStream) > 100*time.Millisecond {
l := 100
//
// The output is buffered in the outputBuffer as it comes in from the TTY
// device.
//
// Ideally, we should strive to flush the output to the logfile as an event
// when there are enough data to be sent. "Enough data" in this context
// should satisfy the following criteria:
//
// - If there is more than 100 characters in the buffer
//
// - If there is less than 100 characters in the buffer, but they were in
// the buffer for more than 100 milisecond. The reasoning here is that
// it should take no more than 100 milliseconds for the TTY to flush its
// output.
//
// - If the UTF-8 sequence is complete. Cutting the UTF-8 sequence in half
// leads to undefined (?) characters in the UI.
//

maxTimeSinceLastFlush := 100 * time.Millisecond

for len(p.outputBuffer) > 100 || time.Now().Sub(p.lastStream) > maxTimeSinceLastFlush {
cutLength := 100

if len(p.outputBuffer) < cutLength {
cutLength = len(p.outputBuffer)
}

if len(p.outputBuffer) < l {
l = len(p.outputBuffer)
if time.Now().Sub(p.lastStream) < maxTimeSinceLastFlush {
//
// The unicode continuation sign is marked by the highest (8th) bit.
// If the bit is set, it means that the unicode character is not yet
// finished.
//
// In the bellow loop, we are cutting of the last 3 charactes in case
// they are marked as the unicode continuation characters.
//
// An unicode sequence can't be longer than 4 bytes
//
unicodeContinuationMask := uint(1 << 7)
Copy link

@dnozay dnozay Jan 24, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

image credit: https://en.wikipedia.org/wiki/UTF-8

image

bug: continuation bytes start with 10xxxxxx; 1xxxxxxx also include leading bytes.

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This happens very frequently when doing box drawing

┏┳━━━━━━━━━━━━━���━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━���━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━���━━
┃┃ TEST Report
┗┻━━━━━━━━━━━━━━━━━━━━���━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━���━━━━━━━━━━━━━━━━━━━━━━━━━━━

Box drawing characters are 3 bytes.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's it! Thanks for finding the bug and sharing the solution. 🙇

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Work on the fix started here #79.

Copy link
Contributor Author

@shiroyasha shiroyasha Jan 30, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@dnozay I started with the implementation of the patch, but unfortunately, I had a hard time replicating the issue in the Agent's tests.

My current assumption is that the root of the problem is not directly in the Agent, but upstream in the log processing service also. We are investigating further.

I'll update as soon as we find something.


for i := 0; i < 4; i++ {
if uint(p.outputBuffer[cutLength-1])&unicodeContinuationMask != unicodeContinuationMask {
break
}

cutLength--
}
}

output := make([]byte, l)
copy(output, p.outputBuffer[0:l])
p.outputBuffer = p.outputBuffer[l:]
// Flushing the output to the logfile starts here

output := make([]byte, cutLength)
copy(output, p.outputBuffer[0:cutLength])
p.outputBuffer = p.outputBuffer[cutLength:]

out := strings.Replace(string(output), "\r\n", "\n", -1)

Expand Down
2 changes: 2 additions & 0 deletions test/e2e.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# rubocop:disable all

Encoding.default_external = Encoding::UTF_8

require 'tempfile'
require 'json'
require 'timeout'
Expand Down
66 changes: 66 additions & 0 deletions test/e2e/docker/broken_unicode.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
#!/bin/ruby

Encoding.default_external = Encoding::UTF_8

# rubocop:disable all

require_relative '../../e2e'

start_job <<-JSON
{
"id": "#{$JOB_ID}",

"executor": "dockercompose",

"compose": {
"containers": [
{
"name": "main",
"image": "ruby:2.6"
}
]
},

"env_vars": [],
"files": [],

"commands": [
{ "directive": "echo | awk '{ printf(\\\"%c%c%c%c%c\\\", 150, 150, 150, 150, 150) }'"}
],

"epilogue_always_commands": [],

"callbacks": {
"finished": "https://httpbin.org/status/200",
"teardown_finished": "https://httpbin.org/status/200"
}
}
JSON

wait_for_job_to_finish

assert_job_log <<-LOG
{"event":"job_started", "timestamp":"*"}

{"event":"cmd_started", "timestamp":"*", "directive":"Pulling docker images..."}
*** LONG_OUTPUT ***
{"event":"cmd_finished", "timestamp":"*", "directive":"Pulling docker images...","event":"cmd_finished","exit_code":0,"finished_at":"*","started_at":"*","timestamp":"*"}

{"event":"cmd_started", "timestamp":"*", "directive":"Starting the docker image..."}
{"event":"cmd_output", "timestamp":"*", "output":"Starting a new bash session.\\n"}
{"event":"cmd_finished", "timestamp":"*", "directive":"Starting the docker image...","event":"cmd_finished","exit_code":0,"finished_at":"*","started_at":"*","timestamp":"*"}

{"event":"cmd_started", "timestamp":"*", "directive":"Exporting environment variables"}
{"event":"cmd_finished", "timestamp":"*", "directive":"Exporting environment variables","exit_code":0,"finished_at":"*","started_at":"*"}
{"event":"cmd_started", "timestamp":"*", "directive":"Injecting Files"}
{"event":"cmd_finished", "timestamp":"*", "directive":"Injecting Files","exit_code":0,"finished_at":"*","started_at":"*"}

{"event":"cmd_started", "timestamp":"*", "directive": "echo | awk '{ printf(\\\"%c%c%c%c%c\\\", 150, 150, 150, 150, 150) }'"}
{"event":"cmd_output", "timestamp":"*", "output":"\ufffd\ufffd\ufffd\ufffd\ufffd"}
{"event":"cmd_finished", "timestamp":"*", "directive": "echo | awk '{ printf(\\\"%c%c%c%c%c\\\", 150, 150, 150, 150, 150) }'","exit_code":0,"finished_at":"*","started_at":"*"}

{"event":"cmd_started", "timestamp":"*", "directive":"export SEMAPHORE_JOB_RESULT=passed"}
{"event":"cmd_finished", "timestamp":"*", "directive":"export SEMAPHORE_JOB_RESULT=passed","exit_code":0,"finished_at":"*","started_at":"*"}

{"event":"job_finished", "timestamp":"*", "result":"passed"}
LOG
Loading