update to 1d2fc8a

Narukara · Sep 9, 2023 · 0364697 · 0364697
1 parent ab60a82
commit 0364697
Show file tree

Hide file tree

Showing 3 changed files with 80 additions and 7 deletions.
diff --git a/README.md b/README.md
@@ -1,6 +1,6 @@
 这里是 https://github.com/esp-rs/std-training 的简体中文翻译
 
-目前进度：已经翻译完成，跟踪到 f7ce2e7
+目前进度：已经翻译完成，跟踪到 1d2fc8a
 
 ---
 

diff --git a/intro/http-client/examples/http_client.rs b/intro/http-client/examples/http_client.rs
@@ -61,18 +61,70 @@ fn get(url: impl AsRef<str>) -> Result<()> {
     match status {
         200..=299 => {
             // 5. If the status is OK, read response data chunk by chunk into a buffer and print it until done.
+            //
+            // NB. There is no guarantee that chunks will be split at the boundaries of valid UTF-8
+            // sequences (in fact it is likely that they are not) so this edge case needs to be handled.
+            // However, for the sake of clarity and brevity, the additional case of completely invalid
+            // UTF-8 sequences is not handled here and is left as an exercise for the reader.
             let mut buf = [0_u8; 256];
+            // Offset into the buffer to indicate that there may still be
+            // bytes at the beginning that have not been decoded yet
+            let mut offset = 0;
+            // Keep track of the total number of bytes read to print later
+            let mut total = 0;
             let mut reader = response;
             loop {
-                if let Ok(size) = Read::read(&mut reader, &mut buf) {
+                // read into the buffer starting at the offset to not overwrite
+                // the incomplete UTF-8 sequence we put there earlier
+                if let Ok(size) = Read::read(&mut reader, &mut buf[offset..]) {
                     if size == 0 {
+                        // It might be nice to check whether we have any leftover bytes here (i.e. offset > 0),
+                        // as this would mean that the response ended with an invalid UTF-8 sequence, but for the
+                        // purposes of this training we are assuming that the full response will be valid UTF-8.
                         break;
                     }
+                    // Update the total number of bytes read
+                    total += size;
                     // 6. Try converting the bytes into a Rust (UTF-8) string and print it.
-                    let response_text = str::from_utf8(&buf[..size])?;
-                    println!("{}", response_text);
+                    // Remember that we read into an offset and recalculate the real length
+                    // of the bytes to decode.
+                    let size_plus_offset = size + offset;
+                    match str::from_utf8(&buf[..size_plus_offset]) {
+                        Ok(text) => {
+                            // buffer contains fully valid UTF-8 data,
+                            // print it and reset the offset to 0.
+                            print!("{}", text);
+                            offset = 0;
+                        },
+                        Err(error) => {
+                            // The buffer contains incomplete UTF-8 data, we will
+                            // print the valid part, copy the invalid sequence to
+                            // the beginning of the buffer and set an offset for the
+                            // next read.
+                            //
+                            // NB. There is actually an additional case here that should be
+                            // handled in a real implementation. `Utf8Error::error_len()` may also
+                            // return `Some(_)`, indicating that there is actually an invalid UTF-8
+                            // sequence in the middle of the buffer. Such an error would not be
+                            // recoverable through our offset and copy mechanism. The result will be
+                            // that the invalid sequence will be copied to the front of the buffer and
+                            // eventually the buffer will be filled until no more bytes can be read when
+                            // the offset == buf.len(). At this point the loop will exit without reading
+                            // any more of the response.
+                            let valid_up_to = error.valid_up_to();
+                            unsafe {
+                                // SAFETY: `Utf8Error::valid_up_to()` guarantees that
+                                // `buf[..valid_up_to]` is valid UTF-8, so skipping the
+                                // validation in `from_utf8_unchecked` is sound here.
+                                print!("{}", str::from_utf8_unchecked(&buf[..valid_up_to]));
+                            }
+                            buf.copy_within(valid_up_to.., 0);
+                            offset = size_plus_offset - valid_up_to;
+                        }
+                    }
                 }
             }
+            println!("Total: {} bytes", total);
         }
         _ => bail!("Unexpected response code: {}", status),
     }

diff --git a/intro/http-client/examples/https_client.rs b/intro/http-client/examples/https_client.rs
@@ -66,18 +66,39 @@ fn get(url: impl AsRef<str>) -> Result<()> {
     match status {
         200..=299 => {
             // 4. if the status is OK, read response data chunk by chunk into a buffer and print it until done
+            //
+            // NB. see http_client.rs for an explanation of the offset mechanism for handling chunks that are
+            // split in the middle of valid UTF-8 sequences. This case is encountered a lot with the given
+            // example URL.
             let mut buf = [0_u8; 256];
+            let mut offset = 0;
+            let mut total = 0;
             let mut reader = response;
             loop {
-                if let Ok(size) = Read::read(&mut reader, &mut buf) {
+                if let Ok(size) = Read::read(&mut reader, &mut buf[offset..]) {
                     if size == 0 {
                         break;
                     }
+                    total += size;
                     // 5. try converting the bytes into a Rust (UTF-8) string and print it
-                    let response_text = str::from_utf8(&buf[..size])?;
-                    println!("{}", response_text);
+                    let size_plus_offset = size + offset;
+                    match str::from_utf8(&buf[..size_plus_offset]) {
+                        Ok(text) => {
+                            print!("{}", text);
+                            offset = 0;
+                        },
+                        Err(error) => {
+                            let valid_up_to = error.valid_up_to();
+                            unsafe {
+                                print!("{}", str::from_utf8_unchecked(&buf[..valid_up_to]));
+                            }
+                            buf.copy_within(valid_up_to.., 0);
+                            offset = size_plus_offset - valid_up_to;
+                        }
+                    }
                 }
             }
+            println!("Total: {} bytes", total);
         }
         _ => bail!("Unexpected response code: {}", status),
     }