Cleanup in preparation for merging Unicode support

Changed the string constructors to use java.nio.charset.StandardCharsets, which guarantees the charset is supported, so no UnsupportedEncodingException can be thrown. Fixed a bug introduced in SystemIO.writeToFile where the number returned was the number of characters written rather than the number of bytes written. Improved the error messages generated for \uXXXX escapes in strings. General cleanup of the loop that writes bytes in Assembler.storeStrings.
TheThirdOne · Jun 4, 2019 · 10e9c6f · 10e9c6f
1 parent 1d5cdc3
commit 10e9c6f
Show file tree

Hide file tree

Showing 3 changed files with 24 additions and 53 deletions.
diff --git a/rars/assembler/Assembler.java b/rars/assembler/Assembler.java
@@ -9,10 +9,10 @@
 import rars.util.Binary;
 import rars.util.SystemIO;
 
+import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
 import java.util.Collections;
 
-import java.io.UnsupportedEncodingException;
 
 /*
  Copyright (c) 2003-2012,  Pete Sanderson and Kenneth Vollmar
@@ -1034,7 +1034,6 @@ private void storeStrings(TokenList tokens, Directives direct, ErrorList errors)
                 char theChar;
                 for (int j = 1; j < quote.length() - 1; j++) {
                     theChar = quote.charAt(j);
-                    String strOfChar = "";
                     if (theChar == '\\') {
                         theChar = quote.charAt(++j);
                         switch (theChar) {
@@ -1070,16 +1069,14 @@ private void storeStrings(TokenList tokens, Directives direct, ErrorList errors)
                                 try{
                                     codePoint = quote.substring(j+1, j+5); //get the UTF-8 codepoint following the unicode escape sequence
                                     theChar = Character.toChars(Integer.parseInt(codePoint, 16))[0]; //converts the codepoint to single character
-                                } catch(StringIndexOutOfBoundsException | NumberFormatException e){
-                                    String invalidCodePoint = "";
-                                    int endOfCP = j + 5;    //a UTF8 codepoint is 4 bytes long 
-                                    char ch[] = {quote.charAt(++j)};
-                                    while (ch[0] != '"' & j != endOfCP){  //grab all characters after the \ u until end of string or end of codepoint
-                                        invalidCodePoint = invalidCodePoint.concat(new String(ch)); //parameter to String constructor is a char[] array
-                                        ch[0] = quote.charAt(++j);
-                                    }
+                                } catch(StringIndexOutOfBoundsException e){
+                                    String invalidCodePoint = quote.substring(j+1);
                                     errors.add(new ErrorMessage(token.getSourceProgram(), token
-                                        .getSourceLine(), token.getStartPos(), "illegal unicode escape: \"\\u" + invalidCodePoint + "\""));
+                                        .getSourceLine(), token.getStartPos(), "unicode escape \"\\u" +
+                                            invalidCodePoint + "\" is incomplete. Only escapes with 4 digits are valid."));
+                                } catch(NumberFormatException e){
+                                    errors.add(new ErrorMessage(token.getSourceProgram(), token
+                                            .getSourceLine(), token.getStartPos(), "illegal unicode escape: \"\\u" + codePoint + "\""));
                                 }
                                 j = j + 4; //skip past the codepoint for next iteration
                                 break;
@@ -1090,26 +1087,17 @@ private void storeStrings(TokenList tokens, Directives direct, ErrorList errors)
                             // codes...
                         }
                     }
-                    strOfChar = String.valueOf(theChar); //gets the string representation of the char for use with getBytes
-                    String charset = "UTF8";
-                    try{
-                        byte[] bytesOfChar = strOfChar.getBytes(charset);
-                        int lenOfArray = bytesOfChar.length;
-                        for (int k = 0; k < lenOfArray; k++){
-                            try {
-                                Globals.memory.set(this.dataAddress.get(), bytesOfChar[k],
-                                        DataTypes.CHAR_SIZE);
-                            } catch (AddressErrorException e) {
-                                errors.add(new ErrorMessage(token.getSourceProgram(), token
-                                        .getSourceLine(), token.getStartPos(), "\""
-                                        + this.dataAddress.get() + "\" is not a valid data segment address"));
-                            }
+                    byte[] bytesOfChar = String.valueOf(theChar).getBytes(StandardCharsets.UTF_8);
+                    try {
+                        for (byte b : bytesOfChar) {
+                            Globals.memory.set(this.dataAddress.get(), b,
+                                    DataTypes.CHAR_SIZE);
                             this.dataAddress.increment(DataTypes.CHAR_SIZE);
                         }
-                    } catch (UnsupportedEncodingException e) {
-                        //thrown only if the given Charset is not Supported by your JVM
-                        System.out.println("Error: " + charset + " charset is not supported by the JVM");
-                        System.exit(0);
+                    } catch (AddressErrorException e) {
+                        errors.add(new ErrorMessage(token.getSourceProgram(), token
+                                .getSourceLine(), token.getStartPos(), "\""
+                                + this.dataAddress.get() + "\" is not a valid data segment address"));
                     }
 
                 }

diff --git a/rars/riscv/syscalls/NullString.java b/rars/riscv/syscalls/NullString.java
@@ -5,8 +5,9 @@
 import rars.ProgramStatement;
 import rars.riscv.hardware.AddressErrorException;
 import rars.riscv.hardware.RegisterFile;
+
+import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
-import java.io.UnsupportedEncodingException;
 
 /*
 Copyright (c) 2003-2017,  Pete Sanderson,Benjamin Landers and Kenneth Vollmar
@@ -58,7 +59,7 @@ public static String get(ProgramStatement statement) throws ExitingException {
      */
     public static String get(ProgramStatement statement, String reg) throws ExitingException {
         int byteAddress = RegisterFile.getValue(reg);
-        ArrayList<Byte> utf8BytesList = new ArrayList<Byte>(); // Need an array to hold bytes
+        ArrayList<Byte> utf8BytesList = new ArrayList<>(); // Need an array to hold bytes
         try {
             utf8BytesList.add((byte) Globals.memory.getByte(byteAddress));
             while (utf8BytesList.get(utf8BytesList.size() - 1) != 0) // until null terminator
@@ -76,17 +77,6 @@ public static String get(ProgramStatement statement, String reg) throws ExitingE
             utf8Bytes[i] = utf8BytesList.get(i);
         }
 
-        //construct the string using UTF8 encoding
-        String message = "";
-        String charset = "UTF8";
-        try {
-            message = new String(utf8Bytes, charset);
-        } catch (UnsupportedEncodingException e) {
-            //thrown only if the given Charset is not supported by your JVM
-            System.out.println("Error: " + charset + " charset is not supported by the JVM");
-            System.exit(0);
-        }
-
-        return message;
+        return new String(utf8Bytes, StandardCharsets.UTF_8);
     }
 }
diff --git a/rars/util/SystemIO.java b/rars/util/SystemIO.java
@@ -263,16 +263,9 @@ public static int writeToFile(int fd, byte[] myBuffer, int lengthRequested) {
         /////////////// DPS 8-Jan-2013  ////////////////////////////////////////////////////
         /// Write to STDOUT or STDERR file descriptor while using IDE - write to Messages pane.
         if ((fd == STDOUT || fd == STDERR) && Globals.getGui() != null) {
-            String charset = "UTF8";
-            try{
-                String data = new String(myBuffer, charset); //decode the bytes using UTF-8 charset
-                Globals.getGui().getMessagesPane().postRunMessage(data);
-                return data.length();
-            } catch (UnsupportedEncodingException e){
-                //thrown only if the given Charset is not supported by your JVM
-                System.out.println("Error: " + charset + " charset is not supported by the JVM");
-                System.exit(0);
-            }           
+            String data = new String(myBuffer, StandardCharsets.UTF_8); //decode the bytes using UTF-8 charset
+            Globals.getGui().getMessagesPane().postRunMessage(data);
+            return myBuffer.length; // data.length would not count multi-byte characters
         }
         ///////////////////////////////////////////////////////////////////////////////////
         //// When running in command mode, code below works for either regular file or STDOUT/STDERR