Skip to content

Commit

Permalink
Cleanup in preparation for merging Unicode support
Browse files Browse the repository at this point in the history
Changed the string constructors to use java.nio.charset.StandardCharsets, which
ensures that they cannot throw an UnsupportedEncodingException.

Fixed a bug introduced in SystemIO.writeToFile where the number returned
was not the number of bytes written but the number of characters
written.

Improved the error messages generated for invalid or incomplete \uXXXX escapes in strings

General cleanup of the loop to write bytes in Assembler.storeStrings
  • Loading branch information
TheThirdOne committed Jun 4, 2019
1 parent 1d5cdc3 commit 10e9c6f
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 53 deletions.
46 changes: 17 additions & 29 deletions rars/assembler/Assembler.java
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,10 @@
import rars.util.Binary;
import rars.util.SystemIO;

import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collections;

import java.io.UnsupportedEncodingException;

/*
Copyright (c) 2003-2012, Pete Sanderson and Kenneth Vollmar
Expand Down Expand Up @@ -1034,7 +1034,6 @@ private void storeStrings(TokenList tokens, Directives direct, ErrorList errors)
char theChar;
for (int j = 1; j < quote.length() - 1; j++) {
theChar = quote.charAt(j);
String strOfChar = "";
if (theChar == '\\') {
theChar = quote.charAt(++j);
switch (theChar) {
Expand Down Expand Up @@ -1070,16 +1069,14 @@ private void storeStrings(TokenList tokens, Directives direct, ErrorList errors)
try{
codePoint = quote.substring(j+1, j+5); //get the UTF-8 codepoint following the unicode escape sequence
theChar = Character.toChars(Integer.parseInt(codePoint, 16))[0]; //converts the codepoint to single character
} catch(StringIndexOutOfBoundsException | NumberFormatException e){
String invalidCodePoint = "";
int endOfCP = j + 5; //a UTF8 codepoint is 4 bytes long
char ch[] = {quote.charAt(++j)};
while (ch[0] != '"' & j != endOfCP){ //grab all characters after the \ u until end of string or end of codepoint
invalidCodePoint = invalidCodePoint.concat(new String(ch)); //parameter to String constructor is a char[] array
ch[0] = quote.charAt(++j);
}
} catch(StringIndexOutOfBoundsException e){
String invalidCodePoint = quote.substring(j+1);
errors.add(new ErrorMessage(token.getSourceProgram(), token
.getSourceLine(), token.getStartPos(), "illegal unicode escape: \"\\u" + invalidCodePoint + "\""));
.getSourceLine(), token.getStartPos(), "unicode escape \"\\u" +
invalidCodePoint + "\" is incomplete. Only escapes with 4 digits are valid."));
} catch(NumberFormatException e){
errors.add(new ErrorMessage(token.getSourceProgram(), token
.getSourceLine(), token.getStartPos(), "illegal unicode escape: \"\\u" + codePoint + "\""));
}
j = j + 4; //skip past the codepoint for next iteration
break;
Expand All @@ -1090,26 +1087,17 @@ private void storeStrings(TokenList tokens, Directives direct, ErrorList errors)
// codes...
}
}
strOfChar = String.valueOf(theChar); //gets the string representation of the char for use with getBytes
String charset = "UTF8";
try{
byte[] bytesOfChar = strOfChar.getBytes(charset);
int lenOfArray = bytesOfChar.length;
for (int k = 0; k < lenOfArray; k++){
try {
Globals.memory.set(this.dataAddress.get(), bytesOfChar[k],
DataTypes.CHAR_SIZE);
} catch (AddressErrorException e) {
errors.add(new ErrorMessage(token.getSourceProgram(), token
.getSourceLine(), token.getStartPos(), "\""
+ this.dataAddress.get() + "\" is not a valid data segment address"));
}
byte[] bytesOfChar = String.valueOf(theChar).getBytes(StandardCharsets.UTF_8);
try {
for (byte b : bytesOfChar) {
Globals.memory.set(this.dataAddress.get(), b,
DataTypes.CHAR_SIZE);
this.dataAddress.increment(DataTypes.CHAR_SIZE);
}
} catch (UnsupportedEncodingException e) {
//thrown only if the given Charset is not Supported by your JVM
System.out.println("Error: " + charset + " charset is not supported by the JVM");
System.exit(0);
} catch (AddressErrorException e) {
errors.add(new ErrorMessage(token.getSourceProgram(), token
.getSourceLine(), token.getStartPos(), "\""
+ this.dataAddress.get() + "\" is not a valid data segment address"));
}

}
Expand Down
18 changes: 4 additions & 14 deletions rars/riscv/syscalls/NullString.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,9 @@
import rars.ProgramStatement;
import rars.riscv.hardware.AddressErrorException;
import rars.riscv.hardware.RegisterFile;

import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.io.UnsupportedEncodingException;

/*
Copyright (c) 2003-2017, Pete Sanderson,Benjamin Landers and Kenneth Vollmar
Expand Down Expand Up @@ -58,7 +59,7 @@ public static String get(ProgramStatement statement) throws ExitingException {
*/
public static String get(ProgramStatement statement, String reg) throws ExitingException {
int byteAddress = RegisterFile.getValue(reg);
ArrayList<Byte> utf8BytesList = new ArrayList<Byte>(); // Need an array to hold bytes
ArrayList<Byte> utf8BytesList = new ArrayList<>(); // Need an array to hold bytes
try {
utf8BytesList.add((byte) Globals.memory.getByte(byteAddress));
while (utf8BytesList.get(utf8BytesList.size() - 1) != 0) // until null terminator
Expand All @@ -76,17 +77,6 @@ public static String get(ProgramStatement statement, String reg) throws ExitingE
utf8Bytes[i] = utf8BytesList.get(i);
}

//construct the string using UTF8 encoding
String message = "";
String charset = "UTF8";
try {
message = new String(utf8Bytes, charset);
} catch (UnsupportedEncodingException e) {
//thrown only if the given Charset is not supported by your JVM
System.out.println("Error: " + charset + " charset is not supported by the JVM");
System.exit(0);
}

return message;
return new String(utf8Bytes, StandardCharsets.UTF_8);
}
}
13 changes: 3 additions & 10 deletions rars/util/SystemIO.java
Original file line number Diff line number Diff line change
Expand Up @@ -263,16 +263,9 @@ public static int writeToFile(int fd, byte[] myBuffer, int lengthRequested) {
/////////////// DPS 8-Jan-2013 ////////////////////////////////////////////////////
/// Write to STDOUT or STDERR file descriptor while using IDE - write to Messages pane.
if ((fd == STDOUT || fd == STDERR) && Globals.getGui() != null) {
String charset = "UTF8";
try{
String data = new String(myBuffer, charset); //decode the bytes using UTF-8 charset
Globals.getGui().getMessagesPane().postRunMessage(data);
return data.length();
} catch (UnsupportedEncodingException e){
//thrown only if the given Charset is not supported by your JVM
System.out.println("Error: " + charset + " charset is not supported by the JVM");
System.exit(0);
}
String data = new String(myBuffer, StandardCharsets.UTF_8); //decode the bytes using UTF-8 charset
Globals.getGui().getMessagesPane().postRunMessage(data);
return myBuffer.length; // data.length would not count multi-byte characters
}
///////////////////////////////////////////////////////////////////////////////////
//// When running in command mode, code below works for either regular file or STDOUT/STDERR
Expand Down

0 comments on commit 10e9c6f

Please sign in to comment.