Skip to content

Commit

Permalink
Parse SAS format strings for XPT outputs (#258)
Browse files Browse the repository at this point in the history
Fixes #257

* Update XPT reading code in line with the output changes
* Check if label_set has been set before running roundtrip test
* Use Ragel parser for xport format name parsing
* Use struct to store parsed xport format
* Add fuzzer for xport_parse_format
* Add new files to VS17 project
  • Loading branch information
gorcha authored Nov 23, 2021
1 parent 53c027c commit 1e0ca4d
Show file tree
Hide file tree
Showing 14 changed files with 605 additions and 56 deletions.
13 changes: 12 additions & 1 deletion Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ libreadstat_la_SOURCES = \
src/sas/readstat_xport.c \
src/sas/readstat_xport_read.c \
src/sas/readstat_xport_write.c \
src/sas/readstat_xport_parse_format.c \
src/spss/readstat_por.c \
src/spss/readstat_por_parse.c \
src/spss/readstat_por_read.c \
Expand Down Expand Up @@ -95,6 +96,7 @@ noinst_HEADERS = \
src/sas/readstat_sas.h \
src/sas/readstat_sas_rle.h \
src/sas/readstat_xport.h \
src/sas/readstat_xport_parse_format.h \
src/spss/readstat_por.h \
src/spss/readstat_por_parse.h \
src/spss/readstat_sav.h \
Expand Down Expand Up @@ -274,7 +276,8 @@ EXTRA_PROGRAMS += \
fuzz_grammar_por_double \
fuzz_grammar_sav_date \
fuzz_grammar_sav_time \
fuzz_grammar_spss_format
fuzz_grammar_spss_format \
fuzz_grammar_xport_format

# Force C++ linking for fuzz targets
nodist_EXTRA_fuzz_compression_sas_rle_SOURCES = dummy.cxx
Expand All @@ -293,6 +296,7 @@ nodist_EXTRA_fuzz_grammar_por_double_SOURCES = dummy.cxx
nodist_EXTRA_fuzz_grammar_sav_date_SOURCES = dummy.cxx
nodist_EXTRA_fuzz_grammar_sav_time_SOURCES = dummy.cxx
nodist_EXTRA_fuzz_grammar_spss_format_SOURCES = dummy.cxx
nodist_EXTRA_fuzz_grammar_xport_format_SOURCES = dummy.cxx

fuzz_grammar_dta_timestamp_SOURCES = \
src/fuzz/fuzz_grammar_dta_timestamp.c
Expand Down Expand Up @@ -329,6 +333,13 @@ fuzz_grammar_spss_format_LDADD = libreadstat.la @LIB_FUZZING_ENGINE@
fuzz_grammar_spss_format_LDFLAGS = -static
fuzz_grammar_spss_format_CFLAGS = -g -Wall @EXTRA_WARNINGS@ -Werror -pedantic-errors -std=c99 @SANITIZERS@

# Fuzz target built from src/fuzz/fuzz_grammar_xport_format.c. It is linked
# with libreadstat.la and whatever is substituted for @LIB_FUZZING_ENGINE@,
# using -static, and compiled as C99 with warnings treated as errors.
fuzz_grammar_xport_format_SOURCES = \
src/fuzz/fuzz_grammar_xport_format.c

fuzz_grammar_xport_format_LDADD = libreadstat.la @LIB_FUZZING_ENGINE@
fuzz_grammar_xport_format_LDFLAGS = -static
fuzz_grammar_xport_format_CFLAGS = -g -Wall @EXTRA_WARNINGS@ -Werror -pedantic-errors -std=c99 @SANITIZERS@

fuzz_format_dta_SOURCES = \
src/fuzz/fuzz_format.c \
src/fuzz/fuzz_format_dta.c \
Expand Down
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -447,7 +447,7 @@ required.
`corpus/`. There is a subdirectory for each sub-format (`dta104`, `dta105`,
etc.). Currently a total of 468 files are created.
1. If fuzz-testing has been enabled, `make` will also create fifteen fuzzer
targets, one for each of seven file formats, five for internally used
targets, one for each of seven file formats, six for internally used
grammars, and two fuzzers for testing the compression routines.
* `fuzz_format_dta`
* `fuzz_format_por`
Expand All @@ -461,6 +461,7 @@ required.
* `fuzz_grammar_sav_date`
* `fuzz_grammar_sav_time`
* `fuzz_grammar_spss_format`
* `fuzz_grammar_xport_format`
* `fuzz_compression_sas_rle`
* `fuzz_compression_sav`
Expand Down
4 changes: 3 additions & 1 deletion VS17/ReadStat.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,7 @@
<ClCompile Include="..\src\sas\readstat_xport.c" />
<ClCompile Include="..\src\sas\readstat_xport_read.c" />
<ClCompile Include="..\src\sas\readstat_xport_write.c" />
<ClCompile Include="..\src\sas\readstat_xport_parse_format.c" />
<ClCompile Include="..\src\spss\readstat_por.c" />
<ClCompile Include="..\src\spss\readstat_por_parse.c" />
<ClCompile Include="..\src\spss\readstat_por_read.c" />
Expand Down Expand Up @@ -243,6 +244,7 @@
<ClInclude Include="..\src\sas\readstat_sas.h" />
<ClInclude Include="..\src\sas\readstat_sas_rle.h" />
<ClInclude Include="..\src\sas\readstat_xport.h" />
<ClInclude Include="..\src\sas\readstat_xport_parse_format.h" />
<ClInclude Include="..\src\spss\readstat_por.h" />
<ClInclude Include="..\src\spss\readstat_por_parse.h" />
<ClInclude Include="..\src\spss\readstat_sav.h" />
Expand All @@ -263,4 +265,4 @@
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
</Project>
</Project>
6 changes: 6 additions & 0 deletions VS17/ReadStat.vcxproj.filters
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,9 @@
<ClCompile Include="..\src\sas\readstat_xport_write.c">
<Filter>Source Files\sas</Filter>
</ClCompile>
<ClCompile Include="..\src\sas\readstat_xport_parse_format.c">
<Filter>Source Files\sas</Filter>
</ClCompile>
<ClCompile Include="..\src\sas\ieee.c">
<Filter>Source Files\sas</Filter>
</ClCompile>
Expand Down Expand Up @@ -194,6 +197,9 @@
<ClInclude Include="..\src\sas\readstat_xport.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="..\src\sas\readstat_xport_parse_format.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="..\src\spss\readstat_por.h">
<Filter>Header Files</Filter>
</ClInclude>
Expand Down
154 changes: 154 additions & 0 deletions fuzz/dict/fuzz_grammar_xport_format.dict
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
"$ASCII"
"$BASE64X"
"$BINARY"
"$CHAR"
"$EBCDIC"
"$HEX"
"$MSGCASE"
"$OCTAL"
"$QUOTE"
"$REVERJ"
"$REVERS"
"$UPCASE"
"$VARYING"
"$"
"$N8601B"
"$N8601BA"
"$N8601E"
"$N8601EA"
"$N8601EH"
"$N8601EX"
"$N8601H"
"$N8601X"
"B8601DA"
"B8601DN"
"B8601DT"
"B8601DZ"
"B8601LZ"
"B8601TM"
"B8601TZ"
"DATE"
"DATEAMPM"
"DATETIME"
"DAY"
"DDMMYY"
"DDMMYY"
"DOWNAME"
"DTDATE"
"DTMONYY"
"DTWKDATX"
"DTYEAR"
"DTYYQC"
"E8601DA"
"E8601DN"
"E8601DT"
"E8601DZ"
"E8601LZ"
"E8601TM"
"E8601TZ"
"HHMM"
"HOUR"
"JULDAY"
"JULIAN"
"MMDDYY"
"MMDDYY"
"MMSS"
"MMYY"
"MMYY"
"MONNAME"
"MONTH"
"MONYY"
"PDJULG"
"PDJULI"
"QTR"
"QTRR"
"TIME"
"TIMEAMPM"
"TOD"
"WEEKDATE"
"WEEKDATX"
"WEEKDAY"
"WEEKU"
"WEEKV"
"WEEKW"
"WORDDATE"
"WORDDATX"
"YEAR"
"YYMM"
"YYMM"
"YYMMDD"
"YYMMDD"
"YYMON"
"YYQ"
"YYQ"
"YYQR"
"YYQR"
"$N8601B"
"$N8601BA"
"$N8601E"
"$N8601EA"
"$N8601EH"
"$N8601EX"
"$N8601H"
"$N8601X"
"B8601DA"
"B8601DN"
"B8601DT"
"B8601DZ"
"B8601LZ"
"B8601TM"
"B8601TZ"
"E8601DA"
"E8601DN"
"E8601DT"
"E8601DZ"
"E8601LZ"
"E8601TM"
"E8601TZ"
"BEST"
"BESTD"
"BINARY"
"COMMA"
"COMMAX"
"D"
"DOLLAR"
"DOLLARX"
"E"
"FLOAT"
"FRACT"
"HEX"
"IB"
"IBR"
"IEEE"
"NEGPAREN"
"NUMX"
"OCTAL"
"PD"
"PERCENT"
"PERCENTN"
"PIB"
"PIBR"
"PK"
"PVALUE"
"RB"
"ROMAN"
"S370FF"
"S370FIB"
"S370FIBU"
"S370FPD"
"S370FPDU"
"S370FPIB"
"S370FRB"
"S370FZD"
"S370FZDL"
"S370FZDS"
"S370FZDT"
"S370FZDU"
"SSN"
"VAXRB"
"VMSZN"
""
"WORDF"
"WORDS"
"Z"
"ZD"
12 changes: 12 additions & 0 deletions src/fuzz/fuzz_grammar_xport_format.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#include <stdlib.h>
#include <time.h>

#include "../readstat.h"
#include "../sas/readstat_xport.h"
#include "../sas/readstat_xport_parse_format.h"

/*
 * Fuzzer entry point for the xport format parser.
 *
 * Hands the raw fuzzer input (Size bytes starting at Data) to
 * xport_parse_format() along with a scratch xport_format_t; the final two
 * arguments are passed as NULL. Whatever the parser returns is deliberately
 * discarded. Always returns 0.
 */
int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
    const char *input = (const char *)Data;
    xport_format_t parsed;

    (void)xport_parse_format(input, Size, &parsed, NULL, NULL);

    return 0;
}
6 changes: 6 additions & 0 deletions src/sas/readstat_xport.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,12 @@ typedef struct xport_namestr_s {
} xport_namestr_t;
#pragma pack(pop)

/*
 * Holds a parsed xport format.
 * NOTE(review): presumably populated by xport_parse_format(), which is not
 * declared in this header -- confirm against the parser.
 */
typedef struct xport_format_s {
/* Format name, stored in a fixed 32-byte buffer. */
char name[32];
/* NOTE(review): presumably the width component of the format -- confirm against the parser. */
int width;
/* NOTE(review): presumably the number of decimal places -- confirm against the parser. */
int decimals;
} xport_format_t;

#define XPORT_MIN_DOUBLE_SIZE 3
#define XPORT_MAX_DOUBLE_SIZE 8

Expand Down
Loading

0 comments on commit 1e0ca4d

Please sign in to comment.