diff --git a/.github/workflows/continuous-integration-pip.yml b/.github/workflows/continuous-integration-pip.yml index 5307966c..6bf1caf7 100644 --- a/.github/workflows/continuous-integration-pip.yml +++ b/.github/workflows/continuous-integration-pip.yml @@ -35,22 +35,22 @@ jobs: run: | echo "" | bash etc/CONFIGURE_UBUNTU20LTS.bash - - name: C++ checks with address-sanitizer - # Temporarily, just on MacOS - if: startsWith(matrix.os, 'macos') + - name: C++ checks with address-sanitizer (Mac and Linux) run: | bash bootstrap.sh ./configure -q --disable-opt --enable-silent-rules --enable-address-sanitizer - make && (cd src; make check || (cat test-suite.log; exit 1)) + make + (cd src; make check || (cat test-suite.log; exit 1)) + make distclean - - name: C++ checks with thread-sanitizer - # Just run on ubuntu - # Temporarily, disable + - name: C++ checks with thread-sanitizer (disabled) if: startsWith(matrix.os, 'ubuntu-DISABLED') run: | bash bootstrap.sh ./configure -q --disable-opt --enable-silent-rules --enable-thread-sanitizer - make && (cd src; make check || (cat test-suite.log; exit 1)) + make + (cd src; make check || (cat test-suite.log; exit 1)) + make distclean - name: C++ checks with codecov if: startsWith(matrix.os, 'ubuntu') @@ -84,4 +84,6 @@ jobs: - name: distcheck run: | - make clean && make distcheck + ./configure -q + make clean + make distcheck diff --git a/configure.ac b/configure.ac index a950dc40..e881f8ce 100644 --- a/configure.ac +++ b/configure.ac @@ -331,7 +331,7 @@ AC_MSG_NOTICE([ LDFLAGS: $LDFLAGS]) # if test x"${libewf}" != "xyes" ; then - AC_MSG_WARN([libewf-devel must be installed for E01 support.]) + AC_MSG_WARN([libewf not install. Please install libewf-devel for E01 support. 
See the etc/ directory for details.]) fi AC_OUTPUT diff --git a/etc/CONFIGURE_UBUNTU20LTS.bash b/etc/CONFIGURE_UBUNTU20LTS.bash index 8a057be3..cce6ce99 100755 --- a/etc/CONFIGURE_UBUNTU20LTS.bash +++ b/etc/CONFIGURE_UBUNTU20LTS.bash @@ -60,7 +60,7 @@ fi echo Will now try to install -sudo apt update -y || fail could not apt update +sudo apt upgrade -y || fail could not apt upgrade sudo apt install -y $MKPGS || fail could not apt install $MKPGS echo manually installing a modern libewf diff --git a/src/Makefile.am b/src/Makefile.am index 454d4905..dcc6926d 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -114,7 +114,6 @@ bulk_extractor_parts = \ bulk_extractor.cpp \ bulk_extractor.h \ cxxopts.hpp \ - findopts.h \ image_process.cpp \ image_process.h \ notify_thread.cpp \ @@ -129,6 +128,9 @@ bulk_extractor_SOURCES = $(bulk_extractor_parts) $(scanners_builtin) main.cpp test_be_SOURCES = $(bulk_extractor_parts) $(scanners_builtin) be13_api/catch.hpp test_be.cpp test_be.h test_be2.cpp +runs.txt: test_be tests/run_each.sh + bash tests/run_each.sh > runs.txt 2>&1 + #lib: libbulkextractor.so #libbulkextractor.so: $(bulk_extractorlib_OBJECTS) diff --git a/src/be13_api b/src/be13_api index 81b46b45..b81d16b6 160000 --- a/src/be13_api +++ b/src/be13_api @@ -1 +1 @@ -Subproject commit 81b46b45715814dffab07783952f5c28298b73a1 +Subproject commit b81d16b65fb7934ebb40bdfbfeef3e9df5bbd1d2 diff --git a/src/bulk_extractor.cpp b/src/bulk_extractor.cpp index f3418cf6..334fffeb 100644 --- a/src/bulk_extractor.cpp +++ b/src/bulk_extractor.cpp @@ -133,14 +133,19 @@ std::string ns_to_sec(uint64_t ns) return std::to_string(sec100/100) + std::string(".") +std::to_string(tens) + std::to_string(hundredths); } -int bulk_extractor_main( std::ostream &cout, std::ostream &cerr, int argc,char * const *argv) +void bulk_extractor_set_debug() { sbuf_t::debug_range_exception = getenv_debug("DEBUG_SBUF_RANGE_EXCEPTION"); sbuf_t::debug_alloc = getenv_debug("DEBUG_SBUF_ALLOC"); sbuf_t::debug_leak = 
getenv_debug("DEBUG_SBUF_LEAK"); - int64_t sbuf_count = sbuf_t::sbuf_count; - if (sbuf_count!=0) { - std::cerr << "sbuf_count=" << sbuf_count << " at start of execution." << std::endl; +} + +int bulk_extractor_main( std::ostream &cout, std::ostream &cerr, int argc,char * const *argv) +{ + bulk_extractor_set_debug(); + int64_t start_sbuf_count = sbuf_t::sbuf_count; + if (start_sbuf_count!=0) { + cerr << "start_sbuf_count=" << start_sbuf_count << " at start of execution." << std::endl; } mtrace(); @@ -228,8 +233,8 @@ int bulk_extractor_main( std::ostream &cout, std::ostream &cerr, int argc,char * "directories for scanner shared libraries (can be repeated). " "Default directories include /usr/local/lib/bulk_extractor, /usr/lib/bulk_extractor " "and any directories specified in the BE_PATH environment variable.", cxxopts::value>()) - ("p,path", "print the value of [:length][/h][/r] with optional length, hex output, or raw output.", cxxopts::value()) - ("q,quit", "no status output") + ("p,path", "print the value of [:length][/h][/r] with optional length, hex output, or raw output.", cxxopts::value()) + ("q,quit", "no status or performance output") ("r,alert_list", "file to read alert list from", cxxopts::value()) ("R,recurse", "treat image file as a directory to recursively explore") ("S,set", "set a name=value option (can be repeated)", cxxopts::value>()) @@ -439,7 +444,7 @@ int bulk_extractor_main( std::ostream &cout, std::ostream &cerr, int argc,char * struct feature_recorder_set::flags_t f; scanner_set ss( sc, f, nullptr); // make a scanner_set but with no XML writer. We will create it below - ss.add_scanners( scanners_builtin); + ss.add_scanners( scanners_builtin ); /* Applying the scanner commands will create the alert recorder. */ try { @@ -475,7 +480,7 @@ int bulk_extractor_main( std::ostream &cout, std::ostream &cerr, int argc,char * /* are we supposed to run the path printer? If so, we can use cout_, since the notify stream won't be running. 
*/ if ( result.count( "path" ) ) { std::string opt_path = result["path"].as(); - path_printer pp( &ss, p, cout); + path_printer pp( ss, p, cout); if ( opt_path=="-http" || opt_path=="--http" ){ pp.process_http( std::cin); } else if ( opt_path=="-i" || opt_path=="-" ){ @@ -483,6 +488,7 @@ int bulk_extractor_main( std::ostream &cout, std::ostream &cerr, int argc,char * } else { pp.process_path( opt_path); } + delete p; return 0; } @@ -490,12 +496,13 @@ int bulk_extractor_main( std::ostream &cout, std::ostream &cerr, int argc,char * * We use *p because we don't know which subclass we will be getting. */ + /* Strangely, if we make xreport a stack variable, we fail */ dfxml_writer *xreport = new dfxml_writer( sc.outdir / Phase1::REPORT_FILENAME, false ); // do not make DTD ss.set_dfxml_writer( xreport ); /* Start the clock */ master_timer.start(); - Phase1 phase1( cfg, *p, ss); + Phase1 phase1( cfg, *p, ss, cout); /* Validate the args */ validate_path( sc.input_fname ); @@ -566,6 +573,7 @@ int bulk_extractor_main( std::ostream &cout, std::ostream &cerr, int argc,char * catch ( const feature_recorder::DiskWriteError &e ) { cerr << "Disk write error during Phase 1 ( scanning). Disk is probably full." << std::endl << "Remove extra files and restart bulk_extractor with the exact same command line to continue." << std::endl; + // do not call ss.shutdown() to avoid writing out histograms return 6; } @@ -588,7 +596,7 @@ int bulk_extractor_main( std::ostream &cout, std::ostream &cerr, int argc,char * if ( !cfg.opt_quiet) cout << "Phase 2. Shutting down scanners" << std::endl ; xreport->add_timestamp( "phase2 start" ); try { - std::cout << "Computing final histograms and shutting down..." << std::endl ; + cout << "Computing final histograms and shutting down..." 
<< std::endl ; ss.shutdown(); } catch ( const feature_recorder::DiskWriteError &e ) { @@ -650,9 +658,9 @@ int bulk_extractor_main( std::ostream &cout, std::ostream &cerr, int argc,char * << std::endl; if (ss.producer_wait_ns() > ss.consumer_wait_ns_per_worker()){ - std::cout << "*** More time spent waiting for workers. You need faster CPU or more cores for improved performance." << std::endl; + cout << "*** More time spent waiting for workers. You need faster CPU or more cores for improved performance." << std::endl; } else { - std::cout << "*** More time spent waiting for reader. You need faster I/O for improved performance." << std::endl; + cout << "*** More time spent waiting for reader. You need faster I/O for improved performance." << std::endl; } } @@ -664,19 +672,26 @@ int bulk_extractor_main( std::ostream &cout, std::ostream &cerr, int argc,char * cout << "Did not scan for email addresses." << std::endl; } - if (sbuf_count != sbuf_t::sbuf_count) { - std::cerr << "Initial sbuf_t.sbuf_total=" << sbuf_count << " end sbuf_count=" << sbuf_t::sbuf_count << std::endl; + if (start_sbuf_count != sbuf_t::sbuf_count) { + cerr << "sbuf_t leak detected. Initial sbuf_t.sbuf_total=" << start_sbuf_count << " end sbuf_count=" << sbuf_t::sbuf_count << std::endl; if (sbuf_t::debug_leak) { for (auto const &it : sbuf_t::sbuf_alloced) { - std::cerr << it << std::endl; - std::cerr << " " << *it << std::endl; + cerr << it << std::endl; + cerr << " " << *it << std::endl; } } else { - std::cerr << "Leaked sbuf. set DEBUG_SBUF_ALLOC=1 or DEBUG_SBUF_LEAK=1 to diagnose" << std::endl; + cerr << "Leaked sbuf. set DEBUG_SBUF_ALLOC=1 or DEBUG_SBUF_LEAK=1 to diagnose" << std::endl; } throw std::runtime_error("leaked sbuf"); } + /* Cleanup */ + + delete xreport; // no longer needed + xreport=nullptr; // and zero it out. 
+ delete p; + p = nullptr; + muntrace(); return( 0 ); } diff --git a/src/bulk_extractor.h b/src/bulk_extractor.h index 5c8faff2..76050682 100644 --- a/src/bulk_extractor.h +++ b/src/bulk_extractor.h @@ -17,6 +17,7 @@ [[noreturn]] void debug_help(); void validate_path(const std::filesystem::path fn); +void bulk_extractor_set_debug(); int bulk_extractor_main(std::ostream &cout, std::ostream &cerr, int argc,char * const *argv); #endif diff --git a/src/pattern_scanner.cpp b/src/pattern_scanner.cpp index 184ddccf..0f188128 100644 --- a/src/pattern_scanner.cpp +++ b/src/pattern_scanner.cpp @@ -295,6 +295,8 @@ void scan_lg(PatternScanner& scanner, class scanner_params &sp) { case scanner_params::PHASE_SHUTDOWN: scanner.shutdown(sp); break; + case scanner_params::PHASE_CLEANUP: + /* TODO - do something here. */ default: break; } diff --git a/src/phase1.cpp b/src/phase1.cpp index 3f0b3f99..5007ae26 100644 --- a/src/phase1.cpp +++ b/src/phase1.cpp @@ -28,8 +28,8 @@ using namespace std::chrono_literals; -Phase1::Phase1(Config &config_, image_process &p_, scanner_set &ss_): - config(config_), p(p_), ss(ss_), xreport(*ss_.get_dfxml_writer()) +Phase1::Phase1(Config &config_, image_process &p_, scanner_set &ss_, std::ostream &cout_): + config(config_), p(p_), ss(ss_), cout(cout_), xreport(*ss_.get_dfxml_writer()) { } @@ -226,7 +226,6 @@ void Phase1::read_process_sbufs() } if (config.fraction_done) *config.fraction_done = 1.0; - if (!config.opt_quiet) std::cout << "All data read; waiting for threads to finish..." << std::endl; } void Phase1::dfxml_write_create(int argc, char * const *argv) @@ -284,6 +283,7 @@ void Phase1::phase1_run() } xreport.push("runtime","xmlns:debug=\"http://www.github.com/simsong/bulk_extractor/issues\""); read_process_sbufs(); + if (!config.opt_quiet) cout << "All data read; waiting for threads to finish..." 
<< std::endl; ss.join(); xreport.pop("runtime"); dfxml_write_source(); // written here so it may also include hash diff --git a/src/phase1.h b/src/phase1.h index ca29edd0..9c90f1c8 100644 --- a/src/phase1.h +++ b/src/phase1.h @@ -3,6 +3,7 @@ #include #include +#include #include "be13_api/scanner_set.h" #include "be13_api/dfxml_cpp/src/dfxml_writer.h" @@ -79,6 +80,7 @@ class Phase1 { Config &config; // phase1 config passed in. Writable so seen can be updated. image_process &p; // image being processed scanner_set &ss; // our scanner set + std::ostream &cout; u_int notify_ctr {0}; // for random sampling uint64_t total_bytes {0}; // processed @@ -92,7 +94,7 @@ class Phase1 { sbuf_t *get_sbuf(image_process::iterator &it); - Phase1(Config &config_, image_process &p_, scanner_set &ss_); + Phase1(Config &config_, image_process &p_, scanner_set &ss_, std::ostream &cout_); void dfxml_write_create(int argc, char * const *argv); // create the DFXML header void dfxml_write_source(); // create the DFXML block void read_process_sbufs(); // read and process the sbufs diff --git a/src/scan_exiv2.cpp b/src/scan_exiv2.cpp index 51e88770..366002ae 100644 --- a/src/scan_exiv2.cpp +++ b/src/scan_exiv2.cpp @@ -115,7 +115,6 @@ void scan_exiv2(struct scanner_params &sp) sp.info->feature_defs.push_back( feature_recorder_def("gps")); return; } - if(sp.phase==scanner_params::PHASE_SHUTDOWN) return; if(sp.phase==scanner_params::PHASE_SCAN){ const sbuf_t &sbuf = sp.sbuf; diff --git a/src/scan_find.cpp b/src/scan_find.cpp index 15ee61bf..d2c1537d 100644 --- a/src/scan_find.cpp +++ b/src/scan_find.cpp @@ -60,9 +60,7 @@ void scan_find(scanner_params &sp) sp.info->histogram_defs.push_back( histogram_def("find", "find", "", "","histogram", lowercase)); return; } - if(sp.phase==scanner_params::PHASE_SHUTDOWN) return; - - if (scanner_params::PHASE_INIT2 == sp.phase) { + if (sp.phase == scanner_params::PHASE_INIT2 ) { for (const auto &it : sp.ss->find_patterns()) { add_find_pattern(it); if 
(sp.ss->writer) { sp.ss->writer->xmlout("find_pattern", it); } diff --git a/src/scan_gzip.cpp b/src/scan_gzip.cpp index 212ff967..b019a84e 100644 --- a/src/scan_gzip.cpp +++ b/src/scan_gzip.cpp @@ -37,9 +37,10 @@ void scan_gzip(scanner_params &sp) if( sbuf_decompress::is_gzip_header( sbuf, i)){ auto *decomp = sbuf_decompress::sbuf_new_decompress( sbuf.slice(i), gzip_max_uncompr_size, "GZIP" ,sbuf_decompress::mode_t::GZIP, 0); - if (decomp==nullptr) continue; - assert(sbuf.depth() +1 == decomp->depth()); - sp.recurse(decomp); // recurse will free the sbuf + if (decomp!=nullptr) { + assert(sbuf.depth()+1 == decomp->depth()); // make sure it is 1 deeper! + sp.recurse(decomp); // recurse will free the sbuf + } } } } diff --git a/src/scan_hiberfile.cpp b/src/scan_hiberfile.cpp index 98b951bb..9ebdbeae 100644 --- a/src/scan_hiberfile.cpp +++ b/src/scan_hiberfile.cpp @@ -110,7 +110,6 @@ void scan_hiberfile(scanner_params &sp) sp.info->min_sbuf_size = MIN_COMPRESSED_SIZE; return; /* no features */ } - if (sp.phase==scanner_params::PHASE_SHUTDOWN) return; if (sp.phase==scanner_params::PHASE_SCAN){ /* Do not scan for hibernation decompression if we are already diff --git a/src/scan_httplogs.cpp b/src/scan_httplogs.cpp index 415c13fc..57a0b04b 100644 --- a/src/scan_httplogs.cpp +++ b/src/scan_httplogs.cpp @@ -68,8 +68,6 @@ void scan_httplogs(scanner_params &sp) return; } - if(sp.phase==scanner_params::PHASE_SHUTDOWN) return; - if(sp.phase==scanner_params::PHASE_SCAN){ feature_recorder &httplogs_recorder = sp.named_feature_recorder("httplogs"); const sbuf_t &sbuf = *(sp.sbuf); diff --git a/src/scan_net.cpp b/src/scan_net.cpp index 484dedd5..21fe983e 100644 --- a/src/scan_net.cpp +++ b/src/scan_net.cpp @@ -895,7 +895,7 @@ void scan_net(scanner_params &sp) */ } } - if (sp.phase==scanner_params::PHASE_SHUTDOWN){ + if (sp.phase==scanner_params::PHASE_CLEANUP){ if (scanner){ delete scanner; scanner = nullptr; diff --git a/src/scan_windirs.cpp b/src/scan_windirs.cpp index 
d75e2084..6f6d972a 100644 --- a/src/scan_windirs.cpp +++ b/src/scan_windirs.cpp @@ -517,7 +517,6 @@ void scan_windirs(scanner_params &sp) //debug = sp.info->config->debug; return; } - if (sp.phase==scanner_params::PHASE_SHUTDOWN) return; // no shutdown if (sp.phase==scanner_params::PHASE_SCAN){ feature_recorder &wrecorder = sp.named_feature_recorder("windirs"); scan_fatdirs(*sp.sbuf, wrecorder); diff --git a/src/scan_wordlist.cpp b/src/scan_wordlist.cpp index 1db961e7..70934237 100644 --- a/src/scan_wordlist.cpp +++ b/src/scan_wordlist.cpp @@ -249,13 +249,19 @@ void scan_wordlist(scanner_params &sp) } if (sp.phase==scanner_params::PHASE_SCAN){ + assert (wordlist!=nullptr); wordlist->process_sbuf(sp); } if (sp.phase==scanner_params::PHASE_SHUTDOWN){ + assert (wordlist!=nullptr); wordlist->shutdown(sp); - delete wordlist; - wordlist = nullptr; + } + if (sp.phase==scanner_params::PHASE_CLEANUP){ + if (wordlist) { + delete wordlist; + wordlist = nullptr; + } } } diff --git a/src/scan_xor.cpp b/src/scan_xor.cpp index 4c88a5db..13f904e7 100644 --- a/src/scan_xor.cpp +++ b/src/scan_xor.cpp @@ -6,6 +6,7 @@ #include "config.h" #include "be13_api/scanner_params.h" #include "be13_api/utils.h" +#include "be13_api/formatter.h" static int xor_mask = 255; extern "C" @@ -48,9 +49,7 @@ void scan_xor(scanner_params &sp) } } - std::stringstream ss; - ss << "XOR(" << uint32_t(xor_mask) << ")"; - const pos0_t pos0_xor = pos0 + ss.str(); + const pos0_t pos0_xor = pos0 + (Formatter() << "XOR(" << uint32_t(xor_mask) << ")"); // managed_malloc throws an exception if allocation fails. 
auto *dbuf = sbuf_t::sbuf_malloc(pos0_xor, sbuf.bufsize, sbuf.pagesize); diff --git a/src/test_be.cpp b/src/test_be.cpp index b7f1846d..d5f555e5 100644 --- a/src/test_be.cpp +++ b/src/test_be.cpp @@ -301,76 +301,76 @@ TEST_CASE("scan_json1", "[scanners]") { /* * First packet of a wget from http://www.google.com/ over ipv4: -# ifconfig en0 -en0: flags=8863 mtu 1500 - options=400 - ether 2c:f0:a2:f3:a8:ee - inet6 fe80::1896:319a:43fa:a6fe%en0 prefixlen 64 secured scopeid 0x4 - inet 172.20.0.185 netmask 0xfffff000 broadcast 172.20.15.255 - nd6 options=201 - media: autoselect - status: active -# tcpdump -r packet1.pcap -vvvv -x -reading from file packet1.pcap, link-type EN10MB (Ethernet) -08:39:26.039111 IP (tos 0x0, ttl 64, id 0, offset 0, flags [DF], proto TCP (6), length 64) - 172.20.0.185.59910 > lax30s03-in-f4.1e100.net.http: Flags [SEW], cksum 0x8efd (correct), seq 2878109014, win 65535, options [mss 1460,nop,wscale 6,nop,nop,TS val 1914841783 ecr 0,sackOK,eol], length 0 - 0x0000: 4500 0040 0000 4000 4006 3b8d ac14 00b9 - 0x0010: acd9 a584 ea06 0050 ab8c 7556 0000 0000 - 0x0020: b0c2 ffff 8efd 0000 0204 05b4 0103 0306 - 0x0030: 0101 080a 7222 2ab7 0000 0000 0402 0000 -bash-3.2# xxd packet1.pcap -00000000: d4c3 b2a1 0200 0400 0000 0000 0000 0000 ................ -00000010: 0000 0400 0100 0000 fe7e 0e61 c798 0000 .........~.a.... -00000020: 4e00 0000 4e00 0000 0050 e804 774b 2cf0 N...N....P..wK,. -00000030: a2f3 a8ee 0800 4500 0040 0000 4000 4006 ......E..@..@.@. -00000040: 3b8d ac14 00b9 acd9 a584 ea06 0050 ab8c ;............P.. -00000050: 7556 0000 0000 b0c2 ffff 8efd 0000 0204 uV.............. -00000060: 05b4 0103 0306 0101 080a 7222 2ab7 0000 ..........r"*... -00000070: 0000 0402 0000 ...... 
+ # ifconfig en0 + en0: flags=8863 mtu 1500 + options=400 + ether 2c:f0:a2:f3:a8:ee + inet6 fe80::1896:319a:43fa:a6fe%en0 prefixlen 64 secured scopeid 0x4 + inet 172.20.0.185 netmask 0xfffff000 broadcast 172.20.15.255 + nd6 options=201 + media: autoselect + status: active + # tcpdump -r packet1.pcap -vvvv -x + reading from file packet1.pcap, link-type EN10MB (Ethernet) + 08:39:26.039111 IP (tos 0x0, ttl 64, id 0, offset 0, flags [DF], proto TCP (6), length 64) + 172.20.0.185.59910 > lax30s03-in-f4.1e100.net.http: Flags [SEW], cksum 0x8efd (correct), seq 2878109014, win 65535, options [mss 1460,nop,wscale 6,nop,nop,TS val 1914841783 ecr 0,sackOK,eol], length 0 + 0x0000: 4500 0040 0000 4000 4006 3b8d ac14 00b9 + 0x0010: acd9 a584 ea06 0050 ab8c 7556 0000 0000 + 0x0020: b0c2 ffff 8efd 0000 0204 05b4 0103 0306 + 0x0030: 0101 080a 7222 2ab7 0000 0000 0402 0000 + bash-3.2# xxd packet1.pcap + 00000000: d4c3 b2a1 0200 0400 0000 0000 0000 0000 ................ + 00000010: 0000 0400 0100 0000 fe7e 0e61 c798 0000 .........~.a.... + 00000020: 4e00 0000 4e00 0000 0050 e804 774b 2cf0 N...N....P..wK,. + 00000030: a2f3 a8ee 0800 4500 0040 0000 4000 4006 ......E..@..@.@. + 00000040: 3b8d ac14 00b9 acd9 a584 ea06 0050 ab8c ;............P.. + 00000050: 7556 0000 0000 b0c2 ffff 8efd 0000 0204 uV.............. + 00000060: 05b4 0103 0306 0101 080a 7222 2ab7 0000 ..........r"*... + 00000070: 0000 0402 0000 ...... */ /* ethernet frame for packet above. Note that it starts 6 bytes before the source ethernet mac address. * validated with packet decoder at https://hpd.gasmi.net/. 172.20.0.185 → 172.217.165.132 TCP 59910 → 80 [SYN, ECN, CWR] -Ethernet II -Destination: Nomadix_04:77:4b (00:50:e8:04:77:4b) -Source: Apple_f3:a8:ee (2c:f0:a2:f3:a8:ee) -Type: IPv4 (0x0800) -Internet Protocol Version 4 -0100 .... = Version: 4 -.... 
0101 = Header Length: 20 bytes (5) -Differentiated Services Field: 0x00 (DSCP: CS0, ECN: Not-ECT) -Total Length: 64 -Identification: 0x0000 (0) -Flags: 0x40, Don't fragment -Fragment Offset: 0 -Time to Live: 64 -Protocol: TCP (6) -Header Checksum: 0x3b8d (15245) -Header checksum status: Unverified -Source Address: 172.20.0.185 -Destination Address: 172.217.165.132 -Transmission Control Protocol -Source Port: 59910 -Destination Port: 80 -Stream index: 0 -TCP Segment Len: 0 -Sequence Number: 0 -Sequence Number (raw): 2878109014 -Next Sequence Number: 1 -Acknowledgment Number: 0 -Acknowledgment number (raw): 0 -1011 .... = Header Length: 44 bytes (11) -Flags: 0x0c2 (SYN, ECN, CWR) -Window: 65535 -Calculated window size: 65535 -Checksum: 0x8efd -Checksum Status: Unverified -Urgent Pointer: 0 -Options: (24 bytes), Maximum segment size, No-Operation (NOP), Window scale, No-Operation (NOP), No-Operation (NOP), Timestamps, SACK permitted, End of Option List (EOL) -Timestamps + Ethernet II + Destination: Nomadix_04:77:4b (00:50:e8:04:77:4b) + Source: Apple_f3:a8:ee (2c:f0:a2:f3:a8:ee) + Type: IPv4 (0x0800) + Internet Protocol Version 4 + 0100 .... = Version: 4 + .... 0101 = Header Length: 20 bytes (5) + Differentiated Services Field: 0x00 (DSCP: CS0, ECN: Not-ECT) + Total Length: 64 + Identification: 0x0000 (0) + Flags: 0x40, Don't fragment + Fragment Offset: 0 + Time to Live: 64 + Protocol: TCP (6) + Header Checksum: 0x3b8d (15245) + Header checksum status: Unverified + Source Address: 172.20.0.185 + Destination Address: 172.217.165.132 + Transmission Control Protocol + Source Port: 59910 + Destination Port: 80 + Stream index: 0 + TCP Segment Len: 0 + Sequence Number: 0 + Sequence Number (raw): 2878109014 + Next Sequence Number: 1 + Acknowledgment Number: 0 + Acknowledgment number (raw): 0 + 1011 .... 
= Header Length: 44 bytes (11) + Flags: 0x0c2 (SYN, ECN, CWR) + Window: 65535 + Calculated window size: 65535 + Checksum: 0x8efd + Checksum Status: Unverified + Urgent Pointer: 0 + Options: (24 bytes), Maximum segment size, No-Operation (NOP), Window scale, No-Operation (NOP), No-Operation (NOP), Timestamps, SACK permitted, End of Option List (EOL) + Timestamps - */ +*/ uint8_t packet1[] = { 0x00, 0x50, 0xe8, 0x04, 0x77, 0x4b, 0x2c, 0xf0, 0xa2, 0xf3, 0xa8, 0xee, 0x08, 0x00, 0x45, 0x00, 0x00, 0x40, 0x00, 0x00, 0x40, 0x00, 0x40, 0x06, @@ -549,11 +549,15 @@ bool feature_match(const Check &exp, const std::string &line) /* * Run all of the built-in scanners on a specific image, look for the given features, and return the directory. + * These are run single-threaded for ease of debugging. */ std::filesystem::path validate(std::string image_fname, std::vector &expected, bool recurse=true, size_t offset=0) { - debug = getenv("DEBUG") ? true : false; - sbuf_t::debug_range_exception = true; + int start_sbuf_count = sbuf_t::sbuf_count; + + debug = getenv_debug("DEBUG"); + bulk_extractor_set_debug(); // Set from getenv + sbuf_t::debug_range_exception = true; // make sure this is explicitly set scanner_config sc; sc.outdir = NamedTemporaryDirectory(); @@ -585,17 +589,20 @@ std::filesystem::path validate(std::string image_fname, std::vector &expe frs_flags.pedantic = true; // for testing auto *xreport = new dfxml_writer(sc.outdir / "report.xml", false); scanner_set ss(sc, frs_flags, xreport); + //ss.debug_flags.debug_scanners_same_thread = true; // run everything in the same thread ss.add_scanners(scanners_builtin); ss.apply_scanner_commands(); if (image_fname != "" ) { try { auto p = image_process::open( sc.input_fname, false, 65536, 65536); + std::stringstream strs; Phase1::Config cfg; // config for the image_processing system cfg.opt_quiet = true; // do not need status reports - Phase1 phase1(cfg, *p, ss); + Phase1 phase1(cfg, *p, ss, strs); phase1.dfxml_write_create( 0, 
nullptr); + assert (ss.get_threading() == false); ss.phase_scan(); phase1.phase1_run(); delete p; @@ -607,6 +614,10 @@ std::filesystem::path validate(std::string image_fname, std::vector &expe } ss.shutdown(); + /* There should be nothing in the work queue */ + assert( ss.sbufs_in_queue == 0); + assert( ss.bytes_in_queue == 0); + xreport->pop("dfxml"); xreport->close(); delete xreport; @@ -645,6 +656,7 @@ std::filesystem::path validate(std::string image_fname, std::vector &expe } REQUIRE(found); } + REQUIRE(start_sbuf_count == sbuf_t::sbuf_count); return sc.outdir; } @@ -940,7 +952,7 @@ TEST_CASE("image_process", "[phase1]") { /**************************************************************** ** Test the path printer **/ -TEST_CASE("path_printer", "[path_printer]") { +TEST_CASE("path-printer1", "[path_printer]") { scanner_config sc; sc.input_fname = test_dir() / "test_hello.512b.gz"; sc.enable_all_scanners(); @@ -950,9 +962,9 @@ TEST_CASE("path_printer", "[path_printer]") { ss.add_scanners(scanners_builtin); ss.apply_scanner_commands(); - auto reader = image_process::open( sc.input_fname, false, 65536, 65536 ); + image_process *reader = image_process::open( sc.input_fname, false, 65536, 65536 ); std::stringstream str; - class path_printer pp(&ss, reader, str); + class path_printer pp(ss, reader, str); pp.process_path("512-GZIP-0/h"); // create a hex dump REQUIRE(str.str() == "0000: 6865 6c6c 6f40 776f 726c 642e 636f 6d0a hello@world.com.\n"); @@ -960,4 +972,5 @@ TEST_CASE("path_printer", "[path_printer]") { pp.process_path("512-GZIP-2/r"); // create a hex dump with a different path and the /r REQUIRE( str.str() == "14\r\nllo@world.com\n" ); + delete reader; } diff --git a/src/test_be2.cpp b/src/test_be2.cpp index d275e1ad..ee687166 100644 --- a/src/test_be2.cpp +++ b/src/test_be2.cpp @@ -68,10 +68,15 @@ int argv_count(const char **argv) return argc; } +int run_be(std::ostream &ss, const char **argv) +{ + return bulk_extractor_main(ss, ss, argv_count(argv), 
const_cast(argv)); +} + TEST_CASE("e2e-no-args", "[end-to-end]") { const char *argv[] = {"bulk_extractor", nullptr}; std::stringstream ss; - int ret = bulk_extractor_main(ss, ss, 1, const_cast(argv)); + int ret = run_be(ss, argv); REQUIRE( ret==3 ); // produces 3 } @@ -79,7 +84,7 @@ TEST_CASE("e2e-h", "[end-to-end]") { /* Try the -h option */ const char *argv[] = {"bulk_extractor", "-h", nullptr}; std::stringstream ss; - int ret = bulk_extractor_main(ss, ss, 2, const_cast(argv)); + int ret = run_be(ss, argv); REQUIRE( ret==1 ); // -h now produces 1 } @@ -87,7 +92,7 @@ TEST_CASE("e2e-H", "[end-to-end]") { /* Try the -H option */ const char *argv[] = {"bulk_extractor", "-H", nullptr}; std::stringstream ss; - int ret = bulk_extractor_main(ss, ss, 2, const_cast(argv)); + int ret = run_be(ss, argv); REQUIRE( ret==2 ); // -H produces 2 } @@ -97,10 +102,10 @@ TEST_CASE("e2e-0", "[end-to-end]") { /* Try to run twice. There seems to be a problem with the second time through. */ std::string inpath_string = inpath.string(); std::string outdir_string = outdir.string(); - const char *argv[] = {"bulk_extractor", "-0", "-o", outdir_string.c_str(), inpath_string.c_str(), nullptr}; + const char *argv[] = {"bulk_extractor", "-0q", "-o", outdir_string.c_str(), inpath_string.c_str(), nullptr}; std::stringstream cout, cerr; - int ret = bulk_extractor_main(cout, cerr, 5, const_cast(argv)); + int ret = bulk_extractor_main(cout, cerr, argv_count(argv), const_cast(argv)); if (ret!=0) { std::cerr << "STDOUT:" << std::endl << cout.str() << std::endl << std::endl << "STDERR:" << std::endl << cerr.str() << std::endl; REQUIRE( ret==0 ); @@ -109,7 +114,7 @@ TEST_CASE("e2e-0", "[end-to-end]") { std::stringstream().swap(cout); std::stringstream().swap(cerr); - ret = bulk_extractor_main(cout, cerr, 5, const_cast(argv)); + ret = bulk_extractor_main(cout, cerr, argv_count(argv), const_cast(argv)); if (ret!=0) { std::cerr << "STDOUT:" << std::endl << cout.str() << std::endl << std::endl << "STDERR:" 
<< std::endl << cerr.str() << std::endl; @@ -127,7 +132,7 @@ TEST_CASE("scan_find", "[end-to-end]") { std::filesystem::path outdir = NamedTemporaryDirectory(); std::string inpath_string = inpath.string(); std::string outdir_string = outdir.string(); - const char *argv[] = {"bulk_extractor", "-0", "-f", "simsong", "-o", outdir_string.c_str(), inpath_string.c_str(), nullptr}; + const char *argv[] = {"bulk_extractor", "-0q", "-f", "simsong", "-o", outdir_string.c_str(), inpath_string.c_str(), nullptr}; std::stringstream cout, cerr; int ret = bulk_extractor_main(cout, cerr, argv_count(argv), const_cast(argv)); @@ -183,11 +188,9 @@ TEST_CASE("5gb-flatfile", "[end-to-end]") { std::filesystem::path outdir = NamedTemporaryDirectory(); std::string outdir_string = outdir.string(); std::string fgb_string = fgb_path.string(); - const char *argv[] = {"bulk_extractor","-Eemail", notify(), "-1", "-o", outdir_string.c_str(), fgb_string.c_str(), nullptr}; std::stringstream ss; - int ret = bulk_extractor_main(ss, std::cerr, - argv_count(argv), - const_cast(argv)); + const char *argv[] = {"bulk_extractor","-Eemail", notify(), "-1q", "-o", outdir_string.c_str(), fgb_string.c_str(), nullptr}; + int ret = bulk_extractor_main(ss, std::cerr, argv_count(argv), const_cast(argv)); REQUIRE( ret==0 ); /* Look for the output line */ auto lines = getLines( outdir / "report.xml" ); @@ -206,7 +209,8 @@ TEST_CASE("30mb-segmented", "[end-to-end]") { snprintf(fname,sizeof(fname),"30mb-segmented.00%d", segment); std::filesystem::path seg_path = std::filesystem::temp_directory_path() / fname; if (segment==0) seg_base = seg_path; - if (!std::filesystem::exists( seg_path )) { + if (!std::filesystem::exists( seg_path ) || + std::filesystem::file_size( seg_path ) < 30000000) { std::ofstream of(seg_path, std::ios::out | std::ios::binary); REQUIRE( of.is_open()); for (unsigned int i=0;i(argv)); + int ret = bulk_extractor_main(ss, std::cerr, argv_count(argv), const_cast(argv)); REQUIRE( ret==0 ); auto lines = 
getLines( outdir / "report.xml" ); @@ -235,15 +237,16 @@ TEST_CASE("30mb-segmented", "[end-to-end]") { REQUIRE( pos != lines.end()); } -TEST_CASE("path-printer", "[end-to-end]") { +TEST_CASE("path-printer2", "[end-to-end]") { std::filesystem::path inpath = test_dir() / "test_base64json.txt"; std::string inpath_string = inpath.string(); - const char *argv[] = {"bulk_extractor","-p","0:64/h", inpath_string.c_str(), nullptr}; + const char *argv[] = {"bulk_extractor", "-p","0:64/h", inpath_string.c_str(), nullptr}; std::stringstream ss; int ret = bulk_extractor_main(ss, std::cerr, 4, const_cast(argv)); std::string EXPECTED = "0000: 5733 7369 4d53 4936 4943 4a76 626d 5641 596d 467a 5a54 5930 4c6d 4e76 6253 4a39 W3siMSI6ICJvbmVAYmFzZTY0LmNvbSJ9\n" "0020: 4c43 4237 496a 4969 4f69 4169 6448 6476 5147 4a68 6332 5532 4e43 356a 6232 3069 LCB7IjIiOiAidHdvQGJhc2U2NC5jb20i\n"; + std::cerr << "ss: " << std::endl << ss.str() << std::endl; REQUIRE( ret == 0); REQUIRE( ss.str() == EXPECTED); } @@ -254,10 +257,8 @@ TEST_CASE("e2e-CFReDS001", "[end-to-end]") { std::filesystem::path outdir = NamedTemporaryDirectory(); std::string outdir_string = outdir.string(); std::stringstream ss; - const char *argv[] = {"bulk_extractor",notify(), "-1o",outdir_string.c_str(), inpath_string.c_str(), nullptr}; - int ret = bulk_extractor_main(ss, std::cerr, - argv_count(argv), - const_cast(argv)); + const char *argv[] = {"bulk_extractor",notify(), "-1qo",outdir_string.c_str(), inpath_string.c_str(), nullptr}; + int ret = run_be(ss, argv); REQUIRE( ret==0 ); } @@ -267,7 +268,7 @@ TEST_CASE("e2e-email_test", "[end-to-end]") { std::filesystem::path outdir = NamedTemporaryDirectory(); std::string outdir_string = outdir.string(); std::stringstream ss; - const char *argv[] = {"bulk_extractor", notify(), "-1o",outdir_string.c_str(), inpath_string.c_str(), nullptr}; + const char *argv[] = {"bulk_extractor", notify(), "-1qo",outdir_string.c_str(), inpath_string.c_str(), nullptr}; int ret = 
bulk_extractor_main(ss, std::cerr, argv_count(argv), const_cast(argv)); diff --git a/src/tests/run_each.sh b/src/tests/run_each.sh index 16d8f3c1..630ce251 100755 --- a/src/tests/run_each.sh +++ b/src/tests/run_each.sh @@ -1,9 +1,11 @@ #!/bin/bash # run each test once -tests=$(./test_be -l | egrep -v 'All available|test cases|\[') +PATH=$PATH:.:.. +tests=$(test_be -l | egrep -v 'All available|test cases|\[') for test in $tests ; do echo ========== $test =========== - ./test_be $test + echo '$' test_be $test + test_be $test echo echo done