Skip to content

Commit

Permalink
Merge pull request #309 from simsong/slg-dev
Browse files Browse the repository at this point in the history
Slg dev
  • Loading branch information
simsong authored Dec 14, 2021
2 parents 7373302 + 72bb3e8 commit 69345a5
Show file tree
Hide file tree
Showing 22 changed files with 185 additions and 146 deletions.
20 changes: 11 additions & 9 deletions .github/workflows/continuous-integration-pip.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,22 +35,22 @@ jobs:
run: |
echo "" | bash etc/CONFIGURE_UBUNTU20LTS.bash
- name: C++ checks with address-sanitizer
# Temporarily, just on MacOS
if: startsWith(matrix.os, 'macos')
- name: C++ checks with address-sanitizer (Mac and Linux)
run: |
bash bootstrap.sh
./configure -q --disable-opt --enable-silent-rules --enable-address-sanitizer
make && (cd src; make check || (cat test-suite.log; exit 1))
make
(cd src; make check || (cat test-suite.log; exit 1))
make distclean
- name: C++ checks with thread-sanitizer
# Just run on ubuntu
# Temporarily disabled
- name: C++ checks with thread-sanitizer (disabled)
if: startsWith(matrix.os, 'ubuntu-DISABLED')
run: |
bash bootstrap.sh
./configure -q --disable-opt --enable-silent-rules --enable-thread-sanitizer
make && (cd src; make check || (cat test-suite.log; exit 1))
make
(cd src; make check || (cat test-suite.log; exit 1))
make distclean
- name: C++ checks with codecov
if: startsWith(matrix.os, 'ubuntu')
Expand Down Expand Up @@ -84,4 +84,6 @@ jobs:

- name: distcheck
run: |
make clean && make distcheck
./configure -q
make clean
make distcheck
2 changes: 1 addition & 1 deletion configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -331,7 +331,7 @@ AC_MSG_NOTICE([ LDFLAGS: $LDFLAGS])
#

if test x"${libewf}" != "xyes" ; then
AC_MSG_WARN([libewf-devel must be installed for E01 support.])
AC_MSG_WARN([libewf not installed. Please install libewf-devel for E01 support. See the etc/ directory for details.])
fi

AC_OUTPUT
Expand Down
2 changes: 1 addition & 1 deletion etc/CONFIGURE_UBUNTU20LTS.bash
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ fi

echo Will now try to install

sudo apt update -y || fail could not apt update
sudo apt upgrade -y || fail could not apt upgrade
sudo apt install -y $MKPGS || fail could not apt install $MKPGS

echo manually installing a modern libewf
Expand Down
4 changes: 3 additions & 1 deletion src/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,6 @@ bulk_extractor_parts = \
bulk_extractor.cpp \
bulk_extractor.h \
cxxopts.hpp \
findopts.h \
image_process.cpp \
image_process.h \
notify_thread.cpp \
Expand All @@ -129,6 +128,9 @@ bulk_extractor_SOURCES = $(bulk_extractor_parts) $(scanners_builtin) main.cpp
test_be_SOURCES = $(bulk_extractor_parts) $(scanners_builtin) be13_api/catch.hpp test_be.cpp test_be.h test_be2.cpp


runs.txt: test_be tests/run_each.sh
bash tests/run_each.sh > runs.txt 2>&1

#lib: libbulkextractor.so

#libbulkextractor.so: $(bulk_extractorlib_OBJECTS)
Expand Down
2 changes: 1 addition & 1 deletion src/be13_api
49 changes: 32 additions & 17 deletions src/bulk_extractor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -133,14 +133,19 @@ std::string ns_to_sec(uint64_t ns)
return std::to_string(sec100/100) + std::string(".") +std::to_string(tens) + std::to_string(hundredths);
}

int bulk_extractor_main( std::ostream &cout, std::ostream &cerr, int argc,char * const *argv)
void bulk_extractor_set_debug()
{
sbuf_t::debug_range_exception = getenv_debug("DEBUG_SBUF_RANGE_EXCEPTION");
sbuf_t::debug_alloc = getenv_debug("DEBUG_SBUF_ALLOC");
sbuf_t::debug_leak = getenv_debug("DEBUG_SBUF_LEAK");
int64_t sbuf_count = sbuf_t::sbuf_count;
if (sbuf_count!=0) {
std::cerr << "sbuf_count=" << sbuf_count << " at start of execution." << std::endl;
}

int bulk_extractor_main( std::ostream &cout, std::ostream &cerr, int argc,char * const *argv)
{
bulk_extractor_set_debug();
int64_t start_sbuf_count = sbuf_t::sbuf_count;
if (start_sbuf_count!=0) {
cerr << "start_sbuf_count=" << start_sbuf_count << " at start of execution." << std::endl;
}

mtrace();
Expand Down Expand Up @@ -228,8 +233,8 @@ int bulk_extractor_main( std::ostream &cout, std::ostream &cerr, int argc,char *
"directories for scanner shared libraries (can be repeated). "
"Default directories include /usr/local/lib/bulk_extractor, /usr/lib/bulk_extractor "
"and any directories specified in the BE_PATH environment variable.", cxxopts::value<std::vector<std::string>>())
("p,path", "print the value of <path>[:length][/h][/r] with optional length, hex output, or raw output.", cxxopts::value<std::string>())
("q,quit", "no status output")
("p,path", "print the value of <path>[:length][/h][/r] with optional length, hex output, or raw output.", cxxopts::value<std::string>())
("q,quit", "no status or performance output")
("r,alert_list", "file to read alert list from", cxxopts::value<std::string>())
("R,recurse", "treat image file as a directory to recursively explore")
("S,set", "set a name=value option (can be repeated)", cxxopts::value<std::vector<std::string>>())
Expand Down Expand Up @@ -439,7 +444,7 @@ int bulk_extractor_main( std::ostream &cout, std::ostream &cerr, int argc,char *

struct feature_recorder_set::flags_t f;
scanner_set ss( sc, f, nullptr); // make a scanner_set but with no XML writer. We will create it below
ss.add_scanners( scanners_builtin);
ss.add_scanners( scanners_builtin );

/* Applying the scanner commands will create the alert recorder. */
try {
Expand Down Expand Up @@ -475,27 +480,29 @@ int bulk_extractor_main( std::ostream &cout, std::ostream &cerr, int argc,char *
/* are we supposed to run the path printer? If so, we can use cout_, since the notify stream won't be running. */
if ( result.count( "path" ) ) {
std::string opt_path = result["path"].as<std::string>();
path_printer pp( &ss, p, cout);
path_printer pp( ss, p, cout);
if ( opt_path=="-http" || opt_path=="--http" ){
pp.process_http( std::cin);
} else if ( opt_path=="-i" || opt_path=="-" ){
pp.process_interactive( std::cin);
} else {
pp.process_path( opt_path);
}
delete p;
return 0;
}

/* Open the image file ( or the device) now.
* We use *p because we don't know which subclass we will be getting.
*/

/* Strangely, if we make xreport a stack variable, we fail */
dfxml_writer *xreport = new dfxml_writer( sc.outdir / Phase1::REPORT_FILENAME, false ); // do not make DTD
ss.set_dfxml_writer( xreport );
/* Start the clock */
master_timer.start();

Phase1 phase1( cfg, *p, ss);
Phase1 phase1( cfg, *p, ss, cout);

/* Validate the args */
validate_path( sc.input_fname );
Expand Down Expand Up @@ -566,6 +573,7 @@ int bulk_extractor_main( std::ostream &cout, std::ostream &cerr, int argc,char *
catch ( const feature_recorder::DiskWriteError &e ) {
cerr << "Disk write error during Phase 1 ( scanning). Disk is probably full." << std::endl
<< "Remove extra files and restart bulk_extractor with the exact same command line to continue." << std::endl;
// do not call ss.shutdown() to avoid writing out histograms
return 6;
}

Expand All @@ -588,7 +596,7 @@ int bulk_extractor_main( std::ostream &cout, std::ostream &cerr, int argc,char *
if ( !cfg.opt_quiet) cout << "Phase 2. Shutting down scanners" << std::endl ;
xreport->add_timestamp( "phase2 start" );
try {
std::cout << "Computing final histograms and shutting down..." << std::endl ;
cout << "Computing final histograms and shutting down..." << std::endl ;
ss.shutdown();
}
catch ( const feature_recorder::DiskWriteError &e ) {
Expand Down Expand Up @@ -650,9 +658,9 @@ int bulk_extractor_main( std::ostream &cout, std::ostream &cerr, int argc,char *
<< std::endl;

if (ss.producer_wait_ns() > ss.consumer_wait_ns_per_worker()){
std::cout << "*** More time spent waiting for workers. You need faster CPU or more cores for improved performance." << std::endl;
cout << "*** More time spent waiting for workers. You need faster CPU or more cores for improved performance." << std::endl;
} else {
std::cout << "*** More time spent waiting for reader. You need faster I/O for improved performance." << std::endl;
cout << "*** More time spent waiting for reader. You need faster I/O for improved performance." << std::endl;
}
}

Expand All @@ -664,19 +672,26 @@ int bulk_extractor_main( std::ostream &cout, std::ostream &cerr, int argc,char *
cout << "Did not scan for email addresses." << std::endl;
}

if (sbuf_count != sbuf_t::sbuf_count) {
std::cerr << "Initial sbuf_t.sbuf_total=" << sbuf_count << " end sbuf_count=" << sbuf_t::sbuf_count << std::endl;
if (start_sbuf_count != sbuf_t::sbuf_count) {
cerr << "sbuf_t leak detected. Initial sbuf_t.sbuf_total=" << start_sbuf_count << " end sbuf_count=" << sbuf_t::sbuf_count << std::endl;
if (sbuf_t::debug_leak) {
for (auto const &it : sbuf_t::sbuf_alloced) {
std::cerr << it << std::endl;
std::cerr << " " << *it << std::endl;
cerr << it << std::endl;
cerr << " " << *it << std::endl;
}
} else {
std::cerr << "Leaked sbuf. set DEBUG_SBUF_ALLOC=1 or DEBUG_SBUF_LEAK=1 to diagnose" << std::endl;
cerr << "Leaked sbuf. set DEBUG_SBUF_ALLOC=1 or DEBUG_SBUF_LEAK=1 to diagnose" << std::endl;
}
throw std::runtime_error("leaked sbuf");
}

/* Cleanup */

delete xreport; // no longer needed
xreport=nullptr; // and zero it out.
delete p;
p = nullptr;

muntrace();
return( 0 );
}
1 change: 1 addition & 0 deletions src/bulk_extractor.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

[[noreturn]] void debug_help();
void validate_path(const std::filesystem::path fn);
void bulk_extractor_set_debug();
int bulk_extractor_main(std::ostream &cout, std::ostream &cerr, int argc,char * const *argv);

#endif
2 changes: 2 additions & 0 deletions src/pattern_scanner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -295,6 +295,8 @@ void scan_lg(PatternScanner& scanner, class scanner_params &sp) {
case scanner_params::PHASE_SHUTDOWN:
scanner.shutdown(sp);
break;
case scanner_params::PHASE_CLEANUP:
// TODO - do something here.
default:
break;
}
Expand Down
6 changes: 3 additions & 3 deletions src/phase1.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@

using namespace std::chrono_literals;

Phase1::Phase1(Config &config_, image_process &p_, scanner_set &ss_):
config(config_), p(p_), ss(ss_), xreport(*ss_.get_dfxml_writer())
Phase1::Phase1(Config &config_, image_process &p_, scanner_set &ss_, std::ostream &cout_):
config(config_), p(p_), ss(ss_), cout(cout_), xreport(*ss_.get_dfxml_writer())
{
}

Expand Down Expand Up @@ -226,7 +226,6 @@ void Phase1::read_process_sbufs()
}

if (config.fraction_done) *config.fraction_done = 1.0;
if (!config.opt_quiet) std::cout << "All data read; waiting for threads to finish..." << std::endl;
}

void Phase1::dfxml_write_create(int argc, char * const *argv)
Expand Down Expand Up @@ -284,6 +283,7 @@ void Phase1::phase1_run()
}
xreport.push("runtime","xmlns:debug=\"http://www.github.com/simsong/bulk_extractor/issues\"");
read_process_sbufs();
if (!config.opt_quiet) cout << "All data read; waiting for threads to finish..." << std::endl;
ss.join();
xreport.pop("runtime");
dfxml_write_source(); // written here so it may also include hash
Expand Down
4 changes: 3 additions & 1 deletion src/phase1.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

#include <thread>
#include <atomic>
#include <ostream>

#include "be13_api/scanner_set.h"
#include "be13_api/dfxml_cpp/src/dfxml_writer.h"
Expand Down Expand Up @@ -79,6 +80,7 @@ class Phase1 {
Config &config; // phase1 config passed in. Writable so seen can be updated.
image_process &p; // image being processed
scanner_set &ss; // our scanner set
std::ostream &cout;

u_int notify_ctr {0}; // for random sampling
uint64_t total_bytes {0}; // processed
Expand All @@ -92,7 +94,7 @@ class Phase1 {
sbuf_t *get_sbuf(image_process::iterator &it);


Phase1(Config &config_, image_process &p_, scanner_set &ss_);
Phase1(Config &config_, image_process &p_, scanner_set &ss_, std::ostream &cout_);
void dfxml_write_create(int argc, char * const *argv); // create the DFXML header
void dfxml_write_source(); // create the DFXML <source> block
void read_process_sbufs(); // read and process the sbufs
Expand Down
1 change: 0 additions & 1 deletion src/scan_exiv2.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,6 @@ void scan_exiv2(struct scanner_params &sp)
sp.info->feature_defs.push_back( feature_recorder_def("gps"));
return;
}
if(sp.phase==scanner_params::PHASE_SHUTDOWN) return;
if(sp.phase==scanner_params::PHASE_SCAN){

const sbuf_t &sbuf = sp.sbuf;
Expand Down
4 changes: 1 addition & 3 deletions src/scan_find.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,9 +60,7 @@ void scan_find(scanner_params &sp)
sp.info->histogram_defs.push_back( histogram_def("find", "find", "", "","histogram", lowercase));
return;
}
if(sp.phase==scanner_params::PHASE_SHUTDOWN) return;

if (scanner_params::PHASE_INIT2 == sp.phase) {
if (sp.phase == scanner_params::PHASE_INIT2 ) {
for (const auto &it : sp.ss->find_patterns()) {
add_find_pattern(it);
if (sp.ss->writer) { sp.ss->writer->xmlout("find_pattern", it); }
Expand Down
7 changes: 4 additions & 3 deletions src/scan_gzip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,10 @@ void scan_gzip(scanner_params &sp)
if( sbuf_decompress::is_gzip_header( sbuf, i)){
auto *decomp = sbuf_decompress::sbuf_new_decompress( sbuf.slice(i),
gzip_max_uncompr_size, "GZIP" ,sbuf_decompress::mode_t::GZIP, 0);
if (decomp==nullptr) continue;
assert(sbuf.depth() +1 == decomp->depth());
sp.recurse(decomp); // recurse will free the sbuf
if (decomp!=nullptr) {
assert(sbuf.depth()+1 == decomp->depth()); // make sure it is 1 deeper!
sp.recurse(decomp); // recurse will free the sbuf
}
}
}
}
Expand Down
1 change: 0 additions & 1 deletion src/scan_hiberfile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,6 @@ void scan_hiberfile(scanner_params &sp)
sp.info->min_sbuf_size = MIN_COMPRESSED_SIZE;
return; /* no features */
}
if (sp.phase==scanner_params::PHASE_SHUTDOWN) return;
if (sp.phase==scanner_params::PHASE_SCAN){

/* Do not scan for hibernation decompression if we are already
Expand Down
2 changes: 0 additions & 2 deletions src/scan_httplogs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,6 @@ void scan_httplogs(scanner_params &sp)
return;
}

if(sp.phase==scanner_params::PHASE_SHUTDOWN) return;

if(sp.phase==scanner_params::PHASE_SCAN){
feature_recorder &httplogs_recorder = sp.named_feature_recorder("httplogs");
const sbuf_t &sbuf = *(sp.sbuf);
Expand Down
2 changes: 1 addition & 1 deletion src/scan_net.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -895,7 +895,7 @@ void scan_net(scanner_params &sp)
*/
}
}
if (sp.phase==scanner_params::PHASE_SHUTDOWN){
if (sp.phase==scanner_params::PHASE_CLEANUP){
if (scanner){
delete scanner;
scanner = nullptr;
Expand Down
1 change: 0 additions & 1 deletion src/scan_windirs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -517,7 +517,6 @@ void scan_windirs(scanner_params &sp)
//debug = sp.info->config->debug;
return;
}
if (sp.phase==scanner_params::PHASE_SHUTDOWN) return; // no shutdown
if (sp.phase==scanner_params::PHASE_SCAN){
feature_recorder &wrecorder = sp.named_feature_recorder("windirs");
scan_fatdirs(*sp.sbuf, wrecorder);
Expand Down
10 changes: 8 additions & 2 deletions src/scan_wordlist.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -249,13 +249,19 @@ void scan_wordlist(scanner_params &sp)
}

if (sp.phase==scanner_params::PHASE_SCAN){
assert (wordlist!=nullptr);
wordlist->process_sbuf(sp);
}

if (sp.phase==scanner_params::PHASE_SHUTDOWN){
assert (wordlist!=nullptr);
wordlist->shutdown(sp);
delete wordlist;
wordlist = nullptr;
}
if (sp.phase==scanner_params::PHASE_CLEANUP){
if (wordlist) {
delete wordlist;
wordlist = nullptr;
}
}
}

Expand Down
5 changes: 2 additions & 3 deletions src/scan_xor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#include "config.h"
#include "be13_api/scanner_params.h"
#include "be13_api/utils.h"
#include "be13_api/formatter.h"

static int xor_mask = 255;
extern "C"
Expand Down Expand Up @@ -48,9 +49,7 @@ void scan_xor(scanner_params &sp)
}
}

std::stringstream ss;
ss << "XOR(" << uint32_t(xor_mask) << ")";
const pos0_t pos0_xor = pos0 + ss.str();
const pos0_t pos0_xor = pos0 + (Formatter() << "XOR(" << uint32_t(xor_mask) << ")");

// managed_malloc throws an exception if allocation fails.
auto *dbuf = sbuf_t::sbuf_malloc(pos0_xor, sbuf.bufsize, sbuf.pagesize);
Expand Down
Loading

0 comments on commit 69345a5

Please sign in to comment.