From 95f26e9759fc5ebb56d2aa24ed70529f34671cdd Mon Sep 17 00:00:00 2001 From: bpuchala Date: Sat, 13 Aug 2016 13:59:12 -0400 Subject: [PATCH 01/10] make opening casm-learn input easier --- python/casm/casm/learn/__init__.py | 3 ++- python/casm/casm/learn/fit.py | 25 +++++++++++++++++++++++++ python/casm/scripts/casm-learn | 8 +------- 3 files changed, 28 insertions(+), 8 deletions(-) diff --git a/python/casm/casm/learn/__init__.py b/python/casm/casm/learn/__init__.py index 20eea5b79..effa1fa92 100644 --- a/python/casm/casm/learn/__init__.py +++ b/python/casm/casm/learn/__init__.py @@ -70,7 +70,7 @@ def create_halloffame(maxsize, rel_tol=1e-6): from fit import example_input_Lasso, example_input_LassoCV, example_input_RFE, \ example_input_GeneticAlgorithm, example_input_IndividualBestFirst, \ example_input_PopulationBestFirst, example_input_DirectSelection, \ - set_input_defaults, \ + open_input, set_input_defaults, \ FittingData, TrainingData, \ print_input_help, print_individual, print_population, print_halloffame, print_eci, \ to_json, open_halloffame, save_halloffame, \ @@ -90,6 +90,7 @@ def create_halloffame(maxsize, rel_tol=1e-6): 'example_input_IndividualBestFirst', 'example_input_PopulationBestFirst', 'example_input_DirectSelection', + 'open_input', 'set_input_defaults', 'FittingData', 'TrainingData', diff --git a/python/casm/casm/learn/fit.py b/python/casm/casm/learn/fit.py index 6aac8cec9..948a24f92 100644 --- a/python/casm/casm/learn/fit.py +++ b/python/casm/casm/learn/fit.py @@ -1101,6 +1101,31 @@ def set_input_defaults(input, input_filename=None): return input +def open_input(input_filename): + """ + Read casm-learn input file into a dict + + Arguments + --------- + + input_filename: str + The path to the input file + + Returns + ------- + input: dict + The result of reading the input file and running it through + casm.learn.set_input_defaults + """ + # open input and always set input defaults before doing anything else + with open(input_filename, 'r') as 
f: + try: + input = set_input_defaults(json.load(f), input_filename) + except Exception as e: + print "Error parsing JSON in", input_filename + raise + return input + class FittingData(object): """ FittingData holds feature values, target values, sample weights, etc. used diff --git a/python/casm/scripts/casm-learn b/python/casm/scripts/casm-learn index b9af11de7..60db64160 100755 --- a/python/casm/scripts/casm-learn +++ b/python/casm/scripts/casm-learn @@ -61,13 +61,7 @@ if __name__ == "__main__": if args.verbose: print "Loading", args.settings[0] - # open input and always set input defaults before doing anything else - with open(args.settings[0], 'r') as f: - try: - input = casm.learn.set_input_defaults(json.load(f), args.settings[0]) - except Exception as e: - print "Error parsing JSON in", args.settings[0] - raise e + input = casm.learn.open_input(args.settings[0]) if args.hall: From 40d3f9e9014624e0a8c6da86454316ad9bd08f42 Mon Sep 17 00:00:00 2001 From: bpuchala Date: Sat, 13 Aug 2016 21:00:20 -0400 Subject: [PATCH 02/10] fix bug preventing lte1 from printing configname --- src/casm/monte_carlo/grand_canonical/GrandCanonical.cc | 4 ++-- src/casm/monte_carlo/grand_canonical/GrandCanonicalIO.cc | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/casm/monte_carlo/grand_canonical/GrandCanonical.cc b/src/casm/monte_carlo/grand_canonical/GrandCanonical.cc index 6963be73b..263bae07f 100644 --- a/src/casm/monte_carlo/grand_canonical/GrandCanonical.cc +++ b/src/casm/monte_carlo/grand_canonical/GrandCanonical.cc @@ -40,7 +40,7 @@ namespace CASM { _log() << "use_deltas: " << std::boolalpha << m_use_deltas << "\n"; _log() << "\nSampling: \n"; _log() << std::setw(24) << "quantity" << std::setw(24) << "requested_precision" << "\n"; - for(auto it=samplers().begin(); it!=samplers().end(); ++it) { + for(auto it = samplers().begin(); it != samplers().end(); ++it) { _log() << std::setw(24) << it->first; if(it->second->must_converge()) { _log() << 
std::setw(24) << it->second->requested_precision() << std::endl; @@ -109,7 +109,7 @@ namespace CASM { if(settings.is_motif_configname()) { - std::string configname = settings.motif_configname(); + configname = settings.motif_configname(); if(configname == "default") { configdof = _default_motif(); diff --git a/src/casm/monte_carlo/grand_canonical/GrandCanonicalIO.cc b/src/casm/monte_carlo/grand_canonical/GrandCanonicalIO.cc index 5dc7d91e9..b69144f05 100644 --- a/src/casm/monte_carlo/grand_canonical/GrandCanonicalIO.cc +++ b/src/casm/monte_carlo/grand_canonical/GrandCanonicalIO.cc @@ -286,6 +286,7 @@ namespace CASM { DataFormatter formatter; + formatter.push_back(ConstantValueFormatter("configname", configname)); formatter.push_back(MonteCarloTFormatter()); formatter.push_back(GrandCanonicalLTEFormatter(phi_LTE1)); std::set exclude; @@ -294,7 +295,6 @@ namespace CASM { // always sample Beta, potential_energy, and formation_energy { formatter.push_back(MonteCarloBetaFormatter()); - formatter.push_back(ConstantValueFormatter("configname", configname)); name = "gs_potential_energy"; auto evaluator = [ = ](const ConstMonteCarloPtr & ptr) { return static_cast(ptr)->potential_energy(); From ce9588f4a6a5ace43c036c2ac080d31951d6a803 Mon Sep 17 00:00:00 2001 From: bpuchala Date: Sun, 14 Aug 2016 01:58:04 -0400 Subject: [PATCH 03/10] add --desc options to seperate extended description from -h output; update 'casm status' for init --- src/casm/app/bset.cc | 6 +++ src/casm/app/composition.cc | 9 ++++- src/casm/app/enum.cc | 6 +++ src/casm/app/files.cc | 6 +++ src/casm/app/format.cc | 40 ++++++++++++++++--- src/casm/app/import.cc | 7 ++++ src/casm/app/init.cc | 7 ++++ src/casm/app/monte.cc | 10 +++++ src/casm/app/perturb.cc | 7 ++++ src/casm/app/ref.cc | 9 ++++- src/casm/app/run.cc | 6 +++ src/casm/app/settings.cc | 51 +++++++++++++++++++++++- src/casm/app/status.cc | 71 +++++++++++++++++++++++++++++----- src/casm/app/super.cc | 8 +++- src/casm/app/sym.cc | 6 +++ 
src/casm/app/update.cc | 7 ++++ src/casm/app/view.cc | 6 +++ src/casm/completer/Handlers.cc | 3 +- 18 files changed, 245 insertions(+), 20 deletions(-) diff --git a/src/casm/app/bset.cc b/src/casm/app/bset.cc index 7f4eec942..24065ec8c 100644 --- a/src/casm/app/bset.cc +++ b/src/casm/app/bset.cc @@ -50,6 +50,12 @@ namespace CASM { args.log << "\n"; args.log << bset_opt.desc() << std::endl; + return 0; + } + + if(vm.count("desc")) { + args.log << "\n"; + args.log << bset_opt.desc() << std::endl; args.log << "DESCRIPTION" << std::endl; args.log << " Generate and inspect cluster basis functions. A bspecs.json file should be available at\n" << " $ROOT/basis_set/$current_bset/bspecs.json\n" diff --git a/src/casm/app/composition.cc b/src/casm/app/composition.cc index e5aae3672..54c9021a4 100644 --- a/src/casm/app/composition.cc +++ b/src/casm/app/composition.cc @@ -78,7 +78,7 @@ namespace CASM { bool call_help = false; //quit out if there are no arguments - if(!vm.count("help")) { + if(!vm.count("help") && !vm.count("desc")) { if(vm.count("calc") + vm.count("select") + vm.count("display") + vm.count("update") != 1) { std::cout << "Error in 'casm composition'. You need to use either --calc, --select, --display, or --update." 
<< std::endl; call_help = true; @@ -91,6 +91,13 @@ namespace CASM { std::cout << std::endl; std::cout << comp_opt.desc() << std::endl; + return 0; + } + + if(vm.count("desc")) { + std::cout << std::endl; + std::cout << comp_opt.desc() << std::endl; + std::cout << "DESCRIPTION" << std::endl; std::cout << " Setup the composition axes.\n"; std::cout << " - expects a PRIM file in the project root directory \n"; diff --git a/src/casm/app/enum.cc b/src/casm/app/enum.cc index aeff2f736..71d57ac45 100644 --- a/src/casm/app/enum.cc +++ b/src/casm/app/enum.cc @@ -85,6 +85,12 @@ namespace CASM { std::cout << "\n"; std::cout << enum_opt.desc() << std::endl; + return 0; + } + + if(vm.count("desc")) { + std::cout << "\n"; + std::cout << enum_opt.desc() << std::endl; std::cout << "DESCRIPTION" << std::endl; std::cout << " Enumerate supercells and configurations\n"; diff --git a/src/casm/app/files.cc b/src/casm/app/files.cc index 813c9c953..0d337cf96 100644 --- a/src/casm/app/files.cc +++ b/src/casm/app/files.cc @@ -85,6 +85,12 @@ namespace CASM { std::cout << "\n"; std::cout << files_opt.desc() << std::endl; + return 0; + } + + if(vm.count("desc")) { + std::cout << "\n"; + std::cout << files_opt.desc() << std::endl; std::cout << "DESCRIPTION \n" " Enumerate files used by this CASM project\n" diff --git a/src/casm/app/format.cc b/src/casm/app/format.cc index 4c89e79c4..5ff8bb272 100644 --- a/src/casm/app/format.cc +++ b/src/casm/app/format.cc @@ -51,6 +51,13 @@ namespace CASM { std::cout << std::endl; std::cout << format_opt.desc() << std::endl; + return 0; + } + + if(vm.count("desc")) { + std::cout << "\n"; + std::cout << format_opt.desc() << std::endl; + std::cout << "DESCRIPTION" << std::endl; std::cout << " This option describes the files contained within a CASM project \n"; std::cout << " and where to find them. 
For a summary of the directory structure\n"; @@ -173,17 +180,40 @@ namespace CASM { std::cout << "LOCATION WHEN GENERATED:\n"; std::cout << "$ROOT/prim.json\n"; - std::cout << "$ROOT/PRIM\n\n\n"; + std::cout << "$ROOT/PRIM (legacy)\n\n\n"; std::cout << "DESCRIPTION:\n"; std::cout << "'prim.json' describes the primitive cell structure. It includes the lattice \n"; std::cout << "vectors, crystal basis sites and a list of possible occupant molecules on each\n"; std::cout << "basis site.\n\n"; - std::cout << "- Molecule names are case sensitive.\n"; - std::cout << "- 'Va' is reserved for vacancies.\n"; - std::cout << "- The default tolerance for checking symmetry is 1e-5, so basis site coordinates\n"; - std::cout << " should include 6 significant digits or more.\n\n\n"; + std::cout << "'prim.json' parameters: \n\n" + + "\"title\" (string): \n" + " A title for the project. Must consist of alphanumeric characters \n" + " and underscores only. The first character may not be a number. \n\n" + + "\"lattice_vectors\" (JSON array of 3 JSON arrays of 3 numbers): \n" + " Lattice vectors for the primitive structure, in Angstroms. \n\n" + + "\"coordinate_mode\" (string): \n" + " Coordinate mode for basis sites. One of: \n" + " \"Fractional\" or \"Direct\", \n" + " \"Cartesian\" \n\n" + + "\"basis\" (JSON array of JSON objects): \n\n" + + " /\"coordinate\" (JSON array of 3 numbers): \n" + " Coordinate of the basis site with units as specified by the \n" + " the \"coordinate_mode\" parameter. The default tolerance for \n" + " checking symmetry is 1e-5, so basis site coordinates should \n" + " include 6 significant digits or more. \n" + + " /\"occupant_dof\" (JSON array of string): \n" + " A list of the possible occupant atoms (and in future versions \n" + " CASM, molecules) that on each site. The names are case \n" + " sensitive, and \"Va\" is reserved for vacancies. 
\n\n\n"; + std::cout << "EXAMPLE 1: An FCC ternary alloy of elements A, B, and C\n"; std::cout << "-------\n"; diff --git a/src/casm/app/import.cc b/src/casm/app/import.cc index f75ab3dc5..755be2e9c 100644 --- a/src/casm/app/import.cc +++ b/src/casm/app/import.cc @@ -99,6 +99,13 @@ namespace CASM { std::cout << std::endl; std::cout << import_opt.desc() << std::endl; + return 0; + } + + if(vm.count("desc")) { + std::cout << "\n"; + std::cout << import_opt.desc() << std::endl; + std::cout << "DESCRIPTION" << std::endl; std::cout << " Import structure specified by --pos. If it doesn't exist make a directory for it and copy data over" << std::endl; std::cout << " If a *.json file is specified, it will be interpreted as a 'calc.properties.json' file." << std::endl; diff --git a/src/casm/app/init.cc b/src/casm/app/init.cc index 6f68e782d..8d5cac5e6 100644 --- a/src/casm/app/init.cc +++ b/src/casm/app/init.cc @@ -42,6 +42,13 @@ namespace CASM { std::cout << "\n"; std::cout << init_opt.desc() << std::endl; + return 0; + } + + if(vm.count("desc")) { + std::cout << "\n"; + std::cout << init_opt.desc() << std::endl; + std::cout << "DESCRIPTION \n" << " Initialize a new CASM project in the current directory.\n" << " - Expects a prim.json file in the current directory \n" << diff --git a/src/casm/app/monte.cc b/src/casm/app/monte.cc index 8673b5650..e462c89be 100644 --- a/src/casm/app/monte.cc +++ b/src/casm/app/monte.cc @@ -14,6 +14,11 @@ namespace CASM { void print_monte_help(const po::options_description &desc) { std::cout << "\n"; std::cout << desc << std::endl; + } + + void print_monte_desc(const po::options_description &desc) { + std::cout << "\n"; + std::cout << desc << std::endl; std::cout << "DESCRIPTION\n" << " Perform Monte Carlo calculations. 
\n\n" << @@ -91,6 +96,11 @@ namespace CASM { return 0; } + if(vm.count("desc")) { + print_monte_desc(monte_opt.desc()); + return 0; + } + po::notify(vm); // throws on error, so do after help in case // there are any problems diff --git a/src/casm/app/perturb.cc b/src/casm/app/perturb.cc index 74f8070fe..72795f056 100644 --- a/src/casm/app/perturb.cc +++ b/src/casm/app/perturb.cc @@ -58,6 +58,13 @@ namespace CASM { std::cout << "\n"; std::cout << perturb_opt.desc() << std::endl; + return 0; + } + + if(vm.count("desc")) { + std::cout << "\n"; + std::cout << perturb_opt.desc() << std::endl; + std::cout << "DESCRIPTION" << std::endl; std::cout << " Generate supercells that are perturbations of a reference\n"; std::cout << " configuration. \n"; diff --git a/src/casm/app/ref.cc b/src/casm/app/ref.cc index c234369e7..f468d8e9c 100644 --- a/src/casm/app/ref.cc +++ b/src/casm/app/ref.cc @@ -191,7 +191,7 @@ namespace CASM { bool call_help = false; //quit out if there are no arguments - if(!vm.count("help")) { + if(!vm.count("help") && !vm.count("desc")) { if(vm.count("set") + vm.count("display") + vm.count("erase") + vm.count("set-auto") != 1) { std::cout << "Error in 'casm ref'. Please select one of --display, \n"; std::cout << "--set, --set-auto, or --erase to use this option." << std::endl; @@ -222,6 +222,13 @@ namespace CASM { std::cout << std::endl; std::cout << ref_opt.desc() << std::endl; + return 0; + } + + if(vm.count("desc")) { + std::cout << "\n"; + std::cout << ref_opt.desc() << std::endl; + std::cout << "DESCRIPTION" << std::endl; std::cout << " The chemical reference determines the value of the formation energy \n" " and chemical potentials calculated by CASM. 
\n\n" diff --git a/src/casm/app/run.cc b/src/casm/app/run.cc index e31bba527..2abb6810d 100644 --- a/src/casm/app/run.cc +++ b/src/casm/app/run.cc @@ -49,6 +49,12 @@ namespace CASM { std::cout << "\n"; std::cout << run_opt.desc() << std::endl; + return 0; + } + + if(vm.count("desc")) { + std::cout << "\n"; + std::cout << run_opt.desc() << std::endl; std::cout << "DESCRIPTION\n" << " Executes the requested command for each selected configuration,\n" << " with the path to the configuration as an argument. \n\n" diff --git a/src/casm/app/settings.cc b/src/casm/app/settings.cc index 578cda8b7..42b50d685 100644 --- a/src/casm/app/settings.cc +++ b/src/casm/app/settings.cc @@ -269,7 +269,7 @@ namespace CASM { bool call_help = false; - std::vector all_opt = {"list", + std::vector all_opt = {"list", "desc", "new-property", "new-bset", "new-calctype", "new-ref", "new-eci", "new-clex", "set-formation-energy", "erase-clex", "set-default-clex", "set-property", "set-bset", "set-calctype", "set-ref", "set-eci", "set-all", @@ -298,6 +298,12 @@ namespace CASM { args.log << "\n"; args.log << desc << std::endl; + return 0; + } + + if(vm.count("desc")) { + args.log << "\n"; + args.log << desc << std::endl; args.log << "DESCRIPTION" << std::endl; args.log << "\n"; args.log << " Often it is useful to try multiple different basis sets, \n" << @@ -363,6 +369,49 @@ namespace CASM { " and bset are maintained. \n" << " - For --set-all, all settings are switched at once. \n\n" << + " casm settings --set-cxx 'cxx' \n" << + " - Specifies compiler to use. In order of priority: \n" + " 1) User specified by 'casm settings --set-cxx' \n" + " (use '' to clear) \n" + " 2) $CASM_CXX \n" + " 3) $CXX \n" + " 4) \"g++\" \n\n" + + " casm settings --set-cxxflags 'cxxflags' \n" + " - Specifies compiler options. 
In order of priority: \n" + " 1) User specified by 'casm settings --set-cxxflags' \n" + " (use '' to clear) \n" + " 2) $CASM_CXXFLAGS \n" + " 3) \"-O3 -Wall -fPIC --std=c++11\" \n\n" + + " casm settings --set-soflags 'soflags' \n" + " - Specifies shared object construction flags. In order \n" + " of priority: \n" + " 1) User specified by 'casm settings --set-soflags' \n" + " (use '' to clear) \n" + " 2) $CASM_SOFLAGS \n" + " 3) \"-shared -lboost_system\" \n\n" + + " casm settings --set-casm-prefix 'casm_prefix' \n" + " - Specifies to find CASM header files in \n" + " '$CASM_PREFIX/include', \n" + " and shared libraries in \n" + " '$CASM_PREFIX/lib'. \n" + " In order of priority: \n" + " 1) User specified by 'casm settings --set-casm-prefix' \n" + " (use '' to clear) \n" + " 2) $CASM_PREFIX \n" + " 3) (default search paths) \n\n" + + " casm settings --set-boost-prefix 'boost_prefix' \n" + " - Specifies that boost libraries are expected in \n" + " '$CASM_BOOST_PREFIX/lib'. \n" + " In order of priority: \n" + " 1) User specified by 'casm settings --set-boost-prefix' \n" + " (use '' to clear) \n" + " 2) $CASM_BOOST_PREFIX \n" + " 3) (default search paths) \n\n" + " casm settings --set-view-command 'casm.view \"open -a /Applications/VESTA/VESTA.app\"'\n" << " - Sets the command used by 'casm view' to open \n" << " visualization software. \n" << diff --git a/src/casm/app/status.cc b/src/casm/app/status.cc index 103b7ee94..341126e04 100644 --- a/src/casm/app/status.cc +++ b/src/casm/app/status.cc @@ -17,16 +17,61 @@ namespace CASM { This will be called the 'project root directory' or project's 'location'.\n\ - Add a 'prim.json' file to the directory describing the primitive cell. \n\ See 'casm format --prim' for the format of the 'prim.json' file. \n\ -- Execute: 'casm init --name myproject' \n\ -- The 'basis_sets' and 'cluster_expansions' directories are created with a\n\ - default format. 
\n\ -- If necessary, set compilation options using \n\ - 'casm settings --set-compile-options' and \n\ - 'casm settings --set-so-options'. \n\ - This may be necessary if, for instance, the CASM header files are \n\ - installed in a location that is not in your default compiler search path.\n\ -- Subsequently, work on 'myproject' can be done by executing 'casm' from \n\ - the project's root directory or any subdirectory. \n\ +- Execute: 'casm init' \n\ +- Several directories are created: \n\ + 'symmetry' \n\ + 'basis_sets' \n\ + 'training_data' \n\ + 'cluster_expansions' \n\ +- If necessary, set configuration options for runtime compilation and \n\ + linking by using the 'casm settings' command or by setting environment \n\ + variables. \n\ + \n\ + 'cxx': \n\ + Specifies compiler to use. In order of priority: \n\ + 1) User specified by 'casm settings --set-cxx' (use '' to clear) \n\ + 2) $CASM_CXX \n\ + 3) $CXX \n\ + 4) \"g++\" \n\ +\n\ + 'cxxflags': \n\ + Compiler flags. In order of priority: \n\ + 1) User specified by 'casm settings --set-cxxflags' \n\ + 2) $CASM_CXXFLAGS \n\ + 3) \"-O3 -Wall -fPIC --std=c++11\" \n\ +\n\ + 'soflags': \n\ + Shared object construction flags. In order of priority: \n\ + 1) User specified by 'casm settings --set-soflags' \n\ + 2) $CASM_SOFLAGS \n\ + 3) \"-shared -lboost_system\" \n\ +\n\ + 'casm_prefix': \n\ + If not in a standard search path, CASM header files are expected in \n\ + '$CASM_PREFIX/include', and shared libraries in '$CASM_PREFIX/lib'. \n\ + In order of priority: \n\ + 1) User specified by 'casm settings --set-casm-prefix' \n\ + 2) $CASM_PREFIX \n\ + 3) (default search paths) \n\ +\n\ + Note: For the 'casm' Python package, $LIBCASM and $LIBCCASM, have \n\ + highest priority for locating libcasm and libccasm, respectively. \n\ +\n\ + 'boost_prefix': \n\ + If not in a standard search path, boost libraries are expected in \n\ + '$CASM_BOOST_PREFIX/lib'. 
\n\ + In order of priority: \n\ + 1) User specified by 'casm settings --set-boost-prefix' \n\ + 2) $CASM_BOOST_PREFIX \n\ + 3) (default search paths) \n\ +\n\ + Note: If shared libraries are installed in non-standard locations, you \n\ + may need to set: \n\ + (Linux) export LD_LIBRARY_PATH=$CASM_PREFIX/lib:$CASM_BOOST_PREFIX/lib:$LD_LIBRARY_PATH \n\ + (Mac) export DYLD_FALLBACK_LIBRARY_PATH=$CASM_PREFIX/lib:$CASM_BOOST_PREFIX/lib:$DYLD_FALLBACK_LIBRARY_PATH \n\ +\n\ +- Subsequently, work on the CASM project can be done by executing 'casm' \n\ + from the project's root directory or any subdirectory. \n\ \n\ - See 'casm format' for descriptions and locations of the 'prim.json' file.\n"; } @@ -316,6 +361,12 @@ Instructions for fitting ECI: \n\n\ std::cout << "\n"; std::cout << status_opt.desc() << std::endl; + return 0; + } + + if(vm.count("desc")) { + std::cout << "\n"; + std::cout << status_opt.desc() << std::endl; std::cout << "DESCRIPTION" << std::endl; std::cout << " Get status information for the current CASM project.\n\n"; diff --git a/src/casm/app/super.cc b/src/casm/app/super.cc index fdccbe22c..388915785 100644 --- a/src/casm/app/super.cc +++ b/src/casm/app/super.cc @@ -121,7 +121,7 @@ namespace CASM { try { po::store(po::parse_command_line(args.argc, args.argv, super_opt.desc()), vm); // can throw - if(!vm.count("help")) { + if(!vm.count("help") && !vm.count("desc")) { if(!vm.count("duper")) { if(vm.count("transf-mat") + vm.count("get-transf-mat") != 1) { std::cerr << "Error in 'casm super'. Only one of --transf-mat or --get-transf-mat may be chosen." 
<< std::endl; @@ -145,6 +145,12 @@ namespace CASM { std::cout << "\n"; std::cout << super_opt.desc() << std::endl; + return 0; + } + + if(vm.count("desc")) { + std::cout << "\n"; + std::cout << super_opt.desc() << std::endl; std::cout << "DESCRIPTION" << std::endl; std::cout << " \n" << " casm super --transf-mat T \n" << diff --git a/src/casm/app/sym.cc b/src/casm/app/sym.cc index df949a4e2..da50c9b9f 100644 --- a/src/casm/app/sym.cc +++ b/src/casm/app/sym.cc @@ -45,6 +45,12 @@ namespace CASM { std::cout << "\n"; std::cout << sym_opt.desc() << std::endl; + return 0; + } + + if(vm.count("desc")) { + std::cout << "\n"; + std::cout << sym_opt.desc() << std::endl; std::cout << "DESCRIPTION" << std::endl; std::cout << " Display symmetry group information.\n"; diff --git a/src/casm/app/update.cc b/src/casm/app/update.cc index 83ab1e3e3..ef91c9f43 100644 --- a/src/casm/app/update.cc +++ b/src/casm/app/update.cc @@ -63,6 +63,13 @@ namespace CASM { if(vm.count("help")) { std::cout << std::endl; std::cout << update_opt.desc() << std::endl; + + return 0; + } + + if(vm.count("desc")) { + std::cout << "\n"; + std::cout << update_opt.desc() << std::endl; std::cout << "DESCRIPTION" << std::endl; std::cout << " Updates all values and files after manual changes or configuration \n"; std::cout << " calculations.\n"; diff --git a/src/casm/app/view.cc b/src/casm/app/view.cc index f791c6ed9..f9b057f01 100644 --- a/src/casm/app/view.cc +++ b/src/casm/app/view.cc @@ -47,6 +47,12 @@ namespace CASM { std::cout << "\n"; std::cout << view_opt.desc() << std::endl; + return 0; + } + + if(vm.count("desc")) { + std::cout << "\n"; + std::cout << view_opt.desc() << std::endl; std::cout << "This allows opening visualization programs directly from \n" "CASM. 
It iterates over all selected configurations and \n" "one by one writes a POSCAR and executes \n" diff --git a/src/casm/completer/Handlers.cc b/src/casm/completer/Handlers.cc index bdc5afc27..2288937bf 100644 --- a/src/casm/completer/Handlers.cc +++ b/src/casm/completer/Handlers.cc @@ -261,7 +261,8 @@ namespace CASM { void OptionHandlerBase::add_help_suboption() { m_desc.add_options() - ("help,h", "Print help message"); + ("help,h", "Print help message") + ("desc", "Print extended usage description"); return; } From 3c5b15ca962bd31b1a8285adc4d469cba3e187db Mon Sep 17 00:00:00 2001 From: bpuchala Date: Sun, 14 Aug 2016 03:08:25 -0400 Subject: [PATCH 04/10] casm format update --- src/casm/app/format.cc | 209 ++++++++++++++++++++++++++++------------- src/casm/app/status.cc | 7 +- 2 files changed, 144 insertions(+), 72 deletions(-) diff --git a/src/casm/app/format.cc b/src/casm/app/format.cc index 5ff8bb272..f42c362f5 100644 --- a/src/casm/app/format.cc +++ b/src/casm/app/format.cc @@ -22,11 +22,14 @@ namespace CASM { ("vasp", "Description and location of VASP settings files") ("comp", "Description and location of 'composition_axes.json' file") ("bspecs", "Description and location of 'bspecs.json' file") + ("clust", "Description and location of 'clust.json' file") + ("basis", "Description and location of 'basis.json' file") + ("clex", "Description and location of '$TITLE_Clexulator.*' files") ("ref", "Description and location of 'chemical_reference.json' files") ("scel", "Description and location of 'SCEL' file") ("lat", "Description and location of 'LAT' files") ("pos", "Description and location of 'POS' files") - ("fit", "Description and location of the 'energy', 'corr.in', and 'eci.in' files") + ("eci", "Description and location of 'eci.json' file") ("monte", "Description and location of the Monte Carlo input file"); return; } @@ -102,8 +105,9 @@ namespace CASM { std::cout << " crystal_point_group.json \n"; std::cout << " $ROOT/basis_sets/$CURR_BSET/ \n"; std::cout 
<< " bspecs.json \n"; + std::cout << " basis.json \n"; std::cout << " clust.json \n"; - std::cout << " $NAME_Clexulator.cc \n"; + std::cout << " $TITLE_Clexulator.* \n"; std::cout << " $ROOT/training_data/ \n"; std::cout << " SCEL \n"; std::cout << " $ROOT/training_data/settings/$CURR_CALCTYPE/ \n"; @@ -122,10 +126,7 @@ namespace CASM { std::cout << " (VASP results) \n"; std::cout << " properties.calc.json \n"; std::cout << " $ROOT/cluster_expansions/clex.formation_energy/$CURR_BSET/$CURR_CALCTYPE/$CURR_REF/$CURR_ECI\n"; - std::cout << " energy \n"; - std::cout << " corr.in \n"; - std::cout << " eci.in \n"; - std::cout << " eci.out \n"; + std::cout << " eci.json \n"; std::cout << " \n"; std::cout << " \n"; std::cout << " Variable descriptions: \n"; @@ -148,6 +149,8 @@ namespace CASM { std::cout << " transformation matrix. \n"; std::cout << " \n"; std::cout << " $CONFIGID: Configuration id, a unique integer. \n"; + std::cout << " \n"; + std::cout << " $TITLE: Title of the CASM project \n"; std::cout << "\n"; std::cout << " Note: The 'settings' heirarchy can be located at the project \n"; std::cout << " level as shown above, or at the supercell or configuration level\n"; @@ -170,7 +173,7 @@ namespace CASM { std::cout << "EXAMPLE:\n"; std::cout << "-------\n"; std::cout << - "{\n \"compile_options\" : \"g++ -O3 -Wall -fPIC --std=c++11\",\n \"curr_bset\" : \"default\",\n \"curr_calctype\" : \"default\",\n \"curr_clex\" : \"formation_energy\",\n \"curr_eci\" : \"default\",\n \"curr_properties\" : [ \"relaxed_energy\" ],\n \"curr_ref\" : \"default\",\n \"name\" : \"ZrO\",\n \"so_options\" : \"g++ -shared -lboost_system\",\n \"tol\" : 0.000010000000\n}\n"; + "{\n \"cluster_expansions\" : {\n \"formation_energy\" : {\n \"bset\" : \"default\",\n \"calctype\" : \"default\",\n \"eci\" : \"default\",\n \"name\" : \"formation_energy\",\n \"property\" : \"formation_energy\",\n \"ref\" : \"default\"\n }\n },\n \"crystallography_tol\" : 1.000000000000000082e-05,\n 
\"curr_properties\" : [ \"relaxed_energy\" ],\n \"default_clex\" : \"formation_energy\",\n \"lin_alg_tol\" : 1.000000000000000036e-10,\n \"name\" : \"ZrO\",\n \"nlist_sublat_indices\" : [ 2, 3 ],\n \"nlist_weight_matrix\" : [\n [ 2, -1, 0 ],\n [ -1, 2, 0 ],\n [ 0, 0, 5 ]\n ],\n \"query_alias\" : {\n },\n \"view_command\" : \"casm.view \\\"open -a /Applications/VESTA/VESTA.app\\\"\"\n}" << std::endl; std::cout << "-------\n"; std::cout << std::endl << std::endl; } @@ -678,28 +681,28 @@ LCHARG = .FALSE.\n"; " attribute with its index as the key. The keys should not be \n" << " repeats of any of the standard_axes. \n\n" << - "standard_axes/composition_axes:components \n" << + "standard_axes/custom_axes:components \n" << " A JSON array containing the names of possible species. \n\n" << - "standard_axes/composition_axes:independent_compositions \n" << + "standard_axes/custom_axes:independent_compositions \n" << " The number of independent composition axes. \n\n" << - "standard_axes/composition_axes:origin \n" << + "standard_axes/custom_axes:origin \n" << " The composition of origin the of composition axes in terms of \n" << " number of each component species per primitive cell, ordered as in\n" << " the 'components' array. \n\n" << - "standard_axes/composition_axes:a, b, c, ... \n" << + "standard_axes/custom_axes:a, b, c, ... \n" << " The composition of end members a, b, c, etc. in terms of number of\n" << " each component species per primitive cell, ordered as in the \n" << " 'components' array. \n\n" << - "standard_axes/composition_axes:param_formula: \n" << + "standard_axes/custom_axes:param_formula: \n" << " The formula that converts 'comp_n' (# of each component per \n" << " primitive cell) to 'comp' (composition relative the selected \n" << " composition axes). 
\n\n" << - "standard_axes/composition_axes:mol_formula: \n" << + "standard_axes/custom_axes:mol_formula: \n" << " The formula that converts 'comp' (composition relative the \n" << " selected composition axes) to 'comp_n' (# of each component per \n" << " primitive cell). \n\n\n"; @@ -775,6 +778,113 @@ LCHARG = .FALSE.\n"; } + if(vm.count("clust")) { + std::cout << "\n### clust.json ##################\n\n"; + + std::cout << "LOCATION:\n"; + std::cout << "$ROOT/basis_sets/$CURR_BSET/clust.json\n"; + std::cout << "\n\n"; + + std::cout << "DESCRIPTION:\n"; + std::cout << "This JSON file contains the coordinates of sites in the prototype \n" << + "clusters generated using the 'bspecs.json' specifications. \n\n"; + + + std::cout << "Prototype clusters can be accessed via: \n" + " [\"branches\"][branch_index][\"orbits\"][orbit_index][\"prototype\"]\n\n" + + "\"prototype\": (JSON object) \n" + + " /\"max_length\": (number) \n" + " Maximum pair distance between sites in the cluster \n\n" + + " /\"min_length\": (number) \n" + " Minimum pair distance between sites in the cluster \n\n" + + " /\"sites\": (JSON array of Integral coordinates) \n" + " An array listing sites in the prototype cluster using Integral\n" + " coordinates. Integral coordinates are 4-element integer arrays\n" + " indicating sublattice index, b, followed by unit cell indices,\n" + " i, j, k. \n\n" + + "\"bspecs\": (JSON object) \n" + " For reference, the contents of the 'bspecs.json' file used to \n" + " generate these clusters is reproduced here. \n\n" + + "\"lattice\": (JSON object) \n" + " For reference, so that the Integral coordinates can be converted \n" + " into Fractional or Cartesian coordinates, the lattice vectors \n" + " of the primitive structure are reproduced here. 
\n\n" << std::endl; + } + + if(vm.count("basis")) { + std::cout << "\n### basis.json ##################\n\n"; + + std::cout << "LOCATION:\n"; + std::cout << "$ROOT/basis_sets/$CURR_BSET/basis.json\n"; + std::cout << "\n\n"; + + std::cout << "DESCRIPTION:\n"; + std::cout << "This JSON file contains the basis functions generated using the \n" + "'bspecs.json' specifications. \n\n"; + + + std::cout << "\"site_functions\": (JSON array of JSON object) \n" + " Gives the site basis functions. One JSON object for each basis \n" + " site. \n\n" + + " /\"sublat\": (int) \n" + " Basis site index. \n\n" + + " /\"asym_unit\": (int) \n" + " Index of the asymmetric unit this basis site belongs to. \n\n" + + " /\"basis\": (JSON object) \n" + " Gives the value of each site basis function for each possible \n" + " occupant. Of the form: \n\n" + " { \n" + " \"\\\\phi_b_i\": { \n" + " \"A\": val, \n" + " \"B\": val, \n" + " ... \n" + " }, \n" + " ... \n" + " } \n" + + "\"cluster_functions\": (JSON array of JSON object) \n" + " Gives the cluster basis functions. One JSON object for each \n" + " cluster basis function. \n\n" + + " /\"linear_function_index\": (int) \n" + " Linear function index. This corresponds to ECI indices. \n\n" + + " /\"mult\": (int) \n" + " Multiplicity of symmetrically equivalent cluter functions. \n\n" + + " /\"orbit\": (JSON array of 3 int) \n" + " Gives the cluster branch index, cluster orbit index, and index\n" + " of this basis function in the cluster basis. \n\n" + + " /\"prototype\": (JSON object) \n" + " Specifies the prototype cluster, as in the 'clust.json' file. \n\n" + + " /\"prototype_function\": (string) \n" + " Latex-style function for the prototype cluster. 
\n\n" << std::endl; + } + + if(vm.count("clex")) { + std::cout << "\n### $TITLE_Clexulator.* ##################\n\n"; + + std::cout << "LOCATION:\n"; + std::cout << "$ROOT/basis_sets/$CURR_BSET/$TITLE_Clexulator.*\n"; + std::cout << "\n\n"; + + std::cout << "DESCRIPTION:\n"; + std::cout << "$TITLE_Clexulator.cc contains C++ code generated by CASM for \n" + "the cluster basis functions. It is automatically compiled into \n" + "$TITLE_Clexulator.o and $TITLE_Clexulator.so for use by CASM. \n\n" << std::endl; + } + if(vm.count("ref")) { std::cout << "\n### ref ##################\n\n"; @@ -993,61 +1103,26 @@ Direct\n\ } - if(vm.count("fit")) { - std::cout << "\n### fit ##################\n\n"; + if(vm.count("eci")) { + std::cout << "\n### eci.json ##################\n\n"; - std::cout << "LOCATION WHEN GENERATED:\n"; - std::cout << "$ROOT/cluster_expansions/clex.formation_energy/$CURR_BSET/$CURR_CALCTYPE/$CURR_REF/$CURR_ECI/energy\n"; - std::cout << "$ROOT/cluster_expansions/clex.formation_energy/$CURR_BSET/$CURR_CALCTYPE/$CURR_REF/$CURR_ECI/corr.in\n"; - std::cout << "$ROOT/cluster_expansions/clex.formation_energy/$CURR_BSET/$CURR_CALCTYPE/$CURR_REF/$CURR_ECI/eci.in\n\n\n"; + std::cout << "LOCATION:\n"; + std::cout << "$ROOT/cluster_expansions/clex.formation_energy/$CURR_BSET/$CURR_CALCTYPE/$CURR_REF/$CURR_ECI/eci.json\n"; + std::cout << "\n\n"; std::cout << "DESCRIPTION:\n"; - std::cout << "The 'energy' file contains information about every selected \n" << - "configuration that will be included as training data for fitting ECI.\n\n" << - - "1st column: \n" << - " Formation energy determined from the reference states. \n" << - " (See 'casm ref' and 'casm format --ref_state' for details) \n\n" << - - "2nd column: \n" << - " Weight to be placed on configuration when fitting energies with \n" << - " eci_search. \n\n" << - - "3rd and following columns: \n" << - " Composition of configuration. 
For a system with N independent \n" << - " occupants there will be N-1 columns (see 'casm comp') \n\n" << - - "2nd column from back: \n" << - " Distance to convex hull. Groundstates will have a value of 0.0000.\n\n" << + std::cout << "This is a copy of the $ROOT/basis_sets/$CURR_BSET/'basis.json' file \n" + "with the following additions: \n\n" - "Last column: \n" << - " Path to configuration. \n\n" << + "\"cluster_functions\": (JSON array of JSON object) \n\n" - "The energy file is to be used together with the corr.in and eci.in \n" << - "files to fit the cluster expansion using the eci_search program. \n\n"; + " /\"eci\": (number, optional, default=0.0) \n" + " The value of the ECI for the cluster basis function. If not \n" + " given, use 0.0. \n\n" - std::cout << "The 'corr.in' file contains a matrix of correlations for each \n" << - "selected configuration. \n\n"; - - std::cout << "The 'eci.in' file contains a list of calculated correlations and \n" << - "can be used to control with correlations are fit by 'eci_search'. 
\n\n"; - - std::cout << "The 'eci.out' file contains the fitted ECI as calculated by 'eci_search'.\n\n"; - - std::cout << "EXAMPLE: energy\n"; - std::cout << "-------\n"; - std::cout << - "#formation_energy n/a n/a n/a path \n\ -0.0000000000000 1.000000000000 1.000000000000 0.000000000000 /home/user/science/supercells/SCEL1_1_1_1_0_0_0/0 \n\ -0.0000000000000 1.000000000000 0.000000000000 0.000000000000 /home/user/science/supercells/SCEL1_1_1_1_0_0_0/1 \n\ --0.415501770000 1.000000000000 0.500000000000 0.243052905000 /home/user/science/supercells/SCEL2_1_1_2_0_0_0/0 \n\ --0.658554675000 1.000000000000 0.500000000000 0.000000000000 /home/user/science/supercells/SCEL2_1_2_1_0_0_1/0 \n\ --0.307639756667 1.000000000000 0.666666666667 0.131644213333 /home/user/science/supercells/SCEL3_1_1_3_0_0_0/0 \n\ --0.243993753333 1.000000000000 0.333333333333 0.277377812022 /home/user/science/supercells/SCEL3_1_1_3_0_0_0/1 \n\ --0.388569660000 1.000000000000 0.666666666667 0.050714310000 /home/user/science/supercells/SCEL3_1_3_1_0_0_1/0 \n\ --0.444539536667 1.000000000000 0.333333333333 0.076832028688 /home/user/science/supercells/SCEL3_1_3_1_0_0_1/1 \n\ --0.377047050000 1.000000000000 0.666666666667 0.062236920000 /home/user/science/supercells/SCEL3_1_3_1_0_0_2/0 \n"; - std::cout << "-------\n"; + "\"fit\": (JSON object) \n" + " Data from 'casm-learn' specifying how the ECI were generated and \n" + " some goodness of fit measures. \n\n" << std::endl; } @@ -1088,9 +1163,9 @@ Direct\n\ "\"model\": (JSON object) \n\n" << - " /\"clex\", /\"bset\", /\"calctype\", /\"ref\", /\"eci\": (string)\n" << - " The CASM project settings that should be used for the monte \n" << - " carlo calculation. \n\n\n" << + " /\"formation_energy\": (string, optional, default=\"formation_energy\")\n" << + " Specifies the cluster expansion to use to calculate formation \n" + " energy. 
Should be one of the ones listed by 'casm settings -l'.\n\n\n" << "\"supercell\": (3x3 JSON arrays of integers) \n" << @@ -1333,13 +1408,13 @@ Direct\n\ std::cout << "EXAMPLE: Settings for an incremental Metropolis calculation \n" << "with increasing temperature in automatic convergence mode.\n"; std::cout << "-------\n"; - std::cout << "{\n \"comment\" : \"This is a sample input file. Unrecognized attributes (like the ones prepended with '_' are ignored.\",\n \"debug\" : false,\n \"ensemble\" : \"grand_canonical\",\n \"method\" : \"metropolis\",\n \"model\" : {\n \"clex\" : \"formation_energy\",\n \"bset\" : \"default\",\n \"calctype\" : \"default\",\n \"ref\" : \"default\",\n \"eci\" : \"default\"\n },\n \"supercell\" : [\n [10, 0, 0],\n [0, 10, 0],\n [0, 0, 10]\n ],\n \"data\" : {\n \"sample_by\" : \"pass\",\n \"sample_period\" : 1,\n \"_N_sample\" : 1000, \n \"_N_pass\" : 1000,\n \"_N_step\" : 1000,\n \"_max_pass\" : 10000,\n \"min_pass\" : 1000,\n \"_max_step\" : 10000,\n \"_max_sample\" : 500,\n \"_min_sample\" : 100,\n \"confidence\" : 0.95,\n \"measurements\" : [ \n { \n \"quantity\" : \"formation_energy\"\n },\n { \n \"quantity\" : \"potential_energy\"\n },\n { \n \"quantity\" : \"atom_frac\"\n },\n { \n \"quantity\" : \"site_frac\"\n },\n { \n \"quantity\" : \"comp\",\n \"precision\" : 1e-3\n },\n { \n \"quantity\" : \"comp_n\"\n }\n ],\n \"storage\" : {\n \"write_observations\" : false,\n \"write_trajectory\" : false,\n \"output_format\" : [\"csv\", \"json\"]\n }\n },\n \"driver\" : {\n \"mode\" : \"incremental\", \n \"motif\" : {\n \"configname\" : \"auto\",\n \"_configname\" : \"SCEL3_3_1_1_0_2_2/0\",\n \"_configdof\" : \"path/to/final_state.json\"\n },\n \"initial_conditions\" : {\n \"param_chem_pot\" : {\n \"a\" : -1.75\n },\n \"temperature\" : 100.0,\n \"tolerance\" : 0.001\n },\n \"final_conditions\" : {\n \"param_chem_pot\" : {\n \"a\" : -1.75\n },\n \"temperature\" : 1000.0,\n \"tolerance\" : 0.001\n },\n \"incremental_conditions\" : {\n 
\"param_chem_pot\" : {\n \"a\" : 0.0\n },\n \"temperature\" : 10.0,\n \"tolerance\" : 0.001\n }\n }\n}\n"; + std::cout << "{\n \"comment\" : \"This is a sample input file. Unrecognized attributes (like the ones prepended with '_' are ignored.\",\n \"debug\" : false,\n \"ensemble\" : \"grand_canonical\",\n \"method\" : \"metropolis\",\n \"model\" : {\n \"formation_energy\" : \"formation_energy\"\n },\n \"supercell\" : [\n [10, 0, 0],\n [0, 10, 0],\n [0, 0, 10]\n ],\n \"data\" : {\n \"sample_by\" : \"pass\",\n \"sample_period\" : 1,\n \"_N_sample\" : 1000, \n \"_N_pass\" : 1000,\n \"_N_step\" : 1000,\n \"_max_pass\" : 10000,\n \"min_pass\" : 1000,\n \"_max_step\" : 10000,\n \"_max_sample\" : 500,\n \"_min_sample\" : 100,\n \"confidence\" : 0.95,\n \"measurements\" : [ \n { \n \"quantity\" : \"formation_energy\"\n },\n { \n \"quantity\" : \"potential_energy\"\n },\n { \n \"quantity\" : \"atom_frac\"\n },\n { \n \"quantity\" : \"site_frac\"\n },\n { \n \"quantity\" : \"comp\",\n \"precision\" : 1e-3\n },\n { \n \"quantity\" : \"comp_n\"\n }\n ],\n \"storage\" : {\n \"write_observations\" : false,\n \"write_trajectory\" : false,\n \"output_format\" : [\"csv\", \"json\"]\n }\n },\n \"driver\" : {\n \"mode\" : \"incremental\", \n \"motif\" : {\n \"configname\" : \"auto\",\n \"_configname\" : \"SCEL3_3_1_1_0_2_2/0\",\n \"_configdof\" : \"path/to/final_state.json\"\n },\n \"initial_conditions\" : {\n \"param_chem_pot\" : {\n \"a\" : -1.75\n },\n \"temperature\" : 100.0,\n \"tolerance\" : 0.001\n },\n \"final_conditions\" : {\n \"param_chem_pot\" : {\n \"a\" : -1.75\n },\n \"temperature\" : 1000.0,\n \"tolerance\" : 0.001\n },\n \"incremental_conditions\" : {\n \"param_chem_pot\" : {\n \"a\" : 0.0\n },\n \"temperature\" : 10.0,\n \"tolerance\" : 0.001\n }\n }\n}\n"; std::cout << "-------\n\n"; std::cout << "EXAMPLE: Settings for an custom drive mode LTE1 calculation with\n" << "increasing temperature.\n"; std::cout << "-------\n"; - std::cout << "{\n \"comment\" : \"This is 
a sample input file. Unrecognized attributes (like the ones prepended with '_' are ignored.\",\n \"debug\" : false,\n \"ensemble\" : \"grand_canonical\",\n \"method\" : \"lte1\",\n \"model\" : {\n \"clex\" : \"formation_energy\",\n \"bset\" : \"default\",\n \"calctype\" : \"default\",\n \"ref\" : \"default\",\n \"eci\" : \"default\"\n },\n \"supercell\" : [\n [9, 0, 0],\n [0, 9, 0],\n [0, 0, 9]\n ],\n \"data\" : {\n \"storage\" : {\n \"write_observations\" : false,\n \"write_trajectory\" : false,\n \"output_format\" : [\"csv\", \"json\"]\n }\n },\n \"driver\" : {\n \"mode\" : \"incremental\", \n \"motif\" : {\n \"configname\" : \"auto\",\n \"_configname\" : \"SCEL3_3_1_1_0_2_2/0\",\n \"_configdof\" : \"path/to/final_state.json\"\n },\n \"custom_conditions\" : [\n {\n \"param_chem_pot\" : {\n \"a\" : 0.0\n },\n \"temperature\" : 100.0,\n \"tolerance\" : 0.001\n },\n {\n \"param_chem_pot\" : {\n \"a\" : 0.0\n },\n \"temperature\" : 200.0,\n \"tolerance\" : 0.001\n },\n {\n \"param_chem_pot\" : {\n \"a\" : 0.0\n },\n \"temperature\" : 400.0,\n \"tolerance\" : 0.001\n },\n {\n \"param_chem_pot\" : {\n \"a\" : 0.0\n },\n \"temperature\" : 800.0,\n \"tolerance\" : 0.001\n }\n ]\n }\n}\n"; + std::cout << "{\n \"comment\" : \"This is a sample input file. 
Unrecognized attributes (like the ones prepended with '_' are ignored.\",\n \"debug\" : false,\n \"ensemble\" : \"grand_canonical\",\n \"method\" : \"lte1\",\n \"model\" : {\n \"formation_energy\" : \"formation_energy\"\n },\n \"supercell\" : [\n [9, 0, 0],\n [0, 9, 0],\n [0, 0, 9]\n ],\n \"data\" : {\n \"storage\" : {\n \"write_observations\" : false,\n \"write_trajectory\" : false,\n \"output_format\" : [\"csv\", \"json\"]\n }\n },\n \"driver\" : {\n \"mode\" : \"incremental\", \n \"motif\" : {\n \"configname\" : \"auto\",\n \"_configname\" : \"SCEL3_3_1_1_0_2_2/0\",\n \"_configdof\" : \"path/to/final_state.json\"\n },\n \"custom_conditions\" : [\n {\n \"param_chem_pot\" : {\n \"a\" : 0.0\n },\n \"temperature\" : 100.0,\n \"tolerance\" : 0.001\n },\n {\n \"param_chem_pot\" : {\n \"a\" : 0.0\n },\n \"temperature\" : 200.0,\n \"tolerance\" : 0.001\n },\n {\n \"param_chem_pot\" : {\n \"a\" : 0.0\n },\n \"temperature\" : 400.0,\n \"tolerance\" : 0.001\n },\n {\n \"param_chem_pot\" : {\n \"a\" : 0.0\n },\n \"temperature\" : 800.0,\n \"tolerance\" : 0.001\n }\n ]\n }\n}\n"; std::cout << "-------\n"; } diff --git a/src/casm/app/status.cc b/src/casm/app/status.cc index 341126e04..50a076d78 100644 --- a/src/casm/app/status.cc +++ b/src/casm/app/status.cc @@ -18,11 +18,8 @@ namespace CASM { - Add a 'prim.json' file to the directory describing the primitive cell. \n\ See 'casm format --prim' for the format of the 'prim.json' file. \n\ - Execute: 'casm init' \n\ -- Several directories are created: \n\ - 'symmetry' \n\ - 'basis_sets' \n\ - 'training_data' \n\ - 'cluster_expansions' \n\ +- Several directories are created: 'symmetry', 'basis_sets', \n\ + 'training_data', and 'cluster_expansions' \n\ - If necessary, set configuration options for runtime compilation and \n\ linking by using the 'casm settings' command or by setting environment \n\ variables. 
\n\ From 57b2176bd582032521b6222be14f8aa48de1cf12 Mon Sep 17 00:00:00 2001 From: bpuchala Date: Sun, 14 Aug 2016 03:43:27 -0400 Subject: [PATCH 05/10] update 'casm status' for enumeration and 'casm enum' help descriptions --- src/casm/app/enum.cc | 34 ++++++++++++++++++++++++++-------- src/casm/app/status.cc | 35 +++++++++++++++++------------------ 2 files changed, 43 insertions(+), 26 deletions(-) diff --git a/src/casm/app/enum.cc b/src/casm/app/enum.cc index 71d57ac45..229899eb4 100644 --- a/src/casm/app/enum.cc +++ b/src/casm/app/enum.cc @@ -46,7 +46,7 @@ namespace CASM { ("supercells,s", "Enumerate supercells") ("configs,c", "Enumerate configurations") ("matrix,m", po::value(&m_matrix_path)->value_name(ArgHandler::path()), "Specify a matrix to apply to the primitive cell before beginning enumeration") - ("lattice-directions,z", po::value(&m_lattice_directions_str), "Restrict enumeration along a, b or c lattice vectors"); + ("lattice-directions,z", po::value(&m_lattice_directions_str)->default_value("abc"), "Restrict enumeration along a, b or c lattice vectors"); return; } @@ -92,11 +92,14 @@ namespace CASM { std::cout << "\n"; std::cout << enum_opt.desc() << std::endl; - std::cout << "DESCRIPTION" << std::endl; - std::cout << " Enumerate supercells and configurations\n"; - std::cout << " - expects a PRIM file in the project root directory \n"; - std::cout << " - if --min is given, then --max must be given \n"; - std::cout << std::endl; + std::cout << "DESCRIPTION\n" << std::endl; + + std::cout << " casm enum --supercells --max V \n" + " - To enumerate supercells up to volume V (units: number\n" + " of primitive cells) \n" + " - Use --matrix and --lattice-directions for restricted\n" + " enumeration: \n\n"; + std::cout << " --matrix" << std::endl; std::cout << " - When using --supercells, you may use the this option " << std::endl; std::cout << " to specify a transformation matrix to apply to your primitive " << std::endl; @@ -131,9 +134,24 @@ namespace CASM { 
std::cout << " If this is the case, then the meaning of 'a', 'b' and 'c' changes" << std::endl; std::cout << " from the lattice vectors of your PRIM, to the vectors of the " << std::endl; std::cout << " lattice resulting from multiplying your PRIM by the specified " << std::endl; - std::cout << " matrix." << std::endl; + std::cout << " matrix.\n\n"; + + std::cout << " casm enum --configs --all \n" + " - To enumerate configurations for all supercells. \n\n" + + " casm enum --configs --min MINV --max MAXV \n" + " - To enumerate configurations for supercells ranging in\n" + " volume from MINV to MAXV (units: number of primitive \n" + " cells). \n\n" + " casm enum --configs --scellname NAME \n" + " - To enumerate configurations for a particular \n" + " supercell. \n\n" + " casm enum --configs [...] --filter '... casm query commands...' \n" + " - To perform restricted enumeration of configurations \n" + " such that only configurations that return '1' (true) \n" + " for a particular 'casm query' command are retained. \n\n"; return 0; } @@ -267,7 +285,7 @@ namespace CASM { } else if(vm.count("configs")) { - if(vm.count("dimensions") || vm.count("matrix") || vm.count("lattice-directions")) { + if(vm.count("dimensions") || vm.count("matrix") || (dims < 3)) { std::cerr << "Option --configs in conjunction with limited supercell enumeration is currently unsupported" << std::endl; return ERR_INVALID_ARG; } diff --git a/src/casm/app/status.cc b/src/casm/app/status.cc index 50a076d78..bea3f173e 100644 --- a/src/casm/app/status.cc +++ b/src/casm/app/status.cc @@ -69,8 +69,7 @@ namespace CASM { \n\ - Subsequently, work on the CASM project can be done by executing 'casm' \n\ from the project's root directory or any subdirectory. 
\n\ -\n\ -- See 'casm format' for descriptions and locations of the 'prim.json' file.\n"; +- See 'casm format --prim' for description and location of the 'prim.json' file.\n"; } void composition_unselected() { @@ -83,10 +82,8 @@ namespace CASM { - Then execute 'casm composition -s <#>' to select one of the listed axes.\n\ - If no standard composition axis is satisfactory, edit the file \n\ 'composition_axes.json' to add your own custom composition axes to the \n\ - 'custom_axes' JSON object.\n\n"; - - std::cout << - "- See 'casm format' for a description and the location of \n\ + 'custom_axes' JSON object.\n\ +- See 'casm format --comp' for description and the location of \n\ the 'composition_axes.json' file.\n\n"; } @@ -99,10 +96,11 @@ namespace CASM { "Enumerate supercells\n\ - Execute: 'casm enum --supercells --max V' to enumerate supercells up to \n\ volume V (units: number of primitive cells). \n\ -- Supercells are listed in the SCEL file.\n\n"; - - std::cout << - "- See 'casm format' for a description and location of the \n\ +- Supercells are listed in the SCEL file.\n\ +- See 'casm enum --desc' for extended help documentation on how to use the\n\ + '--matrix' and '--lattice-directions' options to perform restricted \n\ + supercell enumeration (i.e. 2d, 1d, multiples of other supercells). \n\ +- See 'casm format' for a description and location of the \n\ 'SCEL' file.\n\n"; } @@ -122,14 +120,15 @@ namespace CASM { - Execute: 'casm enum --configs --scellname NAME' to enumerate \n\ configurations for a particular supercell. \n\ - Generated configurations are listed in the 'config_list.json' file. \n\ - This file should not usually be edited manually. \n\n"; - - std::cout << - "- See 'casm format' for a description and location of \n\ - the 'config_list.json' file. \n\ - - See 'casm format' for a description and location of \n\ - the data files related to a particular configuration.\n\n"; - + This file should not usually be edited manually. 
\n\ +- See 'casm enum --desc' for extended help documentation on how to use \n\ + '--filter' command to perform restricted enumeration of \n\ + configurations. \n\ +- Once you have a cluster expansion, see 'casm format --monte' for \n\ + a description of how to save configurations enumerated during Monte \n\ + Carlo calculations. \n\ +- See 'casm format --config' for a description and location of \n\ + the 'config_list.json' file. \n\n"; } void configs_uncalculated() { From 5447bdd528fea14c41f8bbf43f4b6dbbf2906972 Mon Sep 17 00:00:00 2001 From: bpuchala Date: Sun, 14 Aug 2016 04:14:39 -0400 Subject: [PATCH 06/10] update casm status --- src/casm/app/status.cc | 107 ++++++++++++++++++++++------------------- 1 file changed, 57 insertions(+), 50 deletions(-) diff --git a/src/casm/app/status.cc b/src/casm/app/status.cc index bea3f173e..e572fc999 100644 --- a/src/casm/app/status.cc +++ b/src/casm/app/status.cc @@ -121,6 +121,8 @@ namespace CASM { configurations for a particular supercell. \n\ - Generated configurations are listed in the 'config_list.json' file. \n\ This file should not usually be edited manually. \n\ +- Use the 'casm view' command to quickly view configurations in your \n\ + favorite visualization program. See 'casm view -h' for help. \n\ - See 'casm enum --desc' for extended help documentation on how to use \n\ '--filter' command to perform restricted enumeration of \n\ configurations. \n\ @@ -144,27 +146,31 @@ Instructions for volume relaxed VASP energies: \n\n\ settings files. \n\ - Select which configurations to calculate properties for using the \n\ 'casm select' command. Use 'casm select --set on' to select all \n\ - configurations. By default, the 'is selected?' state of each \n\ + configurations. By default, the 'selected' state of each \n\ configuration is stored by CASM in the master config_list.json file, \n\ - located in the hidden '.casm' directory. 
You can also save additional\n\ - selection using the 'casm select -o' option to write a selection to a\n\ - file. Selections may be operated on to create new selections that \n\ - are subsets, unions, or intersections of existing selections. \n\ - Selection files may also be edited manually or via programs for more \n\ + located in the hidden '.casm' directory. The standard selections \n\ + 'MASTER', 'CALCULATED', 'ALL', or 'NONE' may always be used. \n\ +- You can also save additional selection using the 'casm select -o' \n\ + option to write a selection to a file. \n\ +- Selections may be operated on to create new selections that are \n\ + subsets, unions, or intersections of existing selections. \n\ +- Selection files may also be edited manually or via programs for more \n\ complex selections than currently supported by 'casm select'. For all\n\ options related to selection configurations, see 'casm select -h'. \n\ - Selections may be used to query the properties of particular \n\ configurations using the 'casm query' command. See 'casm query -h' \n\ for the complete list of options. \n\ -- Execute 'casm run -e \"vasp.relax\" --write-pos' to submit \n\ - VASP jobs for all selected configurations. This depends on the python\n\ - modules 'pbs', 'casm', and 'vasp' being installed and the script \n\ - 'vasp.relax' being in the PATH. Only configurations which have not \n\ - yet been calculated will run. \n\ - *Note: You can also use 'casm run -e \"vasp.setup\" --write-pos to \n\ - setup VASP input files for all selected configuration, but not submit\n\ - the jobs. This is often a useful first step to check that input files\n\ - have been prepared correctly.* \n\ +- Execute: 'casm run -e \"vasp.relax\"' to submit VASP jobs for all \n\ + selected configurations. \n\ +- This depends on the python modules 'pbs', 'casm', and 'vasp' being \n\ + installed and the script 'vasp.relax' being in the PATH. Only \n\ + configurations which have not yet been calculated will run. 
\n\ +- Note: You can also use 'casm run -e \"vasp.setup\"' to setup VASP \n\ + input files for all selected configuration, but not submit the jobs. \n\ + This is often a useful first step to check that input files have been\n\ + prepared correctly. \n\ +- Note: The functionality of 'vasp.relax' and 'vasp.setup' is now also \n\ + included in the 'casm-calc' program. See 'casm-calc -h' for help. \n\ - VASP results will be stored at: \n\ '$ROOT/training_data/$SCELNAME/$CONFIGID/$CURR_CALCTYPE/properties.calc.json'\n\ Results in 'properties.calc.json' are expected to be ordered to match\n\ @@ -221,11 +227,9 @@ Instructions for generating basis functions: \n\n\ See 'casm format --bspecs' for an example file. \n\ - Execute 'casm bset -u' to generate basis functions. If you edit the \n\ 'bspecs.json' file, execute 'casm bset -u' again to update basis \n\ - functions. \n\n"; - - std::cout << - "- See 'casm format --bspecs' for a description and location of \n\ - the 'bspecs.json' files.\n\n"; + functions. \n\ +- See 'casm format --bspecs' for description and location of the \n\ + 'bspecs.json' file.\n\n"; } void eci_uncalculated() { @@ -235,43 +239,46 @@ Instructions for generating basis functions: \n\n\ "Fit effective cluster interactions (ECI)\n\ \n\ Instructions for fitting ECI: \n\n\ +- Create a new directory within the CASM project, for example: \n\ + mkdir fit_1 && cd fit_1 \n\ - Select which configurations to use as the training data with the \n\ - 'casm select' command. Use 'casm select --set on' to select all \n\ - configurations. See 'casm select -h' for options. \n\ -- Execute 'casm fit' to generate input files for fitting eci with the \n\ - program 'eci_search'. 
\n\ -- Execute 'eci_search -h' for descriptions of the available fitting \n\ - options \n\ -- Results will be stored at: \n\ - 'root/cluster_expansions/clex.formation_energy/SCELNAME/CURR_CALCTYPE/CURR_REF/CURR_ECI/energy\n\ - 'root/cluster_expansions/clex.formation_energy/SCELNAME/CURR_CALCTYPE/CURR_REF/CURR_ECI/eci.in\n\ - 'root/cluster_expansions/clex.formation_energy/SCELNAME/CURR_CALCTYPE/CURR_REF/CURR_ECI/corr.in\n\n"; - - std::cout << - "- See 'casm format --fit' for a description and location of \n\ - the 'energy', 'eci.in', and 'corr.in' files.\n\n"; + 'casm select' command. To select all calculated configurations: \n\ + casm select --set 'is_calculated' -o train \n\ +- See 'casm select -h' for more options. \n\ +- Create a 'casm-learn' input file. Several example input files can be \n\ + generated from 'casm-learn --exMethodName'. For example: \n\ + casm-learn --exGeneticAlgorithm > fit_1_ga.json \n\ + This file can be edited to adjust the problem being solved (training \n\ + data, weighting scheme, cross validation sets and scoring, linear \n\ + estimator method, feature selection method, etc.) \n\ +- See 'casm-learn --settings-format' for description and help with the \n\ + input file. \n\ +- Execute: 'casm-learn -s fit_1_ga.json' \n\ +- Results are stored in a Hall Of Fame file containing the best \n\ + solutions as determined from cross validation scores. \n\ +- Different estimator methods (LinearRegression, Lasso, etc.) and \n\ + different feature selection methods (GeneticAlgorithm, RFE, etc.) can\n\ + be used with the same problem specs (training data, weighting scheme,\n\ + cross validation sets and scoring) and compared in a single Hall Of \n\ + Fame. \n\ +- When some candidate ECI have been stored in a Hall Of Fame, use the \n\ + 'casm-learn --checkhull' option to check if ground state configurations \n\ + are accurately predicted by the cluster expansion. 
\n\ +- When ready, use 'casm-learn --select' to write an 'eci.json' file to \n\ + use for Monte Carlo. \n\ +- See 'casm format --eci' for a description and location of the \n\ + 'eci.json' files.\n\n"; } - void advanced_steps() { + void montecarlo() { std::cout << "NEXT STEPS:\n\n"; std::cout << - "Advanced steps\n\ - \n\ -- Alternative calculation settings, composition axes, or reference \n\ - states can be explored within a single CASM project. \n\ + "Monte Carlo calculations\n\ \n\ -- Use 'casm settings' to add a new calculation type. This will create \n\ - directories with alternative VASP calculation settings, such as a \n\ - different psuedopotential or spin polarization setting. Then use \n\ - 'casm run' as usual to calculate configuration properties using the \n\ - alternative settings. \n\ - \n\ -- Use 'casm settings' to add an alternative reference states, then use \n\ - 'casm composition' and 'casm ref' as usual to set alternative \n\ - composition axes or reference states.\n\n"; - +- Use 'casm monte' to run Monte Carlo calculations. \n\ +- See 'casm monte --format' and 'casm monte -h' for help. 
\n\n"; } int update_eci_format(fs::path root) { @@ -697,7 +704,7 @@ Instructions for fitting ECI: \n\n\ if(vm.count("next")) { std::cout << "\n#################################\n\n"; - advanced_steps(); + montecarlo(); } else { std::cout << "For next steps, run 'casm status -n'\n\n"; From ba6602b73e7019184ea6ec21cae1707f40723034 Mon Sep 17 00:00:00 2001 From: bpuchala Date: Sun, 14 Aug 2016 04:15:37 -0400 Subject: [PATCH 07/10] remove 'casm status -u' --- src/casm/app/status.cc | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/src/casm/app/status.cc b/src/casm/app/status.cc index e572fc999..18483360c 100644 --- a/src/casm/app/status.cc +++ b/src/casm/app/status.cc @@ -342,8 +342,7 @@ Instructions for fitting ECI: \n\n\ m_desc.add_options() ("next,n", "Write next steps") ("warning,w", "Suppress warnings") - ("details,d", "Print detailed information") - ("update,u", "Update file formats for current version"); + ("details,d", "Print detailed information"); return; } @@ -391,11 +390,6 @@ Instructions for fitting ECI: \n\n\ } - if(vm.count("update")) { - return update_format(find_casmroot(fs::current_path())); - } - - /// 1) Check if a project exists std::cout << "\n#################################\n\n"; From e020091d12222287702e48ac3d670540d023bdc6 Mon Sep 17 00:00:00 2001 From: bpuchala Date: Sun, 14 Aug 2016 04:37:03 -0400 Subject: [PATCH 08/10] create 'casm status -all' command, so you can see all the casm steps --- src/casm/app/status.cc | 72 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 59 insertions(+), 13 deletions(-) diff --git a/src/casm/app/status.cc b/src/casm/app/status.cc index 18483360c..4cee6619c 100644 --- a/src/casm/app/status.cc +++ b/src/casm/app/status.cc @@ -342,7 +342,8 @@ Instructions for fitting ECI: \n\n\ m_desc.add_options() ("next,n", "Write next steps") ("warning,w", "Suppress warnings") - ("details,d", "Print detailed information"); + ("details,d", "Print detailed information") + ("all,a", "Print all 
'casm status -n' help messages"); return; } @@ -396,6 +397,12 @@ Instructions for fitting ECI: \n\n\ std::cout << "CASM status:\n\n"; + if(vm.count("all")) { + std::cout << "\n#################################\n\n"; + status_unitialized(); + } + + const fs::path &root = args.root; if(root.empty()) { @@ -463,9 +470,15 @@ Instructions for fitting ECI: \n\n\ } std::cout << std::endl << std::endl; - /// 2) Composition axes + if(vm.count("all")) { + std::cout << "\n#################################\n\n"; + std::cout << "\n2) Composition axes \n\n"; + composition_unselected(); + + } + std::cout << "2) Composition axes \n"; std::cout << "- Composition axes selected: "; @@ -498,7 +511,18 @@ Instructions for fitting ECI: \n\n\ /// 3) Configuration generation - std::cout << "3) Generate configurations \n"; + + if(vm.count("all")) { + std::cout << "\n#################################\n\n"; + std::cout << "3) Generate configurations \n\n"; + + supercells_ungenerated(); + + configs_ungenerated(); + + } + + std::cout << "\n3) Generate configurations \n"; int tot_gen = 0; int tot_calc = 0; @@ -555,11 +579,18 @@ Instructions for fitting ECI: \n\n\ std::cout << std::endl << std::endl; + /// 4) Calculate configuration properties - /// 4) Calculate configuration properties + if(vm.count("all")) { + std::cout << "\n#################################\n\n"; + std::cout << "4) Calculate configuration properties\n\n"; + configs_uncalculated(); + + } std::cout << "4) Calculate configuration properties\n"; + std::cout << "- Current calctype: " << calctype << "\n"; std::cout << "- Current cluster expansion: " << desc.name << "\n"; std::cout << "- Number of configurations calculated: " << tot_calc << " / " << tot_gen << " generated (Update with 'casm update')\n\n"; @@ -615,7 +646,15 @@ Instructions for fitting ECI: \n\n\ /// 5) Choose chemical reference + + if(vm.count("all")) { + std::cout << "\n#################################\n\n"; + std::cout << "5) Choose chemical reference\n\n"; + 
references_unset(); + } + std::cout << "5) Choose chemical reference\n"; + std::cout << "- Chemical reference set: "; if(primclex.has_chemical_reference()) { std::cout << "TRUE" << "\n"; @@ -647,9 +686,15 @@ Instructions for fitting ECI: \n\n\ std::cout << std::endl; - /// 6) Generate basis functions: + + if(vm.count("all")) { + std::cout << "\n#################################\n\n"; + std::cout << "6) Generate basis functions: \n\n"; + bset_uncalculated(); + } + std::cout << "6) Generate basis functions: "; if(!fs::exists(dir.clexulator_src(settings.name(), bset))) { @@ -668,9 +713,14 @@ Instructions for fitting ECI: \n\n\ } std::cout << "TRUE\n\n\n"; + /// 7) Fit effective cluster interactions (ECI): - /// 7) Fit effective cluster interactions (ECI): + if(vm.count("all")) { + std::cout << "\n#################################\n\n"; + std::cout << "7) Fit effective cluster interactions (ECI): \n\n"; + eci_uncalculated(); + } std::cout << "7) Fit effective cluster interactions (ECI): "; @@ -690,23 +740,19 @@ Instructions for fitting ECI: \n\n\ } std::cout << "TRUE\n\n\n"; - - /// 7) Advanced steps + /// 7) Monte Carlo std::cout << std::endl; - if(vm.count("next")) { + if(vm.count("next") || vm.count("all")) { std::cout << "\n#################################\n\n"; - + std::cout << "8) Monte Carlo Calculations: \n\n"; montecarlo(); } else { std::cout << "For next steps, run 'casm status -n'\n\n"; } - - - return 0; }; From 88529e3feb4972e84533eb5e77e94ed2800cc3e7 Mon Sep 17 00:00:00 2001 From: bpuchala Date: Sun, 14 Aug 2016 11:02:16 -0400 Subject: [PATCH 09/10] update README and INSTALL documentation --- INSTALL.md | 161 +++++++++++++++++++++++++---------------- README.md | 13 +--- SConstruct | 6 +- casmenv.sh | 38 +++++----- src/casm/app/status.cc | 19 ++--- 5 files changed, 131 insertions(+), 106 deletions(-) diff --git a/INSTALL.md b/INSTALL.md index 95a7dd86c..9ed89c171 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -78,13 +78,18 @@ Boost can be downloaded and 
installed from source following instructions found a CASM includes python modules for automating the submission and analysis of VASP calculations. They have been most extensively tested using Python 2.7.5, and should be compatible with versions 2.x+. (*Note however that for recent versions of SCons, support for Python versions before 2.7 has been deprecated.*) The latest version can be obtained from the Python website: [https://www.python.org](https://www.python.org) -**NumPy** +Individual module dependencies include: -Individual module dependencies include NumPy ([http://www.numpy.org](http://www.numpy.org)), which can be obtained by installing SciPy using one of the methods described on their website: [http://www.scipy.org/install.html](http://www.scipy.org/install.html). +- **SciPy** ([https://www.scipy.org](https://www.scipy.org)), which can be obtained using one of the methods described on their website: [http://www.scipy.org/install.html](http://www.scipy.org/install.html). The particular SciPy packages needed are: + - **numpy** ([http://www.numpy.org](http://www.numpy.org)) + - **pandas** ([http://pandas.pydata.org](http://pandas.pydata.org)) -**pbs** +- **scikit-learn** ([http://scikit-learn.org](http://scikit-learn.org)) -The Python module pbs is used to automate submission and management of PBS batch jobs on a cluster. It can be obtained from its GitHub repository: [https://github.com/prisms-center/pbs](https://github.com/prisms-center/pbs). +- **deap** ([http://deap.readthedocs.io/en/master/](http://deap.readthedocs.io/en/master/)), the Distributed Evolutionary Algorithm Package, used for genetic algorithms. + - **scoop** ([http://scoop.readthedocs.io/en/latest/](http://scoop.readthedocs.io/en/latest/)), required for deap. + +- **pbs** The Python module pbs is used to automate submission and management of PBS batch jobs on a cluster. 
It can be obtained from its GitHub repository: [https://github.com/prisms-center/pbs](https://github.com/prisms-center/pbs). *Note: This is not the pbs module available for installation via pip.* ### Included with CASM @@ -124,7 +129,8 @@ CASM is built using SCons, but some configuration of environment variables may b and is also reproduced here: - Type: 'scons' to build all binaries, + Type: 'scons configure' to run configuration checks, + 'scons' to build all binaries, 'scons install' to install all libraries, binaries, scripts and python packages, 'scons test' to run all tests, 'scons unit' to run all unit tests, @@ -140,78 +146,99 @@ and is also reproduced here: Recognized environment variables: - $CXX: + $CASM_CXX, $CXX: Explicitly set the C++ compiler. If not set, scons chooses a default compiler. - $CASMPREFIX: + $CASM_PREFIX: Where to install CASM. By default, this uses '/usr/local'. Then header files are - installed in '$CASMPREFIX/include', shared libraries in '$CASMPREFIX/lib', executables - in '$CASMPREFIX/bin', and the path is used for the setup.py --prefix option for + installed in '$CASM_PREFIX/include', shared libraries in '$CASM_PREFIX/lib', executables + in '$CASM_PREFIX/bin', and the path is used for the setup.py --prefix option for installing python packages. - $CASMBOOST_PATH: - Search path for Boost. '$CASMBOOST_PATH/include' is searched for header files, and - '$CASMBOOST_PATH/lib' for libraries. Boost and CASM should be compiled with the + $CASM_BOOST_PREFIX: + Search path for Boost. '$CASM_BOOST_PREFIX/include' is searched for header files, and + '$CASM_BOOST_PREFIX/lib' for libraries. Boost and CASM should be compiled with the same compiler. - - $OPTIMIZATIONLEVEL: + + $CASM_OPTIMIZATIONLEVEL: Sets the -O optimization compiler option. If not set, uses -O3. - $DEBUGSTATE: + $CASM_DEBUGSTATE: Sets to compile with debugging symbols. In this case, the optimization level gets set to -O0, and NDEBUG does not get set. 
- $LD_LIBRARY_PATH: - Search path for dynamic libraries, may need $CASMBOOST_PATH/lib - and $CASMPREFIX/lib added to it. - On Mac OS X, this variable is $DYLD_FALLBACK_LIBRARY_PATH. + $LD_LIBRARY_PATH (Linux) or $DYLD_FALLBACK_LIBRARY_PATH (Mac): + Search path for dynamic libraries, may need $CASM_BOOST_PREFIX/lib + and $CASM_PREFIX/lib added to it. This should be added to your ~/.bash_profile (Linux) or ~/.profile (Mac). + $CASM_BOOST_NO_CXX11_SCOPED_ENUMS: + If defined, will compile with -DBOOST_NO_CXX11_SCOPED_ENUMS. Use this + if linking to boost libraries compiled without c++11. + Additional options that override environment variables: Use 'cxx=X' to set the C++ compiler. Default is chosen by scons. 'opt=X' to set optimization level, '-OX'. Default is 3. 'debug=X' with X=0 to use '-DNDEBUG', - or with X=1 to set debug mode compiler options '-O0 -g -save-temps'. - Overrides $DEBUGSTATE. - 'prefix=X' to set installation directory. Default is '/usr/local'. - Overrides $CASMPREFIX. - 'boost_path=X' set boost search path. Overrides $CASMBOOST_PATH. + or with X=1 to set debug mode compiler options '-O0 -g -save-temps'. + Overrides $CASM_DEBUGSTATE. + 'prefix=X' to set installation directory. Default is '/usr/local'. Overrides $CASM_PREFIX. + 'boost_prefix=X' set boost search path. Overrides $CASM_BOOST_PREFIX. + 'boost_no_cxx11_scoped_enums=1' to use '-DBOOST_NO_CXX11_SCOPED_ENUMS'. + Overrides $CASM_BOOST_NO_CXX11_SCOPED_ENUMS. + + Use scons -H for help about command-line options. +The script ``casmenv.sh`` provides a list of environment variables that are recognized by CASM during installation and use. A copy of this file can be used to configure your environment before installing or using CASM. For instance: + + mkdir $HOME/modules + cp /path/to/CASMcode/casmenv.sh $HOME/modules/casm + ... edit $HOME/modules/casm ... 
-For example, on a cluster where Boost is installed in a shared directory ``/home/software/rhel6/boost/1.54.0-gcc-4.7.0`` (*Important: Boost and CASM should be compiled with the same compiler.*), and your executables and Python modules are stored in your userspace at ``$HOME/software``, you could add the following to the ``.bash_profile`` file in your home directory: +Then to set your environment before installing or using CASM: + + source $HOME/modules/casm - export CASMBOOST_PATH=/home/software/rhel6/boost/1.54.0-gcc-4.7.0 - export CASMPREFIX=$HOME/software - export PATH=$PATH:$CASMPREFIX/bin - export CPATH=$CPATH:$CASMPREFIX/include - export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$CASMPREFIX/lib +If you are working in a shared computing environment where other modules such as VASP must be imported, they can also be imported in this script. -and then run ``source $HOME/.bash_profile`` for these changes to take effect. These commands to set environment variables could also be run from the command line, in which case the environment variables only persist until your terminal session is closed. *Note: On Mac OS X, use ``DYLD_FALLBACK_LIBRARY_PATH`` instead of ``LD_LIBRARY_PATH``.* +After setting up your environment, run: + + cd /path/to/CASMcode + scons configure + +to perform a number of configuration checks. Once they pass, you are ready to install CASM. #### Build and install Once any necessary environment variables are set, you are ready to build and install. Move to the directory in which the CASM source resides and run ``scons install``: - cd /path/to/CASM + cd /path/to/CASMcode scons install *Note: Use 'scons install -j N', where N is number of jobs, to enable multi-threaded compilation. 
This can make a nice difference.* This will compile and install: -- ``$CASMPREFIX/bin/casm`` the primary CASM program -- ``$CASMPREFIX/bin/eci_search`` a program for fitting effective cluster interactions (ECI) -- ``$CASMPREFIX/bin/vasp.setup`` a script for setting up VASP jobs -- ``$CASMPREFIX/bin/vasp.relax`` a script for setting up and submitting VASP jobs -- ``$CASMPREFIX/bin/kpoint_converge`` a script for performing k-point convergence -- ``$CASMPREFIX/include/casm/`` headers files for ``libcasm`` -- ``$CASMPREFIX/lib/libcasm.so`` a shared library containing much of the implementation. May be ``libcasm.dylib`` on Mac OS X. -- ``$CASMPREFIX/lib/pythonX.Y/site-packages/casm`` a python package that provides an interface between ``casm`` and the software used to calculate training data. Currently only VASP is supported. -- ``$CASMPREFIX/lib/pythonX.Y/site-packages/vasp`` a python package for running VASP calculations. +- ``$CASM_PREFIX/bin/casm`` the primary CASM program +- ``$CASM_PREFIX/bin/casm-learn`` a program for fitting effective cluster interactions (ECI) +- ``$CASM_PREFIX/bin/casm-calc`` a program that helps setup and run high throughput *ab initio* calculations +- ``$CASM_PREFIX/include/casm/`` headers files for ``libcasm`` +- ``$CASM_PREFIX/lib/libcasm.*`` a shared library containing much of the implementation. May be ``libcasm.dylib`` on Mac OS X. +- ``$CASM_PREFIX/lib/libccasm.*`` a shared library providing a C interface to ``libcasm.*`` used by the ``casm`` Python package +- ``$CASM_PREFIX/lib/pythonX.Y/site-packages/casm`` a python package that provides an interface between ``casm`` and the software used to calculate training data. Currently only VASP is supported. +- ``$CASM_PREFIX/lib/pythonX.Y/site-packages/vasp`` a python package for running VASP calculations. 
+ +The functionality provided by ``casm-calc`` is also provided by the legacy scripts: + +- ``$CASM_PREFIX/bin/vasp.setup`` a script for setting up VASP jobs +- ``$CASM_PREFIX/bin/vasp.relax`` a script for setting up and submitting VASP jobs +- ``$CASM_PREFIX/bin/vasp.relax.report`` a script for setting up and submitting VASP jobs + + #### Checking installation #### @@ -249,65 +276,71 @@ If ``casm`` is installed correctly, execute ``casm`` from any directory and you **Frequently encountered issues**: -- I tried to install (``scons install``) or uninstall (``scons install -c``), but get errors about not having permission. - - If you don't have permissions to write to ``/usr/local/``, as is usual on a computer cluster, you can change the environment variable ``$CASMPREFIX`` to a location that you do have permission to use. For instance, you can create a software directory in your home directory: +- **I tried to install (``scons install``) or uninstall (``scons install -c``), but get errors about not having permission.** + - If you don't have permissions to write to ``/usr/local/``, as is usual on a computer cluster, you can change the environment variable ``$CASM_PREFIX`` in your configure script ``$HOME/modules/casm`` to a location that you do have permission to use. For instance, you can create a software directory in your home directory: cd ~ mkdir software - Then you can edit the ``.bash_profile`` file in your home directory to set your ``$PATH`` and libary search path to include your software directory by adding the lines: + To make the changes take effect open a new session and - export CASMPREFIX=$HOME/software - export PATH=$PATH:$CASMPREFIX/bin - export CPATH=$CPATH:$CASMPREFIX/include - export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$CASMPREFIX/lib - - To make the changes take effect use ``source ~/.bash_profile`` or open a new session. Then try installing again (``cd /path/to/CASM; scons install``). 
*Note: On Mac OS X, use ``DYLD_FALLBACK_LIBRARY_PATH`` instead of ``LD_LIBRARY_PATH``.* + source $HOME/modules/casm + + Then try installing again + + cd /path/to/CASMcode + scons install - If you have administrative access you can install using ``sudo``, although this is not recommended. For example: ``sudo scons install``. -- I installed CASM without errors, but when I try to execute ``casm`` I get the error: +- **I installed CASM without errors, but when I try to execute ``casm`` I get the error**: $ casm -bash: casm: command not found - If ``scons install`` ran without error, this means that ``casm`` was installed in a directory that is not in your $PATH. You can check what directories are searched for executables using ``echo $PATH``. Possible solutions include: - 1. Uninstall CASM and re-install it in a directory that is in your ``$PATH``. This can be accomplished by uninstalling (``scons install -c``) and changing ``$CASMPREFIX`` such that ``$CASMPREFIX/bin`` is in your ``$PATH`` (``export $CASMPREFIX=/some/place/in_my_path/) - 1. Append the location where ``casm`` was installed to your ``$PATH``. This can be accomplished by adding the line ``export PATH=$PATH:/path/to/bin`` to the file ``.bash_profile`` in your home directory, where ``/path/to/bin`` is replaced with the actual path to the directory where ``casm`` is installed. + If ``scons install`` ran without error, this means that ``casm`` was installed in a directory that is not in your $PATH. You can check what directories are searched for executables using ``echo $PATH``. One solution is to: + 1. Uninstall CASM and re-install it in a directory that is in your ``$PATH``. This can be accomplished by uninstalling (``scons install -c``) and changing ``$CASM_PREFIX`` in ``$HOME/modules/casm`` such that ``$CASM_PREFIX/bin`` is in your ``$PATH``. 
To make the changes take effect open a new session and + + source $HOME/modules/casm -- I installed CASM without errors, but when I try to execute ``casm`` I get the error: +- **I installed CASM without errors, but when I try to execute ``casm`` I get the error**: $ casm casm: error while loading shared libraries: libcasm.so: cannot open shared object file: No such file or directory This means ``casm`` has been installed correctly but the shared library ``libcasm.so`` (or ``libcasm.dylib`` on Mac OS X) is not being found in any of the places being searched. Possible solutions include: - - (Linux): Update the default library search path using ``ldconfig``. For example, see [this](http://codeyarns.com/2014/01/14/how-to-add-library-directory-to-ldconfig-cache/). - - Change the ``LD_LIBRARY_PATH`` environment variable to specify which directory to search for ``libcasm`` by editing the ``.bash_profile`` file in your home directory to include: + - Check that the ``LD_LIBRARY_PATH`` environment variable in ``$HOME/modules/casm`` is specifying which directory to search for ``libcasm``: - export LD_LIBRARY_PATH=$LD_LIBRARY_PATH + export LD_LIBRARY_PATH=$CASM_PREFIX/lib:$LD_LIBRARY_PATH - *Note: On Mac OS X, use ``DYLD_FALLBACK_LIBRARY_PATH`` instead of ``LD_LIBRARY_PATH``.* - + To make the changes take effect open a new session and + + source $HOME/modules/casm + + *Note: On Mac OS X, use ``DYLD_FALLBACK_LIBRARY_PATH`` instead of ``LD_LIBRARY_PATH``.* + - (Linux): Update the default library search path using ``ldconfig``. For example, see [this](http://codeyarns.com/2014/01/14/how-to-add-library-directory-to-ldconfig-cache/). 
+ #### For developers: Testing new features #### If you are developing new features you can run all unit and integration tests from the main repository directory via: - scons test + scons unit Individual tests can also be run via: scons unit scons casm_test scons eci_search_test - # replace UnitTestName with a particular unit test (UnitTestName in tests/unit/*/UnitTestName_test.cpp) + # replace UnitTestName with a particular unit test: + # (UnitTestName in tests/unit/*/UnitTestName_test.cpp) scons UnitTestName -Individual tests may be cleaned by re-running with any of the above commands with an added ``-c``. For instance ``scons Clexulator -c`` or ``scons casm_test -c``. In particular, ``scons test`` and ``scons casm_test`` must be cleaned before re-running or there will be errors about trying to initialize a CASM project in a directory where one already exists. +Individual tests may be cleaned by re-running with any of the above commands with an added ``-c``. For instance ``scons Clexulator -c`` or ``scons casm_test -c``. -New unit tests using the Boost unit test framework can be added and automatically run by placing a ``UnitTestName_test.cpp`` file in any subdirectory of ``tests/unit`` by following the template of existing unit tests. If the unit test creates any files that it doesn't remove by itself, the ``tests/unit/SConscript`` file should be edited to enable cleaning them. +New unit tests using the Boost unit test framework can be added and run by placing a ``UnitTestName_test.cpp`` file in any subdirectory of ``tests/unit``, following the template of existing unit tests. If the unit test needs to link to libraries or creates any files that it doesn't remove by itself, the ``tests/unit/SConscript`` file should be edited to link the needed libraries and clean the generated files. diff --git a/README.md b/README.md index 2bf831cb7..b770b3ce9 100644 --- a/README.md +++ b/README.md @@ -9,6 +9,7 @@ This version of CASM supports: - Occupational degrees of freedom. 
- High-throughput calculations using: - VASP: [https://www.vasp.at](https://www.vasp.at) +- Semi-Grand canonical Monte Carlo calculations CASM is updated frequently with support for new effective Hamiltonians, new interfaces for first-principles electronic structure codes, and new Monte Carlo methods. Collaboration is welcome and new features can be incorporated by forking the repository on GitHub, creating a new feature, and submitting pull requests. If you are interested in developing features that involve a significant time investment we encourage you to first contact the CASM development team at . @@ -57,7 +58,7 @@ CASM is developed by the Van der Ven group, originally at the University of Mich **Developers**: John Goiri and Anirudh Natarajan. -**Other contributors**: Min-Hua Chen, Jonathon Bechtel, Max Radin, Elizabeth Decolvenaere and Anna Belak +**Other contributors**: Min-Hua Chen, Jonathon Bechtel, Max Radin, Elizabeth Decolvenaere, Anna Belak, Liang Tian, and Naga Sri Harsha Gunda #### Acknowledgements #### @@ -89,7 +90,7 @@ See INSTALL.md The ``casm`` executable includes extensive help documentation describing the various commands and options. Simply executing ``casm`` will display a list of possible commands, and executing ``casm -h`` will display help documentation particular to the chosen command. -For a beginner, the best place to start is to follow the suggestions printed when calling ``casm status -n``. This provides step-by-step instructions for creating a CASM project, generating symmetry information, setting composition axes, enumerating configurations, calculating energies with VASP, setting reference states, and fitting an effective Hamiltonian. The subcommand ``casm format`` provides information on the directory structure of the CASM project and the format of all the CASM files. +For a beginner, the best place to start is to follow the suggestions printed when calling ``casm status -n``. 
This provides step-by-step instructions for creating a CASM project, generating symmetry information, setting composition axes, enumerating configurations, calculating energies with VASP, setting reference states, and fitting an effective Hamiltonian using the program ``casm-learn``. The subcommand ``casm format`` provides information on the directory structure of the CASM project and the format of all the CASM files. All that is needed to start a new project is a ``prim.json`` file describing the crystal structure of the material being studied. See ``casm format --prim`` for a description and examples. Typically one will create a new project directory containing the ``prim.json`` file and then initialize the casm project. For example: @@ -108,15 +109,9 @@ All that is needed to start a new project is a ``prim.json`` file describing the After initializing a casm project: -- ``casm`` generates code that is compiled and linked at runtime in order to evaluate effective Hamiltonians in a highly optimized manner. If you installed the CASM header files in a location that is not in your default search path you must specify in your CASM project settings where to find the header files. You can inspect the current settings via ``casm settings -l``, and then add the correct include path via ``casm settings --set-compile-options``. For example: - - casm settings --set-compile-options 'g++ -O3 -Wall -fPIC --std=c++11 -I/path/to/include/casm' - -- Shared object compilation options may be set via ``casm settings --set-so-options``. For example (using the default settings): +- ``casm`` generates code that is compiled and linked at runtime in order to evaluate effective Hamiltonians in a highly optimized manner. If you installed the CASM header files and libraries in a location that is not in your default search path you must specify where to find them. 
Often the default compilation options work well, but there are some cases when the c++ compiler, compiler flags, or shared object construction flags might need to be customized. You can inspect the current settings via ``casm settings -l`` and options to change them via ``casm settings --desc``. - casm settings --set-so-options 'g++ -shared -lboost_system' -An html tutorial describing the creation of an example CASM project and typical steps is coming soon. diff --git a/SConstruct b/SConstruct index 69ab4bcee..4cb902567 100644 --- a/SConstruct +++ b/SConstruct @@ -6,7 +6,8 @@ import sys, os, glob, copy, shutil, subprocess, imp, re from os.path import join Help(""" - Type: 'scons' to build all binaries, + Type: 'scons configure' to run configuration checks, + 'scons' to build all binaries, 'scons install' to install all libraries, binaries, scripts and python packages, 'scons test' to run all tests, 'scons unit' to run all unit tests, @@ -43,10 +44,9 @@ Help(""" Sets to compile with debugging symbols. In this case, the optimization level gets set to -O0, and NDEBUG does not get set. - $LD_LIBRARY_PATH: + $LD_LIBRARY_PATH (Linux) or $DYLD_FALLBACK_LIBRARY_PATH (Mac): Search path for dynamic libraries, may need $CASM_BOOST_PREFIX/lib and $CASM_PREFIX/lib added to it. - On Mac OS X, this variable is $DYLD_FALLBACK_LIBRARY_PATH. This should be added to your ~/.bash_profile (Linux) or ~/.profile (Mac). $CASM_BOOST_NO_CXX11_SCOPED_ENUMS: diff --git a/casmenv.sh b/casmenv.sh index 23049c6cc..d72a5aee5 100644 --- a/casmenv.sh +++ b/casmenv.sh @@ -15,6 +15,8 @@ # #export CASM_BOOST_PREFIX="" +# + # Recognized by install scripts. Use this if linking to boost libraries compiled without c++11. If defined, (i.e. CASM_BOOST_NO_CXX11_SCOPED_ENUMS=1) will compile with -DBOOST_NO_CXX11_SCOPED_ENUMS option. # Order of precedence: # 1) if $CASM_BOOST_NO_CXX11_SCOPED_ENUMS defined @@ -105,6 +107,17 @@ if [ ! 
-z ${CASM_PREFIX} ]; then fi +# If CASM_BOOST_PREFIX is set, update library search path +if [ ! -z ${CASM_BOOST_PREFIX} ]; then + + # For Linux, set LD_LIBRARY_PATH + export LD_LIBRARY_PATH=$CASM_BOOST_PREFIX/lib:$LD_LIBRARY_PATH + + # For Mac, set DYLD_LIBRARY_FALLBACK_PATH + export DYLD_FALLBACK_LIBRARY_PATH=$CASM_BOOST_PREFIX/lib:$DYLD_FALLBACK_LIBRARY_PATH + +fi + # If testing: if [ ! -z ${CASM_REPO} ]; then @@ -114,25 +127,12 @@ if [ ! -z ${CASM_REPO} ]; then export PATH=$CASM_REPO/bin:$CASM_REPO/python/casm/scripts:$PATH export PYTHONPATH=$CASM_REPO/python/casm:$PYTHONPATH - if [ ! -z ${DYLD_FALLBACK_LIBRARY_PATH} ]; then - # For testing on Mac, use DYLD_FALLBACK_LIBRARY_PATH: - export DYLD_FALLBACK_LIBRARY_PATH=$CASM_REPO/lib:$DYLD_FALLBACK_LIBRARY_PATH - - # If CASM_BOOST_PREFIX is set, update library search path - if [ ! -z ${CASM_BOOST_PREFIX} ]; then - # For testing on Mac, set DYLD_LIBRARY_FALLBACK_PATH - export DYLD_FALLBACK_LIBRARY_PATH=$CASM_BOOST_PREFIX/lib:$DYLD_FALLBACK_LIBRARY_PATH - fi - else - # For testing on Linux, use LD_LIBRARY_PATH: - export LD_LIBRARY_PATH=$CASM_REPO/lib:$LD_LIBRARY_PATH - - # If CASM_BOOST_PREFIX is set, update library search path - if [ ! -z ${CASM_BOOST_PREFIX} ]; then - # For testing on Mac, set DYLD_LIBRARY_FALLBACK_PATH - export LD_LIBRARY_PATH=$CASM_BOOST_PREFIX/lib:$LD_LIBRARY_PATH - fi - fi + # For testing on Linux, use LD_LIBRARY_PATH: + export LD_LIBRARY_PATH=$CASM_REPO/lib:$LD_LIBRARY_PATH + + # For testing on Mac, use DYLD_FALLBACK_LIBRARY_PATH: + export DYLD_FALLBACK_LIBRARY_PATH=$CASM_REPO/lib:$DYLD_FALLBACK_LIBRARY_PATH + fi diff --git a/src/casm/app/status.cc b/src/casm/app/status.cc index 4cee6619c..61f481a46 100644 --- a/src/casm/app/status.cc +++ b/src/casm/app/status.cc @@ -160,17 +160,14 @@ Instructions for volume relaxed VASP energies: \n\n\ - Selections may be used to query the properties of particular \n\ configurations using the 'casm query' command. 
See 'casm query -h' \n\ for the complete list of options. \n\ -- Execute: 'casm run -e \"vasp.relax\"' to submit VASP jobs for all \n\ - selected configurations. \n\ -- This depends on the python modules 'pbs', 'casm', and 'vasp' being \n\ - installed and the script 'vasp.relax' being in the PATH. Only \n\ - configurations which have not yet been calculated will run. \n\ -- Note: You can also use 'casm run -e \"vasp.setup\"' to setup VASP \n\ - input files for all selected configuration, but not submit the jobs. \n\ - This is often a useful first step to check that input files have been\n\ - prepared correctly. \n\ -- Note: The functionality of 'vasp.relax' and 'vasp.setup' is now also \n\ - included in the 'casm-calc' program. See 'casm-calc -h' for help. \n\ +- Execute: 'casm-calc --setup' to setup VASP input files for all \n\ + selected configuration, but not submit the jobs. This is often a \n\ + useful first step to check that input files have been prepared \n\ + correctly. \n\ +- Execute: 'casm-calc --submit' to submit VASP jobs for all selected \n\ + configurations. Only configurations which have not yet been \n\ + calculated will run. \n\ +- See 'casm-calc -h' for help and other options. 
\n\ - VASP results will be stored at: \n\ '$ROOT/training_data/$SCELNAME/$CONFIGID/$CURR_CALCTYPE/properties.calc.json'\n\ Results in 'properties.calc.json' are expected to be ordered to match\n\ From f1bdc0fd8cd1e0764ebba0a2fb790ff77960b79b Mon Sep 17 00:00:00 2001 From: bpuchala Date: Sun, 14 Aug 2016 12:19:10 -0400 Subject: [PATCH 10/10] Add 'casm-learn --desc' with extended usage description --- python/casm/scripts/casm-learn | 180 ++++++++++++++++++++++++++++++--- 1 file changed, 167 insertions(+), 13 deletions(-) diff --git a/python/casm/scripts/casm-learn b/python/casm/scripts/casm-learn index 60db64160..40aa9e09d 100755 --- a/python/casm/scripts/casm-learn +++ b/python/casm/scripts/casm-learn @@ -9,6 +9,7 @@ import deap.tools if __name__ == "__main__": parser = argparse.ArgumentParser(description = 'Fit cluster expansion coefficients (ECI)') + parser.add_argument('--desc', help='Print extended usage description', action="store_true") parser.add_argument('-s', '--settings', nargs=1, help='Settings input filename', type=str) parser.add_argument('--format', help='Hall of fame print format. Options are "details", "json", or "csv".', type=str, default=None) #parser.add_argument('--path', help='Path to CASM project. Default assumes the current directory is in the CASM project.', type=str, default=os.getcwd()) @@ -126,28 +127,181 @@ if __name__ == "__main__": # pickle hall of fame casm.learn.save_halloffame(hall, halloffame_filename, args.verbose) - else: + elif args.desc: print \ """ - Learning is performed in four steps: - 1) Select training data. - Create a selection of configurations to include in the regression problem. + 1) Specify the problem: + + 'casm-learn' helps solve the problem: + + X*b = y, + + where: + + X: 2d matrix of shape (n_samples, n_features) + The correlation matrix, holding the evaluated basis functions. The + entry X[config, bfunc] holds the average value of the 'bfunc' cluster + basis function for configuration 'config'. 
The number of configurations + is 'n_samples' and the number of cluster basis functions is 'n_features'. + + y: 1d matrix of shape (n_samples, 1) + The calculated properties being fit to. The most common case is that + y[config] holds the formation energy calculated for configuration + 'config'. + + b: 1d matrix of shape (n_features, 1) + The effective cluster interactions (ECI) being solved for. + + To specify this problem, the 'casm-learn' input file specifies which + configurations to fit to (the training data), how to weight the + configurations, and how to compare solutions via cross-validation. + + + Training data may be input via a 'casm select' output file. The default + name expected is 'train'. So to use all calculated configurations, you + could create a directory in your CASM project where you will perform + fitting and generate a 'train' file: + + cd /my/casm/project + mkdir fit_1 && cd fit_1 + casm select --set is_calculated -o train + + + Example 'casm-learn' JSON input files can be output by the + 'casm-learn --exMethodName' options: + + casm-learn --exGeneticAlgorithm > fit_1_ga.json + casm-learn --exRFE > fit_1_rfe.json + ...etc.. + + By default, these settings files are prepared for fitting formation_energy, + using the 'train' configuration selection. Edit the file as needed, and + see 'casm-learn --settings-format' for help. + + + When weighting configurations, the problem is transformed: + + X*b = y -> L*X*b = L*y, + + where, W = L*L.tranpose(): + + W: 2d matrix of shape (n_samples, n_samples) + The weight matrix is specified in the casm-learn input file. If the + weighting method provides 1-dimensional input (this is typical, i.e. + a weight for each configuration), in an array called 'w', then: + + W = diag(w)*n_samples/sum(w), + + diag(w) being the diagonal matrix with 'w' along the diagonal. + + + A cross-validation score is used for comparing generated ECI. 
The cv score + reported is: + + cv = sqrt(mean(scores)) + N_nonzero_eci*penalty, + + where: + + scores: 1d array of shape (number of train/test sets) + The mean squared error calculated for each training/testing set + + N_nonzero_eci: int + The number of basis functions with non-zero ECI + + penalty: number, optional, default=0.0 + Is the user-input penalty per basis function that can be used to + favor solutions with a small number of non-zero ECI + + See 'casm-learn --settings-format' for help specifying the cross-validation + training and test sets using options from scikit-learn. It is usually + important to use the 'shuffle'=true option so that configurations are + randomly added to train/test sets and not ordered by supercell size. + + + When you run 'casm-learn' with a new problem specification the first time, + it generates a "problem specs" file that stores the training data, weights, + and cross-validation train/test sets. Then, when running subsequent times, + the data can be loaded more quickly, and the cross-validation can be + performed using the same train/test sets. 'casm-learn' will attempt to + prevent you from re-running with a different problem specification so that + solutions can be compared via their cv score in an "apples-to-apples" + manner. The default name for the "specs" file is determined from the input + filename. For example, 'my_input_specs.pkl' is used if the input file is + named 'my_input.json'. See 'casm-learn --settings-format' for more help. + + + The '--checkspecs' option can be used to write output files with the + generated problem specs data. Amont other things, this can be used to + adjust weights manually or save and re-use train/test sets. See + 'casm-learn --settings-format' for more help. + + + 2) Select estimator and feature selection methods + + The "estimator" option specifies a linear model estimator that determines + how to solve the linear problem L*X*b = L*b, for b. 
+ + The "feature_selection" option specifies a feature selection method that + determines which features (ECI) should be considered for the solution. The + remaining are effectively set to 0.0 when calculating the cluster + expansion. Generally there is a tradeoff: By limiting the number of + features included in the cluster expansion Monte Carlo calculations can be + more efficient, but at a possible loss of accuracy. Be careful to avoid + overfitting however. If your cross validation scheme does not provide + enough testing data, you may fit your training data very well, but not + have an accurate extrapolation to other configurations. + + See 'casm-learn --settings-format' for help specifying the estimator and + feature selection methods. Assuming you are using the GeneticAlgorithm and + have named your input file 'fit_1_ga.json', run: + + casm-learn -s fit_1_ga.json + + 'casm-learn' will run and eventually store its results. For a single + problem specification (step 1, the settings in "problem_specs" + in the 'casm-learn' input file), you may try many different estimation + and feature selection methods and use the cv score to compare results. All + the results for a single problem specification can be stored in a 'Hall Of + Fame' that collects the N individual solutions with the best cv scores. To + view these results use: + + casm-learn -s fit_1_ga.json --hall + + For more details, or to output the results for further analysis in JSON or + CSV format, there is a '--format' option. To view only particular + individuals in the hall of fame, there is a '--indiv' option. + - 2) Select scoring metric. - Add sample weights to configurations if desired and select a cross validation - method. + 3) Analyze results + + The above steps (1) and (2) may be repeated many times as you attempt to + optimize your ECI. Solutions for different problems (i.e. 
different + weighting schemes, re-calculating with more training data) may be compared + based on scientific knowledge, for instance, which predicts the 0K ground + state configurations correctly, or from analysis of Monte Carlo results. + + The '--checkhull' option provides a simple way to check the 0K ground + states and can create 'casm select' style output files with enumerated but + uncalculated configurations that are predicted to be low energy. These can + then be used to generate more training data and re-fit the ECI. + + When you have generated ECI that you wish to use in Monte Carlo + calculations, use the '--select' option to write an 'eci.json' file into + your CASM project for the currently selected cluster expansion (as listed + by 'casm settings -l). - 3) Select estimator. - Choose how to solve for ECI from calculated property and correlations. For - instance: LinearRegression, Lasso, or Ridge regression. - 4) Select features. - Select which basis functions to include in the cluster expansion. For instance, - SelectFromModel along with a l-1 norm minimizing estimator. Or GeneticAlgorithm. + 4) Use results + + Once an 'eci.json' file has been written, you can run Monte Carlo + calculations. See 'casm monte -h' and 'casm format --monte' for help. + """ + else: + parser.print_help()