-
Notifications
You must be signed in to change notification settings - Fork 234
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #2989 from opencog/load-csv-tables
Load CSV tables into AtomSpace
- Loading branch information
Showing
18 changed files
with
1,223 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
# | ||
# This is a simple demo CSV file. | ||
# It contains a table of data, in comma-separated-value format. | ||
# You can also use tab-separated values. | ||
# | ||
# This table contains a text column header. | ||
# The column labels can be anything. | ||
# If the header is absent, default labels will be generated. | ||
# | ||
b1, b2, b3, flt1, flt2, lbl | ||
|
||
# Now for some data. Three columns of binary numbers, | ||
# Two floats, and one column of strings. | ||
0, 0, 1, 3.3, 4.4, "one" | ||
0, 0, 1, 4.4, 5.5, "one" | ||
0, 1, 1, 3.4, 6.5, "three" | ||
1, 0, 1, 2.4, 7.5, "five" | ||
|
||
# T and F are maybe better for binary ... | ||
T, F, T, 4, 9, "five" | ||
T, T, F, 5, 11, "six" | ||
T, T, T, 2, 8.9, "seven" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,110 @@ | ||
; | ||
; table.scm -- Formulas applied to Values from a CSV/TSV table. | ||
; | ||
; This is similar to the `flows.scm` demo, except that the values | ||
; are fetched from a conventional DSV (delimiter-separated-value) | ||
; table. The demo is in two parts. The first part reads the table, | ||
; (a one-liner) and explores how it is represented in the AtomSpace. | ||
; The second part applies some formulas to the table columns. | ||
; | ||
; The second part of the demo is interesting, because it shows how | ||
; functions, written in Atomese, can be applied to tables, and how | ||
; a "utility function" or a "scoring function" can be written. | ||
; Utility functions are commonly used in machine learning, they | ||
; provide a grand-total score that can be maximized or minimized during | ||
; training. The interesting point here is that the scoring function | ||
; is represented in Atomese: it is some tree, some DAG of inputs. | ||
; These trees can be randomly generated and mutated, thus allowing | ||
; genetic-programming algorithms to be implemented in the AtomSpace. | ||
; | ||
; This is, of course, exactly what AS-MOSES does. This is effectively | ||
; a demo of a sub-component of the AS-MOSES subsystem. | ||
; | ||
(use-modules (opencog) (opencog exec)) | ||
(use-modules (opencog csv-table)) | ||
|
||
; Create an Atom on which the table will be located. | ||
(define tab (Concept "My foo Table")) | ||
|
||
; Load the table (located in this directory.) | ||
(load-table tab "table.csv") | ||
|
||
; Verify that the table loaded. First, take a look at all of the keys: | ||
(cog-keys tab) | ||
|
||
; The ordered list of all the columns will be located at the | ||
; "well-known predicate". All tables will have this; it is an | ||
; ordered list of the columns in the table (in the same order | ||
; as the file.) | ||
(define colkeys (Predicate "*-column-keys-*")) | ||
(cog-value tab colkeys) | ||
|
||
; Verify that the data for each column is present. | ||
; Loop over the columns, and print the keys and values on them. | ||
(for-each | ||
(lambda (KEY) | ||
(format #t "The key ~A holds data ~A\n" KEY (cog-value tab KEY))) | ||
(cog-value->list (cog-value tab colkeys))) | ||
; | ||
; ------------------------------------------------------------------- | ||
; Part two: apply some formulas to the columns. | ||
; | ||
; Note that `cog-value` and `cog-execute! ValueOf` return the same thing: | ||
(cog-value tab (PredicateNode "flt1")) | ||
(cog-execute! (ValueOf tab (PredicateNode "flt1"))) | ||
|
||
; Take the difference of two columns. Note that `FloatValueOf` is | ||
; used instead of `ValueOf`, so that the type-checking subsystem | ||
; is happy about the types passed to the operator. | ||
(cog-execute! | ||
(Minus | ||
(FloatValueOf tab (PredicateNode "flt2")) | ||
(FloatValueOf tab (PredicateNode "flt1")))) | ||
|
||
; The above can be wrapped into a function. Several examples follow, | ||
; below. First, a function that takes the table as an argument, | ||
; subtracts two columns, and places the result in a third column. | ||
; The column names are hard-coded in the function. | ||
|
||
; Define the schema "col diffs": a function of one argument, the Atom | ||
; that the table columns are attached to. When applied, it takes the | ||
; difference of the "flt2" and "flt1" columns and stores the result on | ||
; that same Atom, under the key (Predicate "f2 minus f1"). | ||
(DefineLink | ||
(DefinedSchema "col diffs") | ||
(Lambda | ||
; The single argument: the Atom holding the table. | ||
(Variable "$tbl-name") | ||
(SetValue | ||
; Where the result goes: same Atom, key "f2 minus f1". | ||
(Variable "$tbl-name") (Predicate "f2 minus f1") | ||
; What gets stored: column flt2 minus column flt1. | ||
(Minus | ||
(FloatValueOf (Variable "$tbl-name") (PredicateNode "flt2")) | ||
(FloatValueOf (Variable "$tbl-name") (PredicateNode "flt1")))))) | ||
|
||
; Apply the function to the table. | ||
(cog-execute! (Put (DefinedSchema "col diffs") tab)) | ||
|
||
; Verify that the new column showed up. | ||
(cog-keys tab) | ||
|
||
; .. and that it contains the expected data. | ||
(cog-value tab (Predicate "f2 minus f1")) | ||
|
||
;-------- | ||
; The AccumulateLink can be used to sum up all of the rows in a column. | ||
(cog-execute! | ||
(Accumulate (FloatValueOf tab (Predicate "f2 minus f1")))) | ||
|
||
; This can be turned into a simple scoring function. It computes the | ||
; sum-total of the difference of two columns. This is a score, in that | ||
; it is a single number that can be used as a utility function in | ||
; conventional machine-learning algos. | ||
; Define the schema "compute score": a function of one argument, the | ||
; Atom that the table columns are attached to. Unlike "col diffs", it | ||
; contains no SetValue: the difference of the "flt2" and "flt1" columns | ||
; is passed directly to Accumulate. | ||
(DefineLink | ||
(DefinedSchema "compute score") | ||
(Lambda | ||
; The single argument: the Atom holding the table. | ||
(Variable "$tbl-name") | ||
; Accumulate is applied to the column difference. | ||
(Accumulate | ||
(Minus | ||
(FloatValueOf (Variable "$tbl-name") (PredicateNode "flt2")) | ||
(FloatValueOf (Variable "$tbl-name") (PredicateNode "flt1")))))) | ||
|
||
; Apply the function to the table. | ||
(cog-execute! (Put (DefinedSchema "compute score") tab)) | ||
|
||
; That's all, folks. | ||
; ------------------------------------------------------------------- |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
|
||
# CSV/DSV table loading. | ||
ADD_LIBRARY (csv | ||
table_read.cc | ||
) | ||
|
||
ADD_DEPENDENCIES(csv opencog_atom_types) | ||
|
||
TARGET_LINK_LIBRARIES(csv | ||
atomspace | ||
atombase | ||
${COGUTIL_LIBRARY} | ||
) | ||
|
||
INSTALL (TARGETS csv EXPORT AtomSpaceTargets | ||
DESTINATION "lib${LIB_DIR_SUFFIX}/opencog" | ||
) | ||
|
||
INSTALL (FILES | ||
table_read.h | ||
DESTINATION "include/opencog/persist/csv" | ||
) | ||
|
||
# ------------------------------- | ||
|
||
ADD_LIBRARY (csv-table | ||
TableSCM.cc | ||
) | ||
|
||
TARGET_LINK_LIBRARIES(csv-table | ||
csv | ||
atomspace | ||
smob | ||
) | ||
|
||
ADD_GUILE_EXTENSION(SCM_CONFIG csv-table "opencog-ext-path-csv-table") | ||
|
||
INSTALL (TARGETS csv-table EXPORT AtomSpaceTargets | ||
DESTINATION "lib${LIB_DIR_SUFFIX}/opencog" | ||
) | ||
|
||
# ------------------------------- |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
|
||
Load Ordinary CSV Tables | ||
======================== | ||
The code here is able to load "delimiter-separated values" (DSV, | ||
or CSV, TSV for comma and tab separators) from a file. These are | ||
just very conventional tables. | ||
|
||
Each column from a DSV file is read in and placed into an Atomese | ||
Value on an indicated Atom. Atomese Values are vectors (of floats, | ||
bools, strings). Each Value holds one column from the dataset. | ||
|
||
Basically, this just gets CSV data into the AtomSpace, where it | ||
becomes easy for Atomese programs to act on them, i.e. to use them | ||
as input for some kind of data stream processing. | ||
|
||
The features (columns) specified in ignore_features will be omitted | ||
from the representation. | ||
|
||
Example | ||
------- | ||
For example, a CSV dataset like this: | ||
``` | ||
o, i1, i2, i3, i4 | ||
1, 0, 0, 3.3, "foo" | ||
0, 1, 0, 4.4, "bar" | ||
``` | ||
will be loaded as key-value pairs on the `anchor` Atom. | ||
|
||
The column names will be loaded under a "well known key": | ||
``` | ||
(Predicate "*-column-keys-*") | ||
``` | ||
This key will point at a value holding a list of all of the | ||
column-keys in the table: | ||
``` | ||
(LinkValue | ||
(Predicate "o") | ||
(Predicate "i1") | ||
(Predicate "i2") | ||
(Predicate "i3") | ||
(Predicate "i4")) | ||
``` | ||
Then, under each key, there will be a column of values: | ||
``` | ||
(Predicate "o") (BoolValue 1 0) | ||
(Predicate "i1") (BoolValue 0 1) | ||
(Predicate "i2") (BoolValue 0 0) | ||
(Predicate "i3") (FloatValue 3.3 4.4) | ||
(Predicate "i4") (StringValue "foo" "bar") | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
/* | ||
* opencog/persist/csv/TableSCM.cc | ||
* | ||
* Copyright (c) 2008 by OpenCog Foundation | ||
* Copyright (c) 2008, 2009, 2013, 2015, 2022 Linas Vepstas <[email protected]> | ||
* All Rights Reserved | ||
* | ||
* This program is free software; you can redistribute it and/or modify | ||
* it under the terms of the GNU Affero General Public License v3 as | ||
* published by the Free Software Foundation and including the exceptions | ||
* at http://opencog.org/wiki/Licenses | ||
* | ||
* This program is distributed in the hope that it will be useful, | ||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
* GNU General Public License for more details. | ||
* | ||
* You should have received a copy of the GNU Affero General Public License | ||
* along with this program; if not, write to: | ||
* Free Software Foundation, Inc., | ||
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||
*/ | ||
|
||
#ifndef _OPENCOG_CSV_TABLE_SCM_H | ||
#define _OPENCOG_CSV_TABLE_SCM_H | ||
|
||
#include <opencog/guile/SchemeModule.h> | ||
|
||
namespace opencog | ||
{ | ||
/** \addtogroup grp_persist | ||
* @{ | ||
*/ | ||
|
||
class TableSCM : public ModuleWrap | ||
{ | ||
private: | ||
void init(void); | ||
|
||
void load_table(const Handle&, const std::string&); | ||
public: | ||
TableSCM(void); | ||
}; // class | ||
|
||
/** @}*/ | ||
} // namespace | ||
|
||
extern "C" { | ||
void opencog_csv_table_init(void); | ||
}; | ||
|
||
#endif // _OPENCOG_CSV_TABLE_SCM_H | ||
|
||
#include <opencog/atomspace/AtomSpace.h> | ||
#include <opencog/guile/SchemePrimitive.h> | ||
|
||
#include "table_read.h" | ||
|
||
using namespace opencog; | ||
|
||
/// Builds the wrapper for the "opencog csv-table" module.
///
/// Module initialisation is performed only by the first instance ever
/// constructed; later constructions skip it.
TableSCM::TableSCM()
    : ModuleWrap("opencog csv-table")
{
    // Shared across all instances: records whether module_init()
    // has already been triggered.
    static bool already_initialized = false;

    if (!already_initialized)
    {
        // Set the flag before initialising, exactly as the guard
        // is meant to work: at most one call to module_init().
        already_initialized = true;
        module_init();
    }
}
|
||
// Temporary(?) Hacky experimental API. Subject to change. | ||
void TableSCM::init(void) | ||
{ | ||
define_scheme_primitive("load-table", | ||
&TableSCM::load_table, this, "csv-table"); | ||
} | ||
|
||
// ===================================================================== | ||
|
||
void TableSCM::load_table(const Handle& h, const std::string& path) | ||
{ | ||
const AtomSpacePtr& as = SchemeSmob::ss_get_env_as("load-table"); | ||
opencog::load_csv_table(as, h, path); | ||
} | ||
|
||
void opencog_csv_table_init(void) | ||
{ | ||
static TableSCM patty; | ||
} |
Oops, something went wrong.