diff --git a/README.md b/README.md
index 1d1aa0d..2541cc1 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,53 @@
 # LSCDetection
-Data Sets and Models for Evaluation of Lexical Semantic Change Detection
+Data Sets and Models for Evaluation of Lexical Semantic Change Detection.
+
+If you use this software for academic research, [please cite this paper](#bibtex) and make sure you give appropriate credit to the software mentioned below, on which this repository strongly depends.
+
+The code heavily relies on [DISSECT](http://clic.cimec.unitn.it/composes/toolkit/introduction.html) (modules/composes). For aligning embeddings (SGNS/SVD/RI) we used [VecMap](https://github.com/artetxem/vecmap) (alignment/map_embeddings.py). We used the SGNS implementation provided by [gensim](https://github.com/rare-technologies/gensim).
+
+### Testsets
+
+In `testsets/` we provide the versions of the DURel and SURel test sets as used in the paper.
+
+### Usage Note
+
+The scripts should be run directly from the main directory. If you wish to run them from elsewhere, you may have to adjust the path added in `sys.path.append('./modules/')` in the scripts. All scripts can be run directly from the command line, e.g.:
+
+    python representations/count.py
+
+We recommend running the scripts with the Anaconda distribution of Python (Python 2.7.15); only VecMap requires Python 3. You will have to install some additional packages such as docopt and gensim. Packages that are not available from the Anaconda installer can be installed via EasyInstall or by running `pip install -r requirements.txt`.
+
+### Pipeline
+
+Under `scripts/` you will find an example of a full pipeline for the models on a small test corpus. Assuming you are working on a UNIX-based system, first make the scripts executable with
+
+    chmod 755 scripts/*.sh
+
+Then run any of
+
+    bash -e scripts/make_results_sim.sh
+    bash -e scripts/make_results_disp.sh
+    bash -e scripts/make_results_wi.sh
+
+The script `make_results_sim.sh` produces results for the similarity measures (Cosine Distance, Local Neighborhood Distance) for all vector space and alignment types except Word Injection. It first reads the gzipped test corpus in `corpora/test/corpus.txt.gz`, with each line in the following format:
+
+    year [tab] word1 word2 word3...
+
+It then produces model predictions for the targets in `testsets/test/targets.tsv`, writes them under `results/` and correlates the predictions with the gold rank in `testsets/test/gold.tsv`. Finally, it writes the Spearman correlation between each model prediction and the gold rank under `results/`.
+
+The scripts `make_results_disp.sh` and `make_results_wi.sh` do the same for the dispersion measures (Frequency, Types, Entropy Difference) and for the similarity measures with Word Injection.
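+
+For orientation, the correlation step at the end of the pipeline is also available as a standalone script (`evaluation/spearman.py`). The following is only a minimal sketch of what that step computes, assuming two tab-separated files whose rows are in the same order and whose second column holds the values to correlate (the prediction file name is a placeholder):
+
+    import codecs
+    from scipy.stats import spearmanr
+
+    # Read model predictions and gold values (second column of each file)
+    with codecs.open('results/model.csv', 'r', 'utf-8') as f_in:
+        pred = [float(line.strip().split('\t')[1]) for line in f_in]
+    with codecs.open('testsets/test/gold.tsv', 'r', 'utf-8') as f_in:
+        gold = [float(line.strip().split('\t')[1]) for line in f_in]
+
+    # Spearman correlation, omitting nan values as in evaluation/spearman.py
+    rho, p = spearmanr(pred, gold, nan_policy='omit')
+    print rho, p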
+ +BibTex +-------- + +``` +@inproceedings{Schlechtwegetal19, +title = {{A Wind of Change: Detecting and Evaluating Lexical Semantic Change across Times and Domains}}, +author = {Dominik Schlechtweg and Anna H\"{a}tty and Marco del Tredici and Sabine {Schulte im Walde}}, +booktitle = "Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)", +year = "2019", +address = "Florence, Italy", +publisher = "Association for Computational Linguistics" +} +``` + diff --git a/alignment/ci_align.py b/alignment/ci_align.py new file mode 100644 index 0000000..dc82033 --- /dev/null +++ b/alignment/ci_align.py @@ -0,0 +1,83 @@ +import sys +sys.path.append('./modules/') + +from docopt import docopt +from dsm import load_pkl_files, save_pkl_files +from composes.semantic_space.space import Space +from composes.matrix.sparse_matrix import SparseMatrix +from scipy.sparse import linalg +import logging +import time + + +def main(): + """ + Align two sparse matrices by intersecting their columns. + """ + + # Get the arguments + args = docopt('''Align two sparse matrices by intersecting their columns. + + Usage: + ci_align.py [-l] + + = output path for aligned space 1 + = output path for aligned space 2 + = path to pickled space1 without suffix + = path to pickled space2 without suffix + + Options: + -l, --len normalize final vectors to unit length + + ''') + + is_len = args['--len'] + spacePrefix1 = args[''] + spacePrefix2 = args[''] + outPath1 = args[''] + outPath2 = args[''] + + logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO) + logging.info(__file__.upper()) + start_time = time.time() + + # Get the two matrices as spaces and intersect their columns + space1 = load_pkl_files(spacePrefix1) + space2 = load_pkl_files(spacePrefix2) + id2row1 = space1.get_id2row() + id2row2 = space2.get_id2row() + id2column1 = space1.get_id2column() + id2column2 = space2.get_id2column() + column2id1 = space1.get_column2id() + column2id2 = space2.get_column2id() + intersected_columns = list(set(id2column1).intersection(id2column2)) + intersected_columns_id1 = [column2id1[item] for item in intersected_columns] + intersected_columns_id2 = [column2id2[item] for item in intersected_columns] + reduced_matrix1 = space1.get_cooccurrence_matrix()[:, intersected_columns_id1].get_mat() + reduced_matrix2 = space2.get_cooccurrence_matrix()[:, intersected_columns_id2].get_mat() + + if is_len: + # L2-normalize vectors + l2norm1 = linalg.norm(reduced_matrix1, axis=1, ord=2) + l2norm2 = linalg.norm(reduced_matrix2, axis=1, ord=2) + l2norm1[l2norm1==0.0] = 1.0 # Convert 0 values to 1 + l2norm2[l2norm2==0.0] = 1.0 # Convert 0 values to 1 + reduced_matrix1 /= l2norm1.reshape(len(l2norm1),1) + reduced_matrix2 /= l2norm2.reshape(len(l2norm2),1) + + # Make new spaces + reduced_space1 = Space(SparseMatrix(reduced_matrix1), id2row1, intersected_columns) + reduced_space2 = Space(SparseMatrix(reduced_matrix2), id2row2, intersected_columns) + + if reduced_space1.get_id2column()!=reduced_space2.get_id2column(): + sys.exit('Two spaces not properly aligned!') + + # Save the Space object in pickle format + save_pkl_files(reduced_space1, outPath1 + '.sm', save_in_one_file=True) + save_pkl_files(reduced_space2, outPath2 + '.sm', save_in_one_file=True) + + logging.info("--- %s seconds ---" % (time.time() - start_time)) + + +if __name__ == '__main__': + main() diff --git a/alignment/map_embeddings.py b/alignment/map_embeddings.py new file mode 100644 index 
0000000..d04e7b8 --- /dev/null +++ b/alignment/map_embeddings.py @@ -0,0 +1,435 @@ +import sys +sys.path.append('./modules/') + +# Copyright (C) 2016-2018 Mikel Artetxe +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +import embeddings +from cupy_utils import * + +import argparse +import collections +import numpy as np +import re +import sys +import time +import logging + + +logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO) +logging.info(__file__.upper()) +start_time = time.time() + + +def dropout(m, p): + if p <= 0.0: + return m + else: + xp = get_array_module(m) + mask = xp.random.rand(*m.shape) >= p + return m*mask + + +def topk_mean(m, k, inplace=False): # TODO Assuming that axis is 1 + xp = get_array_module(m) + n = m.shape[0] + ans = xp.zeros(n, dtype=m.dtype) + if k <= 0: + return ans + if not inplace: + m = xp.array(m) + ind0 = xp.arange(n) + ind1 = xp.empty(n, dtype=int) + minimum = m.min() + for i in range(k): + m.argmax(axis=1, out=ind1) + ans += m[ind0, ind1] + m[ind0, ind1] = minimum + return ans / k + + +def main(): + # Parse command line arguments + parser = argparse.ArgumentParser(description='Map word embeddings in two languages into a shared space') + parser.add_argument('src_input', help='the input source embeddings') + parser.add_argument('trg_input', help='the input target embeddings') + parser.add_argument('src_output', help='the output source embeddings') + parser.add_argument('trg_output', help='the output target embeddings') + parser.add_argument('--encoding', default='utf-8', help='the character encoding for input/output (defaults to utf-8)') + parser.add_argument('--precision', choices=['fp16', 'fp32', 'fp64'], default='fp32', help='the floating-point precision (defaults to fp32)') + parser.add_argument('--cuda', action='store_true', help='use cuda (requires cupy)') + parser.add_argument('--batch_size', default=10000, type=int, help='batch size (defaults to 10000); does not affect results, larger is usually faster but uses more memory') + parser.add_argument('--seed', type=int, default=0, help='the random seed (defaults to 0)') + + recommended_group = parser.add_argument_group('recommended settings', 'Recommended settings for different scenarios') + recommended_type = recommended_group.add_mutually_exclusive_group() + recommended_type.add_argument('--supervised', metavar='DICTIONARY', help='recommended if you have a large training dictionary') + recommended_type.add_argument('--semi_supervised', metavar='DICTIONARY', help='recommended if you have a small seed dictionary') + recommended_type.add_argument('--identical', action='store_true', help='recommended if you have no seed dictionary but can rely on identical words') + recommended_type.add_argument('--unsupervised', action='store_true', help='recommended if you have no seed dictionary and do not want to rely on identical words') + recommended_type.add_argument('--acl2018', action='store_true', help='reproduce 
our ACL 2018 system') + recommended_type.add_argument('--aaai2018', metavar='DICTIONARY', help='reproduce our AAAI 2018 system') + recommended_type.add_argument('--acl2017', action='store_true', help='reproduce our ACL 2017 system with numeral initialization') + recommended_type.add_argument('--acl2017_seed', metavar='DICTIONARY', help='reproduce our ACL 2017 system with a seed dictionary') + recommended_type.add_argument('--emnlp2016', metavar='DICTIONARY', help='reproduce our EMNLP 2016 system') + + init_group = parser.add_argument_group('advanced initialization arguments', 'Advanced initialization arguments') + init_type = init_group.add_mutually_exclusive_group() + init_type.add_argument('-d', '--init_dictionary', default=sys.stdin.fileno(), metavar='DICTIONARY', help='the training dictionary file (defaults to stdin)') + init_type.add_argument('--init_identical', action='store_true', help='use identical words as the seed dictionary') + init_type.add_argument('--init_numerals', action='store_true', help='use latin numerals (i.e. words matching [0-9]+) as the seed dictionary') + init_type.add_argument('--init_unsupervised', action='store_true', help='use unsupervised initialization') + init_group.add_argument('--unsupervised_vocab', type=int, default=0, help='restrict the vocabulary to the top k entries for unsupervised initialization') + + mapping_group = parser.add_argument_group('advanced mapping arguments', 'Advanced embedding mapping arguments') + mapping_group.add_argument('--normalize', choices=['unit', 'center', 'unitdim', 'centeremb', 'none'], nargs='*', default=[], help='the normalization actions to perform in order') + mapping_group.add_argument('--whiten', action='store_true', help='whiten the embeddings') + mapping_group.add_argument('--src_reweight', type=float, default=0, nargs='?', const=1, help='re-weight the source language embeddings') + mapping_group.add_argument('--trg_reweight', type=float, default=0, nargs='?', const=1, help='re-weight the target language embeddings') + mapping_group.add_argument('--src_dewhiten', choices=['src', 'trg'], help='de-whiten the source language embeddings') + mapping_group.add_argument('--trg_dewhiten', choices=['src', 'trg'], help='de-whiten the target language embeddings') + mapping_group.add_argument('--dim_reduction', type=int, default=0, help='apply dimensionality reduction') + mapping_type = mapping_group.add_mutually_exclusive_group() + mapping_type.add_argument('-c', '--orthogonal', action='store_true', help='use orthogonal constrained mapping') + mapping_type.add_argument('-u', '--unconstrained', action='store_true', help='use unconstrained mapping') + + self_learning_group = parser.add_argument_group('advanced self-learning arguments', 'Advanced arguments for self-learning') + self_learning_group.add_argument('--self_learning', action='store_true', help='enable self-learning') + self_learning_group.add_argument('--vocabulary_cutoff', type=int, default=0, help='restrict the vocabulary to the top k entries') + self_learning_group.add_argument('--direction', choices=['forward', 'backward', 'union'], default='union', help='the direction for dictionary induction (defaults to union)') + self_learning_group.add_argument('--csls', type=int, nargs='?', default=0, const=10, metavar='NEIGHBORHOOD_SIZE', dest='csls_neighborhood', help='use CSLS for dictionary induction') + self_learning_group.add_argument('--threshold', default=0.000001, type=float, help='the convergence threshold (defaults to 0.000001)') + 
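+    # The stochastic_* options below control dropout-based dictionary induction in the
+    # self-learning loop: the keep probability starts at --stochastic_initial and is
+    # multiplied by --stochastic_multiplier whenever there has been no improvement for
+    # --stochastic_interval iterations; once it reaches 1.0, the next stagnation ends
+    # training (see the training loop below).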
self_learning_group.add_argument('--validation', default=None, metavar='DICTIONARY', help='a dictionary file for validation at each iteration') + self_learning_group.add_argument('--stochastic_initial', default=0.1, type=float, help='initial keep probability stochastic dictionary induction (defaults to 0.1)') + self_learning_group.add_argument('--stochastic_multiplier', default=2.0, type=float, help='stochastic dictionary induction multiplier (defaults to 2.0)') + self_learning_group.add_argument('--stochastic_interval', default=50, type=int, help='stochastic dictionary induction interval (defaults to 50)') + self_learning_group.add_argument('--log', help='write to a log file in tsv format at each iteration') + self_learning_group.add_argument('-v', '--verbose', action='store_true', help='write log information to stderr at each iteration') + args = parser.parse_args() + + if args.supervised is not None: + parser.set_defaults(init_dictionary=args.supervised, normalize=['unit', 'center', 'unit'], whiten=True, src_reweight=0.5, trg_reweight=0.5, src_dewhiten='src', trg_dewhiten='trg', batch_size=1000) + if args.semi_supervised is not None: + parser.set_defaults(init_dictionary=args.semi_supervised, normalize=['unit', 'center', 'unit'], whiten=True, src_reweight=0.5, trg_reweight=0.5, src_dewhiten='src', trg_dewhiten='trg', self_learning=True, vocabulary_cutoff=20000, csls_neighborhood=10) + if args.identical: + parser.set_defaults(init_identical=True, normalize=['unit', 'center', 'unit'], whiten=True, src_reweight=0.5, trg_reweight=0.5, src_dewhiten='src', trg_dewhiten='trg', self_learning=True, vocabulary_cutoff=20000, csls_neighborhood=10) + if args.unsupervised or args.acl2018: + parser.set_defaults(init_unsupervised=True, unsupervised_vocab=4000, normalize=['unit', 'center', 'unit'], whiten=True, src_reweight=0.5, trg_reweight=0.5, src_dewhiten='src', trg_dewhiten='trg', self_learning=True, vocabulary_cutoff=20000, csls_neighborhood=10) + if args.aaai2018: + parser.set_defaults(init_dictionary=args.aaai2018, normalize=['unit', 'center'], whiten=True, trg_reweight=1, src_dewhiten='src', trg_dewhiten='trg', batch_size=1000) + if args.acl2017: + parser.set_defaults(init_numerals=True, orthogonal=True, normalize=['unit', 'center'], self_learning=True, direction='forward', stochastic_initial=1.0, stochastic_interval=1, batch_size=1000) + if args.acl2017_seed: + parser.set_defaults(init_dictionary=args.acl2017_seed, orthogonal=True, normalize=['unit', 'center'], self_learning=True, direction='forward', stochastic_initial=1.0, stochastic_interval=1, batch_size=1000) + if args.emnlp2016: + parser.set_defaults(init_dictionary=args.emnlp2016, orthogonal=True, normalize=['unit', 'center'], batch_size=1000) + args = parser.parse_args() + + # Check command line arguments + if (args.src_dewhiten is not None or args.trg_dewhiten is not None) and not args.whiten: + print('ERROR: De-whitening requires whitening first', file=sys.stderr) + sys.exit(-1) + + # Choose the right dtype for the desired precision + if args.precision == 'fp16': + dtype = 'float16' + elif args.precision == 'fp32': + dtype = 'float32' + elif args.precision == 'fp64': + dtype = 'float64' + + # Read input embeddings + srcfile = open(args.src_input, encoding=args.encoding, errors='surrogateescape') + trgfile = open(args.trg_input, encoding=args.encoding, errors='surrogateescape') + src_words, x = embeddings.read(srcfile, dtype=dtype) + trg_words, z = embeddings.read(trgfile, dtype=dtype) + + # NumPy/CuPy management + if args.cuda: + if 
not supports_cupy(): + print('ERROR: Install CuPy for CUDA support', file=sys.stderr) + sys.exit(-1) + xp = get_cupy() + x = xp.asarray(x) + z = xp.asarray(z) + else: + xp = np + xp.random.seed(args.seed) + + # Build word to index map + src_word2ind = {word: i for i, word in enumerate(src_words)} + trg_word2ind = {word: i for i, word in enumerate(trg_words)} + + #print(args.normalize) + #print(args.self_learning) + # STEP 0: Normalization + embeddings.normalize(x, args.normalize) + embeddings.normalize(z, args.normalize) + + # Build the seed dictionary + src_indices = [] + trg_indices = [] + if args.init_unsupervised: + sim_size = min(x.shape[0], z.shape[0]) if args.unsupervised_vocab <= 0 else min(x.shape[0], z.shape[0], args.unsupervised_vocab) + u, s, vt = xp.linalg.svd(x[:sim_size], full_matrices=False) + xsim = (u*s).dot(u.T) + u, s, vt = xp.linalg.svd(z[:sim_size], full_matrices=False) + zsim = (u*s).dot(u.T) + del u, s, vt + xsim.sort(axis=1) + zsim.sort(axis=1) + embeddings.normalize(xsim, args.normalize) + embeddings.normalize(zsim, args.normalize) + sim = xsim.dot(zsim.T) + if args.csls_neighborhood > 0: + knn_sim_fwd = topk_mean(sim, k=args.csls_neighborhood) + knn_sim_bwd = topk_mean(sim.T, k=args.csls_neighborhood) + sim -= knn_sim_fwd[:, xp.newaxis]/2 + knn_sim_bwd/2 + if args.direction == 'forward': + src_indices = xp.arange(sim_size) + trg_indices = sim.argmax(axis=1) + elif args.direction == 'backward': + src_indices = sim.argmax(axis=0) + trg_indices = xp.arange(sim_size) + elif args.direction == 'union': + src_indices = xp.concatenate((xp.arange(sim_size), sim.argmax(axis=0))) + trg_indices = xp.concatenate((sim.argmax(axis=1), xp.arange(sim_size))) + del xsim, zsim, sim + elif args.init_numerals: + numeral_regex = re.compile('^[0-9]+$') + src_numerals = {word for word in src_words if numeral_regex.match(word) is not None} + trg_numerals = {word for word in trg_words if numeral_regex.match(word) is not None} + numerals = src_numerals.intersection(trg_numerals) + for word in numerals: + src_indices.append(src_word2ind[word]) + trg_indices.append(trg_word2ind[word]) + elif args.init_identical: + identical = set(src_words).intersection(set(trg_words)) + for word in identical: + src_indices.append(src_word2ind[word]) + trg_indices.append(trg_word2ind[word]) + else: + f = open(args.init_dictionary, encoding=args.encoding, errors='surrogateescape') + for line in f: + src, trg = line.split() + try: + src_ind = src_word2ind[src] + trg_ind = trg_word2ind[trg] + src_indices.append(src_ind) + trg_indices.append(trg_ind) + except KeyError: + print('WARNING: OOV dictionary entry ({0} - {1})'.format(src, trg), file=sys.stderr) + + # Read validation dictionary + if args.validation is not None: + f = open(args.validation, encoding=args.encoding, errors='surrogateescape') + validation = collections.defaultdict(set) + oov = set() + vocab = set() + for line in f: + src, trg = line.split() + try: + src_ind = src_word2ind[src] + trg_ind = trg_word2ind[trg] + validation[src_ind].add(trg_ind) + vocab.add(src) + except KeyError: + oov.add(src) + oov -= vocab # If one of the translation options is in the vocabulary, then the entry is not an oov + validation_coverage = len(validation) / (len(validation) + len(oov)) + + # Create log file + if args.log: + log = open(args.log, mode='w', encoding=args.encoding, errors='surrogateescape') + + # Allocate memory + xw = xp.empty_like(x) + zw = xp.empty_like(z) + src_size = x.shape[0] if args.vocabulary_cutoff <= 0 else min(x.shape[0], 
args.vocabulary_cutoff) + trg_size = z.shape[0] if args.vocabulary_cutoff <= 0 else min(z.shape[0], args.vocabulary_cutoff) + simfwd = xp.empty((args.batch_size, trg_size), dtype=dtype) + simbwd = xp.empty((args.batch_size, src_size), dtype=dtype) + if args.validation is not None: + simval = xp.empty((len(validation.keys()), z.shape[0]), dtype=dtype) + + best_sim_forward = xp.full(src_size, -100, dtype=dtype) + src_indices_forward = xp.arange(src_size) + trg_indices_forward = xp.zeros(src_size, dtype=int) + best_sim_backward = xp.full(trg_size, -100, dtype=dtype) + src_indices_backward = xp.zeros(trg_size, dtype=int) + trg_indices_backward = xp.arange(trg_size) + knn_sim_fwd = xp.zeros(src_size, dtype=dtype) + knn_sim_bwd = xp.zeros(trg_size, dtype=dtype) + + # Training loop + best_objective = objective = -100. + it = 1 + last_improvement = 0 + keep_prob = args.stochastic_initial + t = time.time() + end = not args.self_learning + while True: + + # Increase the keep probability if we have not improve in args.stochastic_interval iterations + if it - last_improvement > args.stochastic_interval: + if keep_prob >= 1.0: + end = True + keep_prob = min(1.0, args.stochastic_multiplier*keep_prob) + last_improvement = it + + # Update the embedding mapping + if args.orthogonal or not end: # orthogonal mapping + u, s, vt = xp.linalg.svd(z[trg_indices].T.dot(x[src_indices])) + w = vt.T.dot(u.T) + x.dot(w, out=xw) + zw[:] = z + elif args.unconstrained: # unconstrained mapping + x_pseudoinv = xp.linalg.inv(x[src_indices].T.dot(x[src_indices])).dot(x[src_indices].T) + w = x_pseudoinv.dot(z[trg_indices]) + x.dot(w, out=xw) + zw[:] = z + else: # advanced mapping + + # TODO xw.dot(wx2, out=xw) and alike not working + xw[:] = x + zw[:] = z + + # STEP 1: Whitening + def whitening_transformation(m): + u, s, vt = xp.linalg.svd(m, full_matrices=False) + return vt.T.dot(xp.diag(1/s)).dot(vt) + if args.whiten: + wx1 = whitening_transformation(xw[src_indices]) + wz1 = whitening_transformation(zw[trg_indices]) + xw = xw.dot(wx1) + zw = zw.dot(wz1) + + # STEP 2: Orthogonal mapping + wx2, s, wz2_t = xp.linalg.svd(xw[src_indices].T.dot(zw[trg_indices])) + wz2 = wz2_t.T + xw = xw.dot(wx2) + zw = zw.dot(wz2) + + # STEP 3: Re-weighting + xw *= s**args.src_reweight + zw *= s**args.trg_reweight + + # STEP 4: De-whitening + if args.src_dewhiten == 'src': + xw = xw.dot(wx2.T.dot(xp.linalg.inv(wx1)).dot(wx2)) + elif args.src_dewhiten == 'trg': + xw = xw.dot(wz2.T.dot(xp.linalg.inv(wz1)).dot(wz2)) + if args.trg_dewhiten == 'src': + zw = zw.dot(wx2.T.dot(xp.linalg.inv(wx1)).dot(wx2)) + elif args.trg_dewhiten == 'trg': + zw = zw.dot(wz2.T.dot(xp.linalg.inv(wz1)).dot(wz2)) + + # STEP 5: Dimensionality reduction + if args.dim_reduction > 0: + xw = xw[:, :args.dim_reduction] + zw = zw[:, :args.dim_reduction] + + # Self-learning + if end: + break + else: + # Update the training dictionary + if args.direction in ('forward', 'union'): + if args.csls_neighborhood > 0: + for i in range(0, trg_size, simbwd.shape[0]): + j = min(i + simbwd.shape[0], trg_size) + zw[i:j].dot(xw[:src_size].T, out=simbwd[:j-i]) + knn_sim_bwd[i:j] = topk_mean(simbwd[:j-i], k=args.csls_neighborhood, inplace=True) + for i in range(0, src_size, simfwd.shape[0]): + j = min(i + simfwd.shape[0], src_size) + xw[i:j].dot(zw[:trg_size].T, out=simfwd[:j-i]) + simfwd[:j-i].max(axis=1, out=best_sim_forward[i:j]) + simfwd[:j-i] -= knn_sim_bwd/2 # Equivalent to the real CSLS scores for NN + dropout(simfwd[:j-i], 1 - keep_prob).argmax(axis=1, out=trg_indices_forward[i:j]) + if 
args.direction in ('backward', 'union'): + if args.csls_neighborhood > 0: + for i in range(0, src_size, simfwd.shape[0]): + j = min(i + simfwd.shape[0], src_size) + xw[i:j].dot(zw[:trg_size].T, out=simfwd[:j-i]) + knn_sim_fwd[i:j] = topk_mean(simfwd[:j-i], k=args.csls_neighborhood, inplace=True) + for i in range(0, trg_size, simbwd.shape[0]): + j = min(i + simbwd.shape[0], trg_size) + zw[i:j].dot(xw[:src_size].T, out=simbwd[:j-i]) + simbwd[:j-i].max(axis=1, out=best_sim_backward[i:j]) + simbwd[:j-i] -= knn_sim_fwd/2 # Equivalent to the real CSLS scores for NN + dropout(simbwd[:j-i], 1 - keep_prob).argmax(axis=1, out=src_indices_backward[i:j]) + if args.direction == 'forward': + src_indices = src_indices_forward + trg_indices = trg_indices_forward + elif args.direction == 'backward': + src_indices = src_indices_backward + trg_indices = trg_indices_backward + elif args.direction == 'union': + src_indices = xp.concatenate((src_indices_forward, src_indices_backward)) + trg_indices = xp.concatenate((trg_indices_forward, trg_indices_backward)) + + # Objective function evaluation + if args.direction == 'forward': + objective = xp.mean(best_sim_forward).tolist() + elif args.direction == 'backward': + objective = xp.mean(best_sim_backward).tolist() + elif args.direction == 'union': + objective = (xp.mean(best_sim_forward) + xp.mean(best_sim_backward)).tolist() / 2 + if objective - best_objective >= args.threshold: + last_improvement = it + best_objective = objective + + # Accuracy and similarity evaluation in validation + if args.validation is not None: + src = list(validation.keys()) + xw[src].dot(zw.T, out=simval) + nn = asnumpy(simval.argmax(axis=1)) + accuracy = np.mean([1 if nn[i] in validation[src[i]] else 0 for i in range(len(src))]) + similarity = np.mean([max([simval[i, j].tolist() for j in validation[src[i]]]) for i in range(len(src))]) + + # Logging + duration = time.time() - t + if args.verbose: + print(file=sys.stderr) + print('ITERATION {0} ({1:.2f}s)'.format(it, duration), file=sys.stderr) + print('\t- Objective: {0:9.4f}%'.format(100 * objective), file=sys.stderr) + print('\t- Drop probability: {0:9.4f}%'.format(100 - 100*keep_prob), file=sys.stderr) + if args.validation is not None: + print('\t- Val. similarity: {0:9.4f}%'.format(100 * similarity), file=sys.stderr) + print('\t- Val. accuracy: {0:9.4f}%'.format(100 * accuracy), file=sys.stderr) + print('\t- Val. 
coverage: {0:9.4f}%'.format(100 * validation_coverage), file=sys.stderr) + sys.stderr.flush() + if args.log is not None: + val = '{0:.6f}\t{1:.6f}\t{2:.6f}'.format( + 100 * similarity, 100 * accuracy, 100 * validation_coverage) if args.validation is not None else '' + print('{0}\t{1:.6f}\t{2}\t{3:.6f}'.format(it, 100 * objective, val, duration), file=log) + log.flush() + + t = time.time() + it += 1 + + # Write mapped embeddings + srcfile = open(args.src_output, mode='w', encoding=args.encoding, errors='surrogateescape') + trgfile = open(args.trg_output, mode='w', encoding=args.encoding, errors='surrogateescape') + embeddings.write(src_words, xw, srcfile) + embeddings.write(trg_words, zw, trgfile) + srcfile.close() + trgfile.close() + + logging.info("--- %s seconds ---" % (time.time() - start_time)) + + +if __name__ == '__main__': + main() diff --git a/alignment/sgns_vi.py b/alignment/sgns_vi.py new file mode 100644 index 0000000..588c074 --- /dev/null +++ b/alignment/sgns_vi.py @@ -0,0 +1,117 @@ +import sys +sys.path.append('./modules/') + +import codecs +from collections import defaultdict +import os +from os.path import basename +import zipfile +from docopt import docopt +import logging +import logging.config +import time +import gensim +from gensim.models.word2vec import Word2Vec +from gensim.models import KeyedVectors +from dsm import PathLineSentences_mod + + +def intersection_dic(t1, t2): + voc_t1 = [x for xs in t1 for x in xs] + voc_t2 = [x for xs in t2 for x in xs] + intersection = list(set(voc_t1) & set(voc_t2)) + return [[x] for x in intersection] # note: gensim wants list of iterables (i.e. list of lists) + +def main(): + """ + Make comparable embedding vector spaces with Skip-Gram with Negative Sampling as described in: + + Yoon Kim, Yi-I. Chiu, Kentaro Hanaki, Darshan Hegde, and Slav Petrov. 2014. Temporal analysis of language through neural language models. arXiv preprint arXiv:1405.3515. + + """ + + # Get the arguments + args = docopt("""Make comparable embedding vector spaces with Skip-Gram with Negative Sampling and Vector Initialization from corpus. + + Usage: + sgns_vi.py [-l] + + Arguments: + + = vectors on which model should be initialized + = the linear distance of context words to consider in each direction + = dimensionality of embeddings + = number of negative samples parameter (equivalent to shifting parameter for PPMI) + = threshold for subsampling + = number of occurrences for a word to be included in the vocabulary + = number of iterations + = path to corpus directory with zipped files, each sentence in form 'year\tword1 word2 word3...' + = output path for vectors + = lower bound for time period + = upper bound for time period + + Options: + -l, --len normalize final vectors to unit length + + Note: + Initialization vectors should be non-length-normalized. 
+ + """) + + is_len = args['--len'] + initVectorsPath = args[''] + windowSize = int(args['']) + dim = int(args['']) + k = int(args['']) + if args['']=='None': + t = None + else: + t = float(args['']) + minCount = int(args['']) + itera = int(args['']) + corpDir = args[''] + outPath = args[''] + lowerBound = int(args['']) + upperBound = int(args['']) + + logging.config.dictConfig({'version': 1, 'disable_existing_loggers': True,}) + logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO) + logging.info(__file__.upper()) + start_time = time.time() + + # Initialize model + model = gensim.models.Word2Vec(sg=1, # skipgram + hs=0, # negative sampling + negative=k, # number of negative samples + sample=t, # threshold for subsampling, if None, no subsampling is performed + size=dim, window=windowSize, min_count=minCount, iter=itera, workers=20) + + # Receive vectors for initialization + initVectors = KeyedVectors.load_word2vec_format(initVectorsPath, binary=False) + + # Initialize vocabulary + vocab_initVectors = initVectors.vocab + + # Intersect vocabulary + vocab_sentences_t_2 = PathLineSentences_mod(corpDir, lowerBound=lowerBound, upperBound=upperBound) + vocab_intersect = intersection_dic([[token] for token in vocab_initVectors],vocab_sentences_t_2) + model.build_vocab(vocab_intersect) + + # Train + sentences = PathLineSentences_mod(corpDir, lowerBound=lowerBound, upperBound=upperBound) + model.intersect_word2vec_format(initVectorsPath, lockf=1) + model.train(sentences, total_examples=model.corpus_count, epochs=model.iter) + + if is_len: + # L2-normalize vectors + model.init_sims(replace=True) + + # Save the vectors and the model + model.wv.save_word2vec_format(outPath + '.w2v') + #model.save(outPath + '.model') + + logging.info("--- %s seconds ---" % (time.time() - start_time)) + + +if __name__ == '__main__': + main() diff --git a/alignment/srv_align.py b/alignment/srv_align.py new file mode 100644 index 0000000..e217fb0 --- /dev/null +++ b/alignment/srv_align.py @@ -0,0 +1,201 @@ +import sys +sys.path.append('./modules/') + +import os +from docopt import docopt +from dsm import load_pkl_files, save_pkl_files +import logging +import time +import codecs +import numpy as np +from composes.semantic_space.space import Space +from composes.matrix.dense_matrix import DenseMatrix +from composes.matrix.sparse_matrix import SparseMatrix +from scipy.sparse import lil_matrix, csr_matrix, csc_matrix, hstack, vstack +from sklearn.random_projection import sparse_random_matrix + + +def main(): + """ + Create two aligned low-dimensional vector spaces by sparse random indexing from two co-occurrence matrices as described in: + Pierpaolo Basile, Annalina Caputo and Giovanni Semeraro, 2014. Analysing Word Meaning over Time by Exploiting Temporal Random Indexing. + """ + + # Get the arguments + args = docopt('''Create two aligned low-dimensional vector spaces by sparse random indexing from two co-occurrence matrices. 
+ + Usage: + srv_align.py [-l] (-s | -a) + + = number negative samples, expressed as percentage of positive samples + = smoothing parameter for negative sampling + = number of non-zero values in each random vector + = number of dimensions for random vectors + = threshold for downsampling (if t=None, no subsampling is applied) + = output path for aligned space 1 + = output path for aligned space 2 + = path to pickled space without suffix + = path to pickled space without suffix + = output path for elemental space (context vectors) + + Options: + -l, --len normalize final vectors to unit length + -s, --see specify number of seeds manually + -a, --aut calculate number of seeds automatically as proposed in [1,2] + + References: + [1] Ping Li, T. Hastie and K. W. Church, 2006, + "Very Sparse Random Projections". + http://web.stanford.edu/~hastie/Papers/Ping/KDD06_rp.pdf + [2] D. Achlioptas, 2001, "Database-friendly random projections", + http://www.cs.ucsc.edu/~optas/papers/jl.pdf + + ''') + + is_len = args['--len'] + is_seeds = args['--see'] + if is_seeds: + seeds = int(args['']) + is_aut = args['--aut'] + dim = int(args['']) + if args['']=='None': + t = None + else: + t = float(args['']) + outPath1 = args[''] + outPath2 = args[''] + outPathElement = args[''] + spacePrefix1 = args[''] + spacePrefix2 = args[''] + + + logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO) + logging.info(__file__.upper()) + start_time = time.time() + + # Load input spaces + space1 = load_pkl_files(spacePrefix1) + space2 = load_pkl_files(spacePrefix2) + matrix1 = csc_matrix(space1.get_cooccurrence_matrix().get_mat()) + matrix2 = csc_matrix(space2.get_cooccurrence_matrix().get_mat()) + + # Get mappings between rows/columns and words + id2row1 = space1.get_id2row() + id2row2 = space2.get_id2row() + row2id_1 = space1.get_row2id() + row2id_2 = space2.get_row2id() + id2column1 = space1.get_id2column() + id2column2 = space2.get_id2column() + + # Get union of rows and columns in both spaces + unified_rows = sorted(list(set(id2row1).union(id2row2))) + unified_columns = sorted(list(set(id2column1).union(id2column2))) + columns_diff1 = list(set(unified_columns) - set(id2column1)) + columns_diff2 = list(set(unified_columns) - set(id2column2)) + + # Get mappings of indices of columns in original spaces to indices of columns in unified space + c2i = {w: i for i, w in enumerate(unified_columns)} + cj2i1 = {j: c2i[w] for j, w in enumerate(id2column1+columns_diff1)} + cj2i2 = {j: c2i[w] for j, w in enumerate(id2column2+columns_diff2)} + + if t!=None: + rows_diff1 = list(set(unified_rows) - set(id2row1)) + rows_diff2 = list(set(unified_rows) - set(id2row2)) + + r2i = {w: i for i, w in enumerate(unified_rows)} + rj2i1 = {j: r2i[w] for j, w in enumerate(id2row1+rows_diff1)} + rj2i2 = {j: r2i[w] for j, w in enumerate(id2row2+rows_diff2)} + + # Build spaces with unified COLUMNS + new_columns1 = csc_matrix((len(id2row1),len(columns_diff1))) # Get empty columns for additional context words + unified_matrix1 = hstack((matrix1,new_columns1))[:,sorted(cj2i1, key=cj2i1.get)] # First concatenate matrix and empty columns and then order columns according to unified_columns + + new_columns2 = csc_matrix((len(id2row2),len(columns_diff2))) + unified_matrix2 = hstack((matrix2,new_columns2))[:,sorted(cj2i2, key=cj2i2.get)] + + # Build spaces with unified ROWS + new_rows1 = csc_matrix((len(rows_diff1),len(unified_columns))) + final_unified_matrix1 = csc_matrix(vstack((unified_matrix1,new_rows1)))[sorted(rj2i1, 
key=rj2i1.get)] + + new_rows2 = csc_matrix((len(rows_diff2),len(unified_columns))) + final_unified_matrix2 = csc_matrix(vstack((unified_matrix2,new_rows2)))[sorted(rj2i2, key=rj2i2.get)] + + # Add up final unified matrices + common_unified_matrix = np.add(final_unified_matrix1,final_unified_matrix2) + + # Get number of total occurrences of any word + totalOcc = np.sum(common_unified_matrix) + + # Define function for downsampling + downsample = lambda f: np.sqrt(float(t)/f) if f>t else 1.0 + downsample = np.vectorize(downsample) + + # Get total normalized co-occurrence frequency of all contexts in both spaces + context_freqs = np.array(common_unified_matrix.sum(axis=0)/totalOcc)[0] + + + ## Generate ternary random vectors + if is_seeds: + elementalMatrix = lil_matrix((len(unified_columns),dim)) + # Generate base vector for random vectors + baseVector = np.zeros(dim) # Note: Make sure that number of seeds is not greater than dimensions + for i in range(0,seeds/2): + baseVector[i] = 1.0 + for i in range(seeds/2,seeds): + baseVector[i] = -1.0 + for i in range(len(unified_columns)): # To-do: make this more efficient by generating random indices for a whole array + np.random.shuffle(baseVector) + elementalMatrix[i] = baseVector + if is_aut: + elementalMatrix = sparse_random_matrix(dim,len(unified_columns)).T + + # Initialize target vectors + alignedMatrix1 = np.zeros((len(id2row1),dim)) + alignedMatrix2 = np.zeros((len(id2row2),dim)) + + + # Iterate over rows of space, find context words and update aligned matrix with low-dimensional random vectors of these context words + for (space,id2row,cj2i,alignedMatrix) in [(space1,id2row1,cj2i1,alignedMatrix1),(space2,id2row2,cj2i2,alignedMatrix2)]: + # Iterate over targets + for i, target in enumerate(id2row): + # Get co-occurrence values as matrix + m = space.get_row(target).get_mat() + # Get nonzero indexes + nonzeros = m.nonzero() + nonzeros = [cj2i[j] for j in nonzeros[1]] + data = m.data + pos_context_vectors = elementalMatrix[nonzeros] + if t!=None: + # Apply subsampling + rfs = context_freqs[nonzeros] + rfs = downsample(rfs) + data *= rfs + # Weight context vectors by occurrence frequency + pos_context_vectors = pos_context_vectors.multiply(data.reshape(-1,1)) + # Add up context vectors and store as row for target + alignedMatrix[i] = np.sum(pos_context_vectors, axis=0) + + if is_len: + # L2-normalize vectors + l2norm1 = np.linalg.norm(alignedMatrix1, axis=1, ord=2) + l2norm2 = np.linalg.norm(alignedMatrix2, axis=1, ord=2) + l2norm1[l2norm1==0.0] = 1.0 # Convert 0 values to 1 + l2norm2[l2norm2==0.0] = 1.0 # Convert 0 values to 1 + alignedMatrix1 /= l2norm1.reshape(len(l2norm1),1) + alignedMatrix2 /= l2norm2.reshape(len(l2norm2),1) + + # Make spaces + alignedSpace1 = Space(DenseMatrix(alignedMatrix1), id2row1, []) + alignedSpace2 = Space(DenseMatrix(alignedMatrix2), id2row2, []) + elementalSpace = Space(SparseMatrix(elementalMatrix), unified_columns, []) + + # Save the Space objects in pickle format + save_pkl_files(alignedSpace1, outPath1 + '.dm', save_in_one_file=False) + save_pkl_files(alignedSpace2, outPath2 + '.dm', save_in_one_file=False) + save_pkl_files(elementalSpace, outPathElement + '.dm', save_in_one_file=False) + + logging.info("--- %s seconds ---" % (time.time() - start_time)) + + +if __name__ == '__main__': + main() diff --git a/alignment/wi.py b/alignment/wi.py new file mode 100644 index 0000000..7cf7590 --- /dev/null +++ b/alignment/wi.py @@ -0,0 +1,103 @@ +import sys +sys.path.append('./modules/') + +import codecs +import os 
+from docopt import docopt +import logging +import time +import re +import random +import numpy as np + + +def main(): + """ + Combine two corpora and shuffle. Seed words are substituted in first corpus. (Word Injection) + """ + + # Get the arguments + args = docopt("""Combine two corpora and shuffle. Seed words are substituted in first corpus. (Word Injection) + + + Usage: + wi.py + + Arguments: + + = first corpus + = second corpus + = lower bound for time period in first corpus + = upper bound for time period in first corpus + = lower bound for time period in second corpus + = upper bound for time period in second corpus + = target words (to substitute in one corpus) + = path+filename to target corpus (2 corpora combined, with substitution) + + """) + + corp1 = args[''] + corp2 = args[''] + lowerBound1 = int(args['']) + upperBound1 = int(args['']) + lowerBound2 = int(args['']) + upperBound2 = int(args['']) + targWords = args[''] + outFile = args[''] + + logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO) + logging.info(__file__.upper()) + start_time = time.time() + + # get seeds words + seedList = [] + for line in codecs.open(targWords, "r", 'utf-8'): + line = line.strip().split("\t")[0] + seedList.append(line) + + searchPat = re.compile(r'(\b(?:%s)\b)' % '|'.join(seedList), re.UNICODE) + + lineCt = 0 + wFile = codecs.open("tempOutFile.txt", "w", 'utf-8') + for line in codecs.open(corp1, "r", 'utf-8'): + date = int(line.split("\t")[0]) + if not lowerBound1 <= date <= upperBound1: # skip every sentence which is not in timeframe + continue + newLine = re.sub(searchPat, r"\1_", line) + wFile.write(newLine) + lineCt +=1 + for line in codecs.open(corp2, "r", 'utf-8'): + date = int(line.split("\t")[0]) + if not lowerBound2 <= date <= upperBound2: # skip every sentence which is not in timeframe + continue + wFile.write(line) + lineCt +=1 + print("Seed words substituted. Total number of lines: %d" % (lineCt)) + indList = list(range(lineCt)) + random.shuffle(indList) + sublists = np.array_split(indList, 5) + + # make sure that you do not append at the outFile form the last iteration + open(outFile, 'w').close() + wFile = codecs.open(outFile, "a", 'utf-8') + for nrSub, sublist in enumerate(sublists): + sublist = set(sublist) + print("Processing %d part ..." 
% (nrSub)) + smallLineList = [] + for nrL, line in enumerate(codecs.open("tempOutFile.txt", "r", 'utf-8')): + if nrL in sublist: + smallLineList.append(line) + random.shuffle(smallLineList) + for line in smallLineList: + wFile.write(line.strip("\n")+"\n") + + + os.remove("tempOutFile.txt") + + + logging.info("--- %s seconds ---" % (time.time() - start_time)) + + +if __name__ == '__main__': + main() + diff --git a/corpora/test/corpus.txt.gz b/corpora/test/corpus.txt.gz new file mode 100644 index 0000000..f090cee Binary files /dev/null and b/corpora/test/corpus.txt.gz differ diff --git a/corpora/test_wi/corpus.txt.gz b/corpora/test_wi/corpus.txt.gz new file mode 100644 index 0000000..473b152 Binary files /dev/null and b/corpora/test_wi/corpus.txt.gz differ diff --git a/evaluation/spearman.py b/evaluation/spearman.py new file mode 100644 index 0000000..78fdc47 --- /dev/null +++ b/evaluation/spearman.py @@ -0,0 +1,75 @@ +import sys +sys.path.append('./modules/') + +import os +import random +import codecs +import numpy as np +from docopt import docopt +from scipy.stats import spearmanr +import logging +import time + + +def main(): + """ + Calculate spearman correlation coefficient for specified columns of two files. + """ + + # Get the arguments + args = docopt("""Calculate spearman correlation coefficient for specified columns of two files. + + + Usage: + spearman.py + + Arguments: + = path to file1 + = path to file2 + = name of file1 to print + = name of file2 to print + = target column in file1 + = target column in file2 + + Note: + Assumes tap-separated CSV files as input. Assumes that rows are in same order and columns have same length. Nan values are omitted. + + """) + + file1 = args[''] + file2 = args[''] + filename1 = args[''] + filename2 = args[''] + col1 = int(args['']) + col2 = int(args['']) + + logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO) + logging.info(__file__.upper()) + start_time = time.time() + + # Get data + with codecs.open(file1, 'r', 'utf-8') as f_in: + data1 = np.array([float(line.strip().split('\t')[col1]) for line in f_in]) + + with codecs.open(file2, 'r', 'utf-8') as f_in: + data2 = np.array([float(line.strip().split('\t')[col2]) for line in f_in]) + + # Check if there are non-number values + nan_list1 = [x for x in data1 if np.isnan(x)] + nan_list2 = [x for x in data2 if np.isnan(x)] + if len(nan_list1)>0 or len(nan_list2)>0: + print 'nan encountered!' + + # compute correlation + try: + rho, p = spearmanr(data1, data2, nan_policy='omit') + except ValueError as e: + logging.info(e) + rho, p = 'nan', 'nan' + + print filename1, filename2, rho, p + logging.info("--- %s seconds ---" % (time.time() - start_time)) + + +if __name__ == '__main__': + main() diff --git a/measures/cd.py b/measures/cd.py new file mode 100644 index 0000000..eba79dd --- /dev/null +++ b/measures/cd.py @@ -0,0 +1,99 @@ +import sys +sys.path.append('./modules/') + +import os +from os.path import basename +from docopt import docopt +from dsm import load_pkl_files +import logging +import time +import codecs +import numpy as np +from scipy import spatial +from scipy.sparse import csr_matrix +from composes.matrix.dense_matrix import DenseMatrix + +def main(): + """ + Compute cosine distance for target pairs from two vector spaces. + """ + + # Get the arguments + args = docopt("""Compute cosine distance for target pairs from two vector spaces. 
+ + Usage: + cd.py [(-f | -s)] [] + + = path to pickled space without suffix + = path to pickled space without suffix + = path to file with tab-separated word pairs + = output path for result file + + Options: + -f, --fst write only first target in output file + -s, --scd write only second target in output file + + Note: + Important: spaces must be already aligned (columns in same order)! + + """) + + is_fst = args['--fst'] + is_scd = args['--scd'] + spacePrefix1 = args[''] + spacePrefix2 = args[''] + testset = args[''] + outPath = args[''] + + logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO) + logging.info(__file__.upper()) + start_time = time.time() + + # Load spaces + space1 = load_pkl_files(spacePrefix1) + space2 = load_pkl_files(spacePrefix2) + + if testset!=None: + # target vectors in first/second column are computed from space1/space2 + with codecs.open(testset, 'r', 'utf-8') as f_in: + targets = [(line.strip().split('\t')[0],line.strip().split('\t')[1]) for line in f_in] + else: + # If no test set is provided, compute values for all targets occurring in both spaces + target_intersection = set([target.decode('utf-8') for target in space1.get_row2id()]).intersection([target.decode('utf-8') for target in space2.get_row2id()]) + targets = zip(target_intersection,target_intersection) + + scores = {} + for i, (t1, t2) in enumerate(targets): + + # Get row vectors + try: + row1 = space1.get_row(t1.encode('utf8')) + row2 = space2.get_row(t2.encode('utf8')) + except KeyError: + scores[(t1, t2)] = 'nan' + continue + + # Convert to list + row_vector1 = csr_matrix(row1.get_mat()).toarray()[0].tolist() + row_vector2 = csr_matrix(row2.get_mat()).toarray()[0].tolist() + + # Compute cosine distance of vectors + distance = spatial.distance.cosine(row_vector1, row_vector2) + scores[(t1, t2)] = distance + + + with codecs.open(outPath +'.csv', 'w', 'utf-8') as f_out: + for (t1, t2) in targets: + if is_fst: # output only first target string + print >> f_out, '\t'.join((t1, str(float(scores[(t1, t2)])))) + elif is_scd: # output only second target string + print >> f_out, '\t'.join((t2, str(float(scores[(t1, t2)])))) + else: # standard outputs both target strings + print >> f_out, '\t'.join(('%s,%s' % (t1,t2), str(float(scores[(t1, t2)])))) + + logging.info("--- %s seconds ---" % (time.time() - start_time)) + + + +if __name__ == '__main__': + main() diff --git a/measures/entropy.py b/measures/entropy.py new file mode 100644 index 0000000..9331fc2 --- /dev/null +++ b/measures/entropy.py @@ -0,0 +1,90 @@ +import sys +sys.path.append('./modules/') + +import os +from os.path import basename +from docopt import docopt +from dsm import load_pkl_files +from scipy.stats import entropy +import logging +import time +import codecs +import numpy as np + + +def main(): + """ + Compute entropy for rows of targets from vector space. + """ + + # Get the arguments + args = docopt("""Compute entropy for rows of targets from vector space. 
+ + Usage: + entropy.py [-n] [] + + = path to pickled space without suffix + = output path for result file + = path to file with targets in first column + + Options: + -n, --nrm normalize values by log of number of types + + """) + + is_norm = args['--nrm'] + spacePrefix = args[''] + outPath = args[''] + testset = args[''] + + logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO) + logging.info(__file__.upper()) + start_time = time.time() + + space = load_pkl_files(spacePrefix) + + if testset!=None: + # target vectors in first/second column are computed from space1/space2 + with codecs.open(testset, 'r', 'utf-8') as f_in: + targets = [line.strip().split('\t')[0] for line in f_in] + else: + # If no test set is provided, compute values for all targets + targets = [target.decode('utf-8') for target in space.get_row2id()] + + scores = {} + norms = {} + for i, v in enumerate(targets): + + try: + row = space.get_row(v.encode('utf8')) + except KeyError: + scores[v] = 'nan' + norms[v] = 'nan' + continue + + # Get all counts in row (non-zero elements) + counts = row.get_mat().data + + # Compute entropy of row + H = entropy(counts, base=2) + scores[v] = H + + if is_norm: + # Get number of non-zero elements in row + types = row.get_mat().getnnz() + norms[v] = np.log2(types) + + + with codecs.open(outPath+'.csv', 'w', 'utf-8') as f_out: + for word in targets: + if is_norm: + print >> f_out, '\t'.join((word, str(float(scores[word])/float(norms[word])))) + else: + print >> f_out, '\t'.join((word, str(float(scores[word])))) + + + logging.info("--- %s seconds ---" % (time.time() - start_time)) + + +if __name__ == '__main__': + main() diff --git a/measures/freq.py b/measures/freq.py new file mode 100644 index 0000000..b1ea543 --- /dev/null +++ b/measures/freq.py @@ -0,0 +1,90 @@ +import sys +sys.path.append('./modules/') + +import codecs +from collections import defaultdict +import os +from dsm import PathLineSentences_mod +from docopt import docopt +import logging +import time + + +def main(): + """ + Get frequencies from corpus. + """ + + # Get the arguments + args = docopt("""Get frequencies from corpus. + + Usage: + freq.py [-o] [(-n )] [] + + Arguments: + + = path to zipped corpus directory + = output path for result file + = lower bound for time period + = upper bound for time period + = path to tab-separated file with targets in first column + = normalization constant + + Options: + -n, --nrm normalize values by normalization constant + + Note: + Outputs frequencies for all tokens in case no testset is provided. + + """) + + is_norm = args['--nrm'] + if is_norm: + normConst = float(args['']) + corpDir = args[''] + outPath = args[''] + lowerBound = int(args['']) + upperBound = int(args['']) + testset = args[''] + + logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO) + logging.info(__file__.upper()) + start_time = time.time() + + + freqs = defaultdict(int) + + sentences = PathLineSentences_mod(corpDir, lowerBound=lowerBound, upperBound=upperBound) + + for sentence in sentences: + for word in sentence: + freqs[word] = freqs[word] + 1 + + + if testset!=None: + # Targets for which to output values. 
+ with codecs.open(testset, 'r', 'utf-8') as f_in: + targets = [line.strip().split('\t')[0] for line in f_in] + else: + # Rank the lemmas + freqs_ranked = sorted(freqs, key=lambda x: -(freqs[x])) + # If no test set is provided, compute values for all tokens + targets = freqs_ranked + + with codecs.open(outPath + '.csv', 'w', 'utf-8') as f_out: + for word in targets: + if word in freqs: + if is_norm: + freqs[word]=float(freqs[word])/normConst + print >> f_out, '\t'.join((word, str(float(freqs[word])))) + else: + print >> f_out, '\t'.join((word, 'nan')) + + + logging.info('total number of tokens: %d' % (sentences.corpusSize)) + logging.info('total number of types: %d' % (len(freqs.keys()))) + logging.info("--- %s seconds ---" % (time.time() - start_time)) + + +if __name__ == '__main__': + main() diff --git a/measures/lnd.py b/measures/lnd.py new file mode 100644 index 0000000..671c2a9 --- /dev/null +++ b/measures/lnd.py @@ -0,0 +1,103 @@ +import sys +sys.path.append('./modules/') + +import os +from os.path import basename +from docopt import docopt +from dsm import load_pkl_files +import codecs +import numpy as np +from scipy import spatial +from composes.similarity.cos import CosSimilarity +import logging +import time + + +def main(): + """ + Compute local neighborhood distance for target pairs from two vector spaces. + """ + + # Get the arguments + args = docopt("""Compute local neighborhood distance for target pairs from two vector spaces. + + Usage: + lnd.py [(-f | -s)] [] + + = path to pickled space without suffix + = path to pickled space without suffix + = path to file with tab-separated word pairs + = parameter k (k nearest neighbors) + = output path for result file + + Options: + -f, --fst write only first target in output file + -s, --scd write only second target in output file + + """) + + is_fst = args['--fst'] + is_scd = args['--scd'] + spacePrefix1 = args[''] + spacePrefix2 = args[''] + testset = args[''] + outPath = args[''] + k = int(args['']) + + logging.config.dictConfig({'version': 1, 'disable_existing_loggers': True,}) + logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO) + logging.info(__file__.upper()) + start_time = time.time() + + # Load spaces + space1 = load_pkl_files(spacePrefix1) + space2 = load_pkl_files(spacePrefix2) + + if testset!=None: + # target vectors in first/second column are computed from space1/space2 + with codecs.open(testset, 'r', 'utf8') as f_in: + targets = [(line.strip().split('\t')[0],line.strip().split('\t')[1]) for line in f_in] + else: + # If no test set is provided, compute values for all targets occurring in both spaces + target_intersection = set([target.decode('utf8') for target in space1.get_row2id()]).intersection([target.decode('utf8') for target in space2.get_row2id()]) + targets = zip(target_intersection,target_intersection) + + scores = {} + neighborUnionSizes = {} + for i, (t1, t2) in enumerate(targets): + + # Get nearest neighbors + try: + neighbors1 = space1.get_neighbours(t1.encode('utf8'), k, CosSimilarity()) + neighbors2 = space2.get_neighbours(t2.encode('utf8'), k, CosSimilarity()) + except KeyError: + scores[(t1, t2)] = 'nan' + neighborUnionSizes[(t1, t2)] = 'nan' + continue + + neighborUnion = list(set([a for (a,b) in neighbors1+neighbors2 if (a in space1.row2id and a in space2.row2id and not a in [t1.encode('utf8'),t2.encode('utf8')])])) + + simVec1 = [space1.get_sim(t1.encode('utf8'), n, CosSimilarity()) for n in neighborUnion] + simVec2 = [space2.get_sim(t2.encode('utf8'), n, 
CosSimilarity()) for n in neighborUnion] + + # Compute cosine distance of vectors + distance = spatial.distance.cosine(simVec1, simVec2) + scores[(t1, t2)] = distance + neighborUnionSizes[(t1, t2)] = len(neighborUnion) + + + with codecs.open(outPath +'.csv', 'w', 'utf-8') as f_out: + for (t1, t2) in targets: + if is_fst: # output only first target string + print >> f_out, '\t'.join((t1, str(float(scores[(t1, t2)])), str(neighborUnionSizes[(t1, t2)]))) + elif is_scd: # output only second target string + print >> f_out, '\t'.join((t2, str(float(scores[(t1, t2)])), str(neighborUnionSizes[(t1, t2)]))) + else: # standard outputs both target strings + print >> f_out, '\t'.join(('%s,%s' % (t1,t2), str(float(scores[(t1, t2)])), str(neighborUnionSizes[(t1, t2)]))) + + + logging.info("--- %s seconds ---" % (time.time() - start_time)) + + +if __name__ == '__main__': + main() diff --git a/measures/subtract.py b/measures/subtract.py new file mode 100644 index 0000000..727dbe3 --- /dev/null +++ b/measures/subtract.py @@ -0,0 +1,73 @@ +import sys +sys.path.append('./modules/') + +import codecs +from docopt import docopt +import logging +import time + + +def main(): + """ + Subtract values in tab-separated CSV files. + """ + + # Get the arguments + args = docopt("""Subtract values in tab-separated CSV files. + + Usage: + subtract.py [-a] + + Arguments: + = target strings in first column + = strings in first column and values in second column + = strings in first column and values in second column + = output path for result file + + Options: + -a, --abs store absolute (always positive) instead of raw difference + + Note: + Assumes tap-separated CSV files as input. Appends nan if target is not present in valueFiles. + + """) + + targetFile = args[''] + valueFile1 = args[''] + valueFile2 = args[''] + outPath = args[''] + isAbsolute = args['--abs'] + + logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO) + logging.info(__file__.upper()) + start_time = time.time() + + # Get targets + with codecs.open(targetFile, 'r', 'utf-8') as f_in: + targets = [line.strip().split('\t')[0] for line in f_in] + + # Get target-value map 1 + with codecs.open(valueFile1, 'r', 'utf-8') as f_in: + string2value1 = dict([( line.strip().split('\t')[0], float(line.strip().split('\t')[1]) ) for line in f_in]) + + # Get target-value map 2 + with codecs.open(valueFile2, 'r', 'utf-8') as f_in: + string2value2 = dict([( line.strip().split('\t')[0], float(line.strip().split('\t')[1]) ) for line in f_in]) + + # Print only targets to output file + with codecs.open(outPath+'.csv', 'w', 'utf-8') as f_out: + for string in targets: + try: + if isAbsolute: + print >> f_out, '\t'.join((string, str(abs(string2value2[string]-string2value1[string])))) + else: + print >> f_out, '\t'.join((string, str(string2value2[string]-string2value1[string]))) + except KeyError: + print >> f_out, '\t'.join((string, 'nan')) + + + logging.info("--- %s seconds ---" % (time.time() - start_time)) + + +if __name__ == '__main__': + main() diff --git a/measures/transform.py b/measures/transform.py new file mode 100644 index 0000000..b145eb8 --- /dev/null +++ b/measures/transform.py @@ -0,0 +1,65 @@ +import sys +sys.path.append('./modules/') + +import codecs +from docopt import docopt +import logging +import time +import numpy as np + +def main(): + """ + Transform values from tab-separated CSV file by function specified as option. 
+ """ + + # Get the arguments + args = docopt("""Transform values from tab-separated CSV file by function specified as option. + + Usage: + transform.py -l + + Arguments: + = target strings in first column + = strings in first column and values in second column + = output path for result file + + Options: + -l, --log2 logarithmic transformation (base 2) + + Note: + Assumes tap-separated CSV files as input. Appends nan if target is not present in valueFile or normFile. + + """) + + targetFile = args[''] + valueFile = args[''] + outPath = args[''] + is_log2 = args['--log2'] + + logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO) + logging.info(__file__.upper()) + start_time = time.time() + + # Get targets + with codecs.open(targetFile, 'r', 'utf-8') as f_in: + targets = [line.strip().split('\t')[0] for line in f_in] + + # Get target-value map + with codecs.open(valueFile, 'r', 'utf-8') as f_in: + string2value = dict([( line.strip().split('\t')[0], float(line.strip().split('\t')[1]) ) for line in f_in]) + + # Print only targets to output file + with codecs.open(outPath+'.csv', 'w', 'utf-8') as f_out: + for string in targets: + try: + if is_log2: + print >> f_out, '\t'.join((string, str(np.log2(string2value[string])))) + except KeyError: + print >> f_out, '\t'.join((string, 'nan')) + + + logging.info("--- %s seconds ---" % (time.time() - start_time)) + + +if __name__ == '__main__': + main() diff --git a/measures/types.py b/measures/types.py new file mode 100644 index 0000000..a282680 --- /dev/null +++ b/measures/types.py @@ -0,0 +1,83 @@ +import sys +sys.path.append('./modules/') + +import os +from os.path import basename +from docopt import docopt +from dsm import load_pkl_files +import logging +import time +import codecs +import numpy as np + + +def main(): + """ + Compute number of context types for all rows of a vector space and save their scores. + """ + + # Get the arguments + args = docopt("""Compute number of context types for all rows of a vector space and save their scores. 
+ + Usage: + types.py [(-n )] [] + + = path to pickled space without suffix + = output path for result file + = path to file with targets in first column + = normalization constant + + Options: + -n, --nrm normalize values by normalization constant + + """) + + is_norm = args['--nrm'] + if is_norm: + normConst = float(args['']) + spacePrefix = args[''] + outPath = args[''] + testset = args[''] + + logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO) + logging.info(__file__.upper()) + start_time = time.time() + + space = load_pkl_files(spacePrefix) + + if testset!=None: + # target vectors in first/second column are computed from space1/space2 + with codecs.open(testset, 'r', 'utf-8') as f_in: + targets = [line.strip().split('\t')[0] for line in f_in] + else: + # If no test set is provided, compute values for all targets + targets = [target.decode('utf-8') for target in space.get_row2id()] + + scores = {} + # Iterate over targets + for i, v in enumerate(targets): + + try: + row = space.get_row(v.encode('utf8')) + except KeyError: + scores[v] = 'nan' + continue + + # Get number of non-zero elements in row + types = row.get_mat().getnnz() + + scores[v] = types + + + with codecs.open(outPath+'.csv', 'w', 'utf-8') as f_out: + for word in targets: + if is_norm: + scores[word]=float(scores[word])/normConst + print >> f_out, '\t'.join((word, str(float(scores[word])))) + + + logging.info("--- %s seconds ---" % (time.time() - start_time)) + + +if __name__ == '__main__': + main() diff --git a/modules/__init__.py b/modules/__init__.py new file mode 100644 index 0000000..0519ecb --- /dev/null +++ b/modules/__init__.py @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/modules/__pycache__/cupy_utils.cpython-37.pyc b/modules/__pycache__/cupy_utils.cpython-37.pyc new file mode 100644 index 0000000..e6690e4 Binary files /dev/null and b/modules/__pycache__/cupy_utils.cpython-37.pyc differ diff --git a/modules/__pycache__/embeddings.cpython-37.pyc b/modules/__pycache__/embeddings.cpython-37.pyc new file mode 100644 index 0000000..fad4440 Binary files /dev/null and b/modules/__pycache__/embeddings.cpython-37.pyc differ diff --git a/modules/composes/__init__.py b/modules/composes/__init__.py new file mode 100755 index 0000000..914df52 --- /dev/null +++ b/modules/composes/__init__.py @@ -0,0 +1,12 @@ +import logging + +class NullHandler(logging.Handler): + """For python versions <= 2.6; same as `logging.NullHandler` in 2.7.""" + def emit(self, record): + pass + +logger = logging.getLogger(__name__) +if len(logger.handlers) == 0: # To ensure reload() doesn't add another one + logger.addHandler(NullHandler()) + +#logging.basicConfig(filename='composes.log', filemode='w+',level=logging.DEBUG, format = "") diff --git a/modules/composes/composition/__init__.py b/modules/composes/composition/__init__.py new file mode 100755 index 0000000..e69de29 diff --git a/modules/composes/composition/composition_model.py b/modules/composes/composition/composition_model.py new file mode 100755 index 0000000..2e87c0b --- /dev/null +++ b/modules/composes/composition/composition_model.py @@ -0,0 +1,259 @@ +''' +Created on Oct 5, 2012 + +@author: Georgiana Dinu, Pham The Nghia +''' +import time +import math +from warnings import warn +from composes.semantic_space.space import Space +from composes.matrix.dense_matrix import DenseMatrix +from composes.utils.gen_utils import assert_is_instance +from composes.utils.matrix_utils import resolve_type_conflict +from composes.utils.io_utils import 
create_parent_directories + +import logging +from composes.utils import log_utils as log + +logger = logging.getLogger(__name__) + +class CompositionModel(object): + """ + Parent class of the composition models. + """ + + _name = "no name" + + MAX_MEM_OVERHEAD = 0.2 + + """ + double, in interval [0,1] + maximum overhead allowed: MAX_MEM_OVERHEAD ratio of argument space memory + when composing + """ + composed_id2column = None + """ + List of strings, the column strings of the resulted composed space. + """ + + def __init__(self): + """ + Constructor + """ + + def train(self, train_data, arg_space, phrase_space): + """ + Trains a composition model and sets its learned parameters. + + Args: + train_data: list of string tuples. Each tuple contains 3 + string elements: (arg1, arg2, phrase). + + arg_space: argument space(s). Space object or a tuple of two + Space objects (e.g. my_space, or (my_space1, my_space2)). + If two spaces are provided, arg1 elements of train data are + interpreted in space1, and arg2 in space2. + + phrase space: phrase space, of type Space. + + Calls the specific training routine of the current composition + model. Training tuples which contain strings not found in their + respective spaces are ignored. + + The id2column attribute of the resulted composed space is set to + be equal to that of the phrase space given as an input. + """ + + start = time.time() + + arg1_space, arg2_space = self.extract_arg_spaces(arg_space) + arg1_list, arg2_list, phrase_list = self.valid_data_to_lists(train_data, + (arg1_space.row2id, + arg2_space.row2id, + phrase_space.row2id) + ) + + + self._train(arg1_space, arg2_space, phrase_space, + arg1_list, arg2_list, phrase_list) + + self.composed_id2column = phrase_space.id2column + + log.print_composition_model_info(logger, self, 1, "\nTrained composition model:") + log.print_info(logger, 2, "With total data points:%s" % len(arg1_list)) + log.print_matrix_info(logger, arg1_space.cooccurrence_matrix, 3, + "Semantic space of argument 1:") + log.print_matrix_info(logger, arg2_space.cooccurrence_matrix, 3, + "Semantic space of argument 2:") + log.print_matrix_info(logger, phrase_space.cooccurrence_matrix, 3, + "Semantic space of phrases:") + log.print_time_info(logger, time.time(), start, 2) + + + def _train(self, arg1_space, arg2_space, phrase_space, arg1_list, arg2_list, phrase_list): + + arg1_mat = arg1_space.get_rows(arg1_list) + arg2_mat = arg2_space.get_rows(arg2_list) + phrase_mat = phrase_space.get_rows(phrase_list) + + [arg1_mat, arg2_mat, phrase_mat] = resolve_type_conflict([arg1_mat, + arg2_mat, + phrase_mat], + DenseMatrix) + + self._solve(arg1_mat, arg2_mat, phrase_mat) + + def compose(self, data, arg_space): + """ + Uses a composition model to compose elements. + + Args: + data: data to be composed. List of tuples, each containing 3 + strings: (arg1, arg2, composed_phrase). arg1 and arg2 are the + elements to be composed and composed_phrase is the string associated + to their composition. + + arg_space: argument space(s). Space object or a tuple of two + Space objects (e.g. my_space, or (my_space1, my_space2)). + If two spaces are provided, arg1 elements of data are + interpreted in space1, and arg2 in space2. + + Returns: + composed space: a new object of type Space, containing the + phrases obtained through composition. 
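+
+        Example (an illustrative sketch only; the toy space, row labels and
+        weights below are made up and not part of the library):
+
+            >>> import numpy as np
+            >>> from composes.semantic_space.space import Space
+            >>> from composes.matrix.dense_matrix import DenseMatrix
+            >>> from composes.composition.weighted_additive import WeightedAdditive
+            >>> arg_space = Space(DenseMatrix(np.array([[1.0, 2.0],
+            ...                                         [3.0, 4.0]])),
+            ...                   ["red", "car"], [])
+            >>> model = WeightedAdditive(alpha=0.5, beta=0.5)
+            >>> composed = model.compose([("red", "car", "red_car")], arg_space)
+            >>> composed.get_row("red_car")    # 0.5*red + 0.5*car = [2.0, 3.0]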
+ + """ + start = time.time() + + arg1_space, arg2_space = self.extract_arg_spaces(arg_space) + arg1_list, arg2_list, phrase_list = self.valid_data_to_lists(data, + (arg1_space.row2id, + arg2_space.row2id, + None)) + + # we try to achieve at most MAX_MEM_OVERHEAD*phrase_space memory overhead + # the /3.0 is needed + # because the composing data needs 3 * len(train_data) memory (arg1 vector, arg2 vector, phrase vector) + chunk_size = int(max(arg1_space.cooccurrence_matrix.shape[0],arg2_space.cooccurrence_matrix.shape[0],len(phrase_list)) + * self.MAX_MEM_OVERHEAD / 3.0) + 1 + + composed_mats = [] + for i in range(int(math.ceil(len(arg1_list) / float(chunk_size)))): + beg, end = i*chunk_size, min((i+1)*chunk_size, len(arg1_list)) + + arg1_mat = arg1_space.get_rows(arg1_list[beg:end]) + arg2_mat = arg2_space.get_rows(arg2_list[beg:end]) + + [arg1_mat, arg2_mat] = resolve_type_conflict([arg1_mat, arg2_mat], + DenseMatrix) + composed_mat = self._compose(arg1_mat, arg2_mat) + composed_mats.append(composed_mat) + + composed_phrase_mat = composed_mat.nary_vstack(composed_mats) + + if self.composed_id2column is None: + self.composed_id2column = self._build_id2column(arg1_space, arg2_space) + + log.print_name(logger, self, 1, "\nComposed with composition model:") + log.print_info(logger, 3, "Composed total data points:%s" % arg1_mat.shape[0]) + log.print_matrix_info(logger, composed_phrase_mat, 4, + "Resulted (composed) semantic space::") + log.print_time_info(logger, time.time(), start, 2) + + return Space(composed_phrase_mat, phrase_list, self.composed_id2column) + + @classmethod + def extract_arg_spaces(cls, arg_space): + """ + TO BE MOVED TO A UTILS MODULE! + """ + if not isinstance(arg_space, tuple): + arg1_space = arg_space + arg2_space = arg_space + else: + if len(arg_space) != 2: + raise ValueError("expected two spaces, received %d-ary tuple " + % len(arg_space)) + arg1_space, arg2_space = arg_space + + assert_is_instance(arg1_space, Space) + assert_is_instance(arg2_space, Space) + + cls._assert_space_match(arg1_space, arg2_space) + + return arg1_space, arg2_space + + @classmethod + def _assert_space_match(cls, arg1_space, arg2_space, phrase_space=None): + + if arg1_space.id2column != arg2_space.id2column: + raise ValueError("Argument spaces do not have identical columns!") + + if not phrase_space is None: + if arg1_space.id2column != phrase_space.id2column: + raise ValueError("Argument and phrase space do not have identical columns!") + + def _build_id2column(self, arg1_space, arg2_space): + return arg1_space.id2column + + + def valid_data_to_lists(self, data, (row2id1, row2id2, row2id3)): + """ + TO BE MOVED TO A UTILS MODULE! + """ + list1 = [] + list2 = [] + list3 = [] + + j = 0 + for i in xrange(len(data)): + sample = data[i] + + cond = True + + if not row2id1 is None: + cond = cond and sample[0] in row2id1 + + if not row2id2 is None: + cond = cond and sample[1] in row2id2 + + if not row2id3 is None: + cond = cond and sample[2] in row2id3 + + if cond: + list1.append(sample[0]) + list2.append(sample[1]) + list3.append(sample[2]) + j += 1 + + if i + 1 != j: + warn("%d (out of %d) lines are ignored because one of the elements is not found in its semantic space" + % ((i + 1) - j, (i + 1))) + + if not list1: + raise ValueError("No valid data found for training/composition!") + + return list1, list2, list3 + + def export(self, filename): + """ + Prints the parameters of the composition model to file. 
+ + Args: + filename: output filename, string + + Prints the parameters of the compositional model in an appropriate + format, specific to each model. + """ + create_parent_directories(filename) + self._export(filename) + + def get_name(self): + return self._name + + name = property(get_name) + """ + String, name of the composition model. + """ + + diff --git a/modules/composes/composition/dilation.py b/modules/composes/composition/dilation.py new file mode 100755 index 0000000..b99fce6 --- /dev/null +++ b/modules/composes/composition/dilation.py @@ -0,0 +1,91 @@ +''' +Created on Oct 15, 2012 + +@author: Georgiana Dinu, Pham The Nghia +''' +import numpy as np +from composition_model import CompositionModel +from composes.utils.num_utils import is_numeric +from composes.utils.py_matrix_utils import nonzero_invert + + +class Dilation(CompositionModel): + """ + Implements the dilation compositional model: + + :math:`\\vec{p} = (\\vec{u} \\cdot \\vec{u}) \\vec{v} + (\\lambda - 1) (\\vec{u} \\cdot \\vec{v}) \\vec{u}` + + where :math:`\\vec{p}` is the vector of the composed phrase, :math:`\\vec{u}, \\vec{v}` the vectors of the components + and :math:`\\lambda` is a scalar. + + """ + + + _name = "dilation" + + _lambda = 2 + + + def __init__(self, lambda_=None): + """ + Constructor. + + Args: + lambda_ : numeric, value of the lambda parameter. Optional. + """ + + if not lambda_ is None: + if not is_numeric(lambda_): + raise ValueError("Parameter not numeric: %s " %(type(lambda_))) + else: + self._lambda = lambda_ + + def _solve(self, arg1_mat, arg2_mat, phrase_mat): + + v1_row_norms = arg1_mat.norm(1) + v1_row_sqr_norms = np.multiply(v1_row_norms, v1_row_norms) + + v2_minus_p = arg2_mat.scale_rows(v1_row_sqr_norms) - phrase_mat + v1_dot_prod_v2_minus_p = arg1_mat.multiply(v2_minus_p).sum(1) + + v1_v2 = arg1_mat.multiply(arg2_mat).sum(1) + v1_v2_sqr = np.multiply(v1_v2, v1_v2) + + nom = np.multiply(v1_v2_sqr, v1_row_sqr_norms).sum() + denom = np.multiply(v1_v2, v1_dot_prod_v2_minus_p).sum() + + if nom != 0: + self._lambda = 1 - denom/nom + else: + self._lambda = 2 + + + def _compose(self, arg1_mat, arg2_mat): + # TO DO: this is inefficient here, we do 2 for s instead of one + # we do a for in get_rows in parent.compose() and a for here + # comp = ((self._lambda -1) * v1.multiply(v2).sum()/pow(v1.norm(),2)) * v1 + v2 + + v1_row_norms = arg1_mat.norm(1) + scale_factors1 = arg1_mat.multiply(arg2_mat).sum(1) + scale_factors2 = np.multiply(v1_row_norms, v1_row_norms) + + arg1_mat_scaled = arg1_mat.scale_rows(scale_factors1) + arg2_mat_scaled = arg2_mat.scale_rows(scale_factors2) + + #print "FACTORS u:", ((self._lambda -1)*scale_factors1).sum()/float(len(scale_factors1)) + #print "FACTORS v:", (scale_factors2).sum()/float(len(scale_factors2)) + + result = (self._lambda - 1) * arg1_mat_scaled + arg2_mat_scaled + + return result + + def get_lambda(self): + return self._lambda + """ + Lambda parameter. Default, set to lambda=2. 
+ """ + + + def _export(self, filename): + with open(filename, "w") as output_stream: + output_stream.write("lambda\t%f" % self._lambda) diff --git a/modules/composes/composition/full_additive.py b/modules/composes/composition/full_additive.py new file mode 100755 index 0000000..5961202 --- /dev/null +++ b/modules/composes/composition/full_additive.py @@ -0,0 +1,139 @@ +''' +Created on Oct 5, 2012 + +@author: Georgiana Dinu, Pham The Nghia +''' + +from composition_model import CompositionModel +from composes.utils.gen_utils import assert_is_instance +from composes.utils.matrix_utils import is_array_or_matrix +from composes.utils.matrix_utils import padd_matrix +from composes.utils.matrix_utils import to_compatible_matrix_types +from composes.utils.regression_learner import LstsqRegressionLearner +from composes.utils.regression_learner import RegressionLearner +from composes.utils.matrix_utils import resolve_type_conflict +from composes.matrix.dense_matrix import DenseMatrix +from composes.exception.illegal_state_error import IllegalStateError + + +class FullAdditive(CompositionModel): + """ + Implements the full additive compositional model: + + :math:`\\vec{p} = A \\vec{u} + B \\vec{v}` + + where :math:`\\vec{p}` is the vector of the composed phrase, + :math:`\\vec{u}, \\vec{v}`, the vectors of the components + and :math:`A`, :math:`B` are two matrices. + + """ + _name = "full_additive" + _mat_a_t = None + _mat_b_t = None + + + def __init__(self, A=None, B=None, learner=LstsqRegressionLearner()): + #TODO here; very important, should be able to set the intercept + #when mat a and mat b are given , to true or false. now by default is + #is false + """ + Constructor. + + Args: + A= : matrix A, of matrix-like type (Matrix, ndarray, + numpy matrix, scipy matrix). Optional (parameters can be set + through training.) + + B= : matrix B, matrix-like type. Optional. + + learner= : regression learner object, of type RegressionLearner. + Optional, default LstsqRegressionLearner. 
+ """ + if A is not None and B is not None: + mat_a = A + mat_b = B + if not is_array_or_matrix(mat_a): + raise TypeError("expected matrix type, received: %s" + % type(mat_a)) + + if not is_array_or_matrix(mat_b): + raise TypeError("expected matrix type, received: %s" + % type(mat_b)) + + mat_a, mat_b = to_compatible_matrix_types(mat_a, mat_b) + self._mat_a_t = mat_a.transpose() + self._mat_b_t = mat_b.transpose() + self._has_intercept = False + + else: + self._regression_learner = learner + self._has_intercept = self._regression_learner.has_intercept() + + + def _solve(self, arg1_mat, arg2_mat, phrase_mat): + + self._has_intercept = self._regression_learner.has_intercept() + + result = self._regression_learner.train(arg1_mat.hstack(arg2_mat), phrase_mat) + + self._mat_a_t = result[0:arg1_mat.shape[1], :] + self._mat_b_t = result[arg1_mat.shape[1]:, :] + + + def _compose(self, arg1_mat, arg2_mat): + #NOTE when we get in this compose arg1 mat and arg2 mat have the same type + [mat_a_t, mat_b_t, arg1_mat] = resolve_type_conflict([self._mat_a_t, + self._mat_b_t, + arg1_mat], + type(arg1_mat)) + if self._has_intercept: + return arg1_mat * mat_a_t + padd_matrix(arg2_mat, 1) * mat_b_t + else: + return arg1_mat * mat_a_t + arg2_mat * mat_b_t + + def set_regression_learner(self, regression_learner): + assert_is_instance(regression_learner, RegressionLearner) + self._regression_learner = regression_learner + + def get_regression_learner(self): + return self._regression_learner + + regression_learner = property(get_regression_learner, set_regression_learner) + """ + Regression method to be used in training, of type RegressionLearner. + Default is LstsqRegressionLearner. + """ + + def _build_id2column(self, arg1_space, arg2_space): + return [] + + def _export(self, filename): + if self._mat_a_t is None or self._mat_b_t is None: + raise IllegalStateError("cannot export an untrained FullAdditive model.") + + with open(filename, "w") as output_stream: + output_stream.write("A\n") + output_stream.write(str(DenseMatrix(self._mat_a_t).mat.T)) + output_stream.write("\nB\n") + + if self._has_intercept: + output_stream.write(str(DenseMatrix(self._mat_b_t[:-1,]).mat.T)) + output_stream.write("\nIntercept\n") + output_stream.write(str(DenseMatrix(self._mat_b_t[-1,]).mat.T)) + else: + output_stream.write(str(DenseMatrix(self._mat_b_t).mat.T)) + + + def get_mat_a_t(self): + return self._mat_a_t + mat_a_t = property(get_mat_a_t) + """ + Transpose of matrix A parameter, of type Matrix. + """ + + def get_mat_b_t(self): + return self._mat_b_t + mat_b_t = property(get_mat_b_t) + """ + Transpose of matrix B parameter, of type Matrix. 
+ """ diff --git a/modules/composes/composition/lexical_function.py b/modules/composes/composition/lexical_function.py new file mode 100755 index 0000000..cb07b3b --- /dev/null +++ b/modules/composes/composition/lexical_function.py @@ -0,0 +1,288 @@ +''' +Created on Oct 11, 2012 + +@author: Georgiana Dinu, Pham The Nghia +''' + +import numpy as np +import time +from composition_model import CompositionModel +from composes.semantic_space.space import Space +from composes.utils.gen_utils import get_partitions +from composes.utils.regression_learner import LstsqRegressionLearner +from composes.utils.regression_learner import RegressionLearner +from composes.utils.matrix_utils import resolve_type_conflict +from composes.utils.matrix_utils import get_type_of_largest +from composes.utils.matrix_utils import padd_matrix +from composes.utils.num_utils import is_integer +from composes.utils.gen_utils import assert_is_instance +from composes.exception.illegal_state_error import IllegalStateError + +import logging +from composes.utils import log_utils as log + +logger = logging.getLogger(__name__) + + +class LexicalFunction(CompositionModel): + """ + Implements the lexical function compositional model. + + :math:`\\vec{p} = U \\vec{v}` + + where :math:`\\vec{p}` is the vector of the composed phrase, + :math:`U` is the matrix representation of the first component (the lexical function) + and :math:`\\vec{v}` is the vector representation of the second component + + """ + + _name = "lexical_function" + + def __init__(self, function_space=None, intercept=False, learner=None, min_samples=1): + """ + Constructor. + + Args: + function_space= : function space parameter, containing + the lexical functions, of type Space. Optional, can be set through + training. + + intercept= : True/False, True if the function space has intercept. + Optional, default False. When training is used, intercept is set + to the intercept value of the regression learner used. + + learner= : regression method of type RegressionLearner. Optional, + default LstsqRegressionLearner. + + min_samples= : minimum number of training samples required before a + LexicalFunction can be trained. Optional, default 1. + + """ + # assert_valid_kwargs(kwargs, ["function_space", "intercept", "learner"]) + + self.composed_id2column = [] + if learner and function_space: + raise ValueError("Cannot instantiate with both learner and function_space!") + + self._regression_learner = learner if learner else LstsqRegressionLearner() + self._function_space = function_space + self._has_intercept = intercept + self._MIN_SAMPLES = min_samples + + + def train(self, train_data, arg_space, phrase_space): + """ + Trains a lexical function composition model to learn a function + space and sets the function_space parameter. + + Args: + train_data: list of string tuples. Each tuple contains 3 + string elements: (function_word, arg, phrase). + + arg_space: argument space, of type Space. arg elements of + train data are interpreted in this space. + + phrase space: phrase space, of type Space. phrase elements of + the train data are interpreted in this space. + + Training tuples which contain strings not found in their + respective spaces are ignored. Function words containing less than + _MIN_SAMPLES training instances are ignored. For example, if + _MIN_SAMPLES=2 and function word "red" occurs in only one phrase, "red" + is ignored. + + The id2column attribute of the resulted composed space is set to + be equal to that of the phrase space given as an input. 
+ """ + + start = time.time() + + self._has_intercept = self._regression_learner.has_intercept() + + if not isinstance(arg_space, Space): + raise ValueError("expected one input spaces!") + + result_mats = [] + + train_data = sorted(train_data, key=lambda tup: tup[0]) + function_word_list, arg_list, phrase_list = self.valid_data_to_lists(train_data, + (None, + arg_space.row2id, + phrase_space.row2id)) + #partitions the sorted input data + keys, key_ranges = get_partitions(function_word_list, self._MIN_SAMPLES) + + if not keys: + raise ValueError("No valid training data found!") + + assert (len(arg_space.element_shape) == 1) + + if self._has_intercept: + new_element_shape = phrase_space.element_shape + (arg_space.element_shape[0] + 1,) + else: + new_element_shape = phrase_space.element_shape + (arg_space.element_shape[0],) + + for i in xrange(len(key_ranges)): + idx_beg, idx_end = key_ranges[i] + + print ("Training lexical function...%s with %d samples" + % (keys[i], idx_end - idx_beg)) + + arg_mat = arg_space.get_rows(arg_list[idx_beg:idx_end]) + phrase_mat = phrase_space.get_rows(phrase_list[idx_beg:idx_end]) + + #convert them to the same type + matrix_type = get_type_of_largest([arg_mat, phrase_mat]) + [arg_mat, phrase_mat] = resolve_type_conflict([arg_mat, phrase_mat], + matrix_type) + + result_mat = self._regression_learner.train(arg_mat, phrase_mat).transpose() + + result_mat.reshape((1, np.prod(new_element_shape))) + + result_mats.append(result_mat) + + new_space_mat = arg_mat.nary_vstack(result_mats) + + self.composed_id2column = phrase_space.id2column + + self._function_space = Space(new_space_mat, keys, [], + element_shape=new_element_shape) + + log.print_composition_model_info(logger, self, 1, "\nTrained composition model:") + log.print_info(logger, 3, "Trained: %s lexical functions" % len(keys)) + log.print_info(logger, 3, "With total data points:%s" % len(function_word_list)) + log.print_matrix_info(logger, arg_space.cooccurrence_matrix, 3, + "Semantic space of arguments:") + log.print_info(logger, 3, "Shape of lexical functions learned:%s" + % (new_element_shape,)) + log.print_matrix_info(logger, new_space_mat, 3, + "Semantic space of lexical functions:") + log.print_time_info(logger, time.time(), start, 2) + + def compose(self, data, arg_space): + """ + Uses a lexical function composition model to compose elements. + + Args: + data: data to be composed. List of tuples, each containing 3 + strings: (function_word, arg, composed_phrase). function_word and + arg are the elements to be composed and composed_phrase is the + string associated to their composition. function_word elements + are interpreted in self.function_space. + + arg_space: argument space, of type Space. arg elements of data are + interpreted in this space. + + Returns: + composed space: a new object of type Space, containing the + phrases obtained through composition. 
+ + """ + start = time.time() + + assert_is_instance(arg_space, Space) + arg1_list, arg2_list, phrase_list = self.valid_data_to_lists(data, + (self._function_space.row2id, + arg_space.row2id, + None)) + + composed_vec_list = [] + for i in xrange(len(arg1_list)): + arg1_vec = self._function_space.get_row(arg1_list[i]) + arg2_vec = arg_space.get_row(arg2_list[i]) + + matrix_type = get_type_of_largest([arg1_vec, arg2_vec]) + [arg1_vec, arg2_vec] = resolve_type_conflict([arg1_vec, arg2_vec], + matrix_type) + + composed_ph_vec = self._compose(arg1_vec, arg2_vec, + self._function_space.element_shape) + + composed_vec_list.append(composed_ph_vec) + + result_element_shape = self._function_space.element_shape[0:-1] + composed_ph_mat = composed_ph_vec.nary_vstack(composed_vec_list) + + log.print_name(logger, self, 1, "\nComposed with composition model:") + log.print_info(logger, 3, "Composed total data points:%s" % len(arg1_list)) + log.print_info(logger, 3, "Functional shape of the resulted (composed) elements:%s" + % (result_element_shape,)) + log.print_matrix_info(logger, composed_ph_mat, 4, + "Resulted (composed) semantic space:") + log.print_time_info(logger, time.time(), start, 2) + + return Space(composed_ph_mat, phrase_list, self.composed_id2column, + element_shape=result_element_shape) + + + def _compose(self, function_arg_vec, arg_vec, function_arg_element_shape): + + new_shape = (np.prod(function_arg_element_shape[0:-1]), + function_arg_element_shape[-1]) + + function_arg_vec.reshape(new_shape) + + if self._has_intercept: + comp_el = function_arg_vec * padd_matrix(arg_vec.transpose(), 0) + else: + comp_el = function_arg_vec * arg_vec.transpose() + + return comp_el.transpose() + + @classmethod + def _assert_space_match(cls, arg1_space, arg2_space, phrase_space=None): + pass + + def set_regression_learner(self, regression_learner): + assert_is_instance(regression_learner, RegressionLearner) + self._regression_learner = regression_learner + + def get_regression_learner(self): + return self._regression_learner + + regression_learner = property(get_regression_learner, set_regression_learner) + """ + Regression method to be used in training, of type RegressionLearner. + Default is RidgeRegressionLearner(param=1). + """ + + def get_function_space(self): + return self._function_space + + function_space = property(get_function_space) + """ + Function space parameter, containing the lexical functions, of type Space. + Can be set through training or through initialization, default None. + """ + + def get_has_intercept(self): + return self._has_intercept + + has_intercept = property(get_has_intercept) + """ + Has intercept parameter, boolean. If True, then the function_space is + assumed to contain intercept. Can be set through training or through + initialization, default is assumed to be False. + """ + + def set_min_samples(self, min_samples): + if not is_integer(min_samples): + raise ValueError("expected %s min_samples value, received %s" + % ("integer", type(min_samples))) + self._MIN_SAMPLES = min_samples + + def get_min_samples(self): + return self._MIN_SAMPLES + + MIN_SAMPLES = property(get_min_samples, set_min_samples) + """ + Minimal number of samples for each training instance. Default 3. 
+ """ + + def _export(self, filename): + if self._function_space is None: + raise IllegalStateError("cannot export an untrained LexicalFunction model.") + self._function_space.export(filename, format="dm") + + diff --git a/modules/composes/composition/multiplicative.py b/modules/composes/composition/multiplicative.py new file mode 100755 index 0000000..c656ac1 --- /dev/null +++ b/modules/composes/composition/multiplicative.py @@ -0,0 +1,42 @@ +''' +Created on Oct 5, 2012 + +@author: Georgiana Dinu, Pham The Nghia +''' + +from composition_model import CompositionModel +from composes.exception.illegal_state_error import IllegalOperationError + +class Multiplicative(CompositionModel): + """ + Implements the component-wise multiplication compositional model: + + :math:`\\vec{p} = \\vec{u} \\cdot \\vec{v}` + + where :math:`\\vec{p}` is the vector of the composed phrase and + :math:`\\vec{u}, \\vec{v}` are the vectors of the components. + + :math:`\\vec{u} \\cdot \\vec{v} = (u_1v_1,...,u_nv_n)` + """ + + _name = "multiplicative" + + def __init__(self): + """ + Constructor + """ + + def train(self): + """ + Current multiplicative model cannot be trained, it has no parameters. + """ + raise IllegalOperationError("Cannot train multiplicative model!") + + def _compose(self, arg1_mat, arg2_mat): + return arg1_mat.multiply(arg2_mat) + + def export(self, filename): + """ + Current multiplicative model cannot be exported, it has no parameters. + """ + raise IllegalOperationError("cannot export a Multiplicative model.") diff --git a/modules/composes/composition/weighted_additive.py b/modules/composes/composition/weighted_additive.py new file mode 100755 index 0000000..09bf9b9 --- /dev/null +++ b/modules/composes/composition/weighted_additive.py @@ -0,0 +1,143 @@ +''' +Created on Oct 5, 2012 + +@author: Georgiana Dinu, Pham The Nghia +''' + +from composition_model import CompositionModel +from composes.matrix.dense_matrix import DenseMatrix +from composes.utils.num_utils import is_numeric +# from composes.utils.mem_utils import get_mem_usage +from composes.utils.matrix_utils import resolve_type_conflict +import numpy as np +import math + +class WeightedAdditive(CompositionModel): + """ + Implements weighted additive compositional model: + + :math:`\\vec{p} = \\alpha \\vec{u} + \\beta \\vec{v}` + + where :math:`\\vec{p}` is the vector of the composed phrase and + :math:`\\vec{u}, \\vec{v}` are the vectors of the components + + When :math:`\\alpha=\\beta=0.5` the model performs simple vector addition. + """ + + _name = "weighted_additive" + + """ + double, in interval [0,1] + maximum overhead allowed: MAX_MEM_OVERHEAD ratio of peripheral space memory + """ + MAX_MEM_OVERHEAD = 0.2 + + + def __init__(self, alpha=None, beta=None): + """ + Constructor. + + Args: + alpha: alpha parameter, numeric type. Optional, can be set through + training + beta: beta parameter, numeric type. Optional, can be set through + training. + + Raises: + TypeError if alpha or beta are not numeric. 
+ """ + self._alpha = 0.5 + self._beta = 0.5 + if not alpha is None: + if not is_numeric(alpha): + raise TypeError("Parameter not numeric: %s " % (type(alpha))) + else: + self._alpha = alpha + + if not beta is None: + if not is_numeric(beta): + raise TypeError("Parameter not numeric: %s " % (type(beta))) + else: + self._beta = beta + + if not alpha is None and beta is None: + self._beta = 1 - self._alpha + + + def _train(self, arg1_space, arg2_space, phrase_space, arg1_list, arg2_list, phrase_list): + + # we try to achieve at most MAX_MEM_OVERHEAD*phrase_space memory overhead + # the /3.0 is needed + # because the train data needs 3 * len(train_data) memory (arg1 vector, arg2 vector, phrase vector) + chunk_size = int(phrase_space.cooccurrence_matrix.shape[0] * self.MAX_MEM_OVERHEAD / 3.0) + 1 + + arg1_arg2_dot, arg1_phrase_dot, arg2_phrase_dot, arg1_norm_sqr, arg2_norm_sqr = (0, 0, 0, 0, 0) + + for i in range(int(math.ceil(len(arg1_list) / float(chunk_size)))): + beg, end = i*chunk_size, min((i+1)*chunk_size, len(arg1_list)) + + arg1_mat = arg1_space.get_rows(arg1_list[beg:end]) + arg2_mat = arg2_space.get_rows(arg2_list[beg:end]) + phrase_mat = phrase_space.get_rows(phrase_list[beg:end]) + + [arg1_mat, arg2_mat, phrase_mat] = resolve_type_conflict([arg1_mat, + arg2_mat, + phrase_mat], + DenseMatrix) + + res = self._process(arg1_mat, arg2_mat, phrase_mat) + arg1_arg2_dot += res[0] + arg1_phrase_dot += res[1] + arg2_phrase_dot += res[2] + arg1_norm_sqr += res[3] + arg2_norm_sqr += res[4] + + + self._solve(arg1_arg2_dot, arg1_phrase_dot, arg2_phrase_dot, arg1_norm_sqr, arg2_norm_sqr) + + + def _process(self, arg1_mat, arg2_mat, phrase_mat): + + # debug here + # remove when done + # print "Using %s MB " % (get_mem_usage()) + + arg1_arg2_dot = arg1_mat.multiply(arg2_mat).sum() + arg1_phrase_dot = arg1_mat.multiply(phrase_mat).sum() + arg2_phrase_dot = arg2_mat.multiply(phrase_mat).sum() + + arg1_norm_sqr = pow(arg1_mat.norm(), 2) + arg2_norm_sqr = pow(arg2_mat.norm(), 2) + + return arg1_arg2_dot, arg1_phrase_dot, arg2_phrase_dot, arg1_norm_sqr, arg2_norm_sqr + + def _solve(self, arg1_arg2_dot, arg1_phrase_dot, arg2_phrase_dot, arg1_norm_sqr, arg2_norm_sqr): + + a = np.linalg.pinv(np.mat([[arg1_norm_sqr,arg1_arg2_dot], + [arg1_arg2_dot,arg2_norm_sqr]])) + a = a * np.mat([[arg1_phrase_dot],[arg2_phrase_dot]]) + self._alpha = a[0, 0] + self._beta = a[1, 0] + + + def _compose(self, arg1_mat, arg2_mat): + return self._alpha * arg1_mat + self._beta * arg2_mat + + def _export(self, filename): + with open(filename, "w") as output_stream: + output_stream.write("alpha\t%f\n" % self._alpha) + output_stream.write("beta\t%f" % self._beta) + + def get_alpha(self): + return self._alpha + alpha = property(get_alpha) + """ + Alpha parameter, default 0.5. + """ + + def get_beta(self): + return self._beta + beta = property(get_beta) + """ + Beta parameter, default 0.5. 
+ """ diff --git a/modules/composes/exception/__init__.py b/modules/composes/exception/__init__.py new file mode 100755 index 0000000..e69de29 diff --git a/modules/composes/exception/illegal_state_error.py b/modules/composes/exception/illegal_state_error.py new file mode 100755 index 0000000..5065f78 --- /dev/null +++ b/modules/composes/exception/illegal_state_error.py @@ -0,0 +1,18 @@ +''' +Created on Jun 15, 2012 + +@author: thenghia.pham +''' + +class IllegalStateError(Exception): + ''' + ''' + def __init__(self, msg): + self.__msg = msg + + +class IllegalOperationError(Exception): + ''' + ''' + def __init__(self, msg): + self.__msg = msg \ No newline at end of file diff --git a/modules/composes/exception/invalid_argument_error.py b/modules/composes/exception/invalid_argument_error.py new file mode 100755 index 0000000..0613344 --- /dev/null +++ b/modules/composes/exception/invalid_argument_error.py @@ -0,0 +1,6 @@ + +class InvalidArgumentError(Exception): + ''' + ''' + def __init__(self, msg): + self.__msg = msg \ No newline at end of file diff --git a/modules/composes/matrix/__init__.py b/modules/composes/matrix/__init__.py new file mode 100755 index 0000000..e69de29 diff --git a/modules/composes/matrix/dense_matrix.py b/modules/composes/matrix/dense_matrix.py new file mode 100755 index 0000000..ae55185 --- /dev/null +++ b/modules/composes/matrix/dense_matrix.py @@ -0,0 +1,362 @@ +''' +Created on Sep 17, 2012 + +@author: Georgiana Dinu, Pham The Nghia +''' + +import numpy as np +from warnings import warn +from scipy.sparse import issparse +from composes.utils.num_utils import is_numeric +from composes.matrix.matrix import Matrix + +class DenseMatrix(Matrix): + ''' + classdocs + ''' + + def __init__(self, data): + """ + Constructor, creates a DenseMatrix from a numpy matrix-like + object. + + Matrix-like objects (np.ndarray, np.matrix, scipy.sparse.matrix, + SparseMatrix) are converted into np.matrix. + + Params: + data: numpy matrix-like object or Matrix type + + Raises: + TypeError: if input data is not one of scipy.sparse/ + numpy.ndarray/numpy.matrix/Matrix + """ + + if issparse(data): + self.mat = data.todense() + elif isinstance(data, np.matrix): + if data.shape[0] == 0 or data.shape[1] == 0: + raise ValueError("cannot initialize empty matrix") + self.mat = data + elif isinstance(data, np.ndarray): + if len(data) == 0: + raise ValueError("cannot initialize empty matrix") + self.mat = np.matrix(data) + elif isinstance(data, Matrix): + # TODO: remove warning or remove import somehow fix this!! + # from composes.matrix.sparse_matrix import SparseMatrix + self.mat = data.to_dense_matrix().mat + else: + # TODO: raise suitable message + raise TypeError("expected matrix-like type, received %s" + % type(data)) + + def __str__(self): + return str(self.mat) + + def __getitem__(self, index): + result = self.mat[index] + if is_numeric(result): + return result + else: + return type(self)(result.copy()) + + def multiply(self, matrix_): + """ + Computes component-wise multiplication of two matrices. + + Args: + matrix_: a second matrix of type DenseMatrix + + Returns: + A DenseMatrix containing the cw multiplication of the two. + + Raises: + TypeError: if the argument is not of type DenseMatrix + ValueError: if the two matrices don t have the same shape. 
+ """ + + self._assert_same_type(matrix_) + if self.mat.shape != matrix_.mat.shape: + raise ValueError("inconsistent shapes: %s %s" + % (str(self.mat.shape), str(matrix_.mat.shape) )) + return DenseMatrix(np.multiply(self.mat, matrix_.mat)) + + def transpose(self): + """ + Transposes the current matrix. + + Returns: + DenseMatrix, a transpose of the current matrix. + + """ + return type(self)(self.mat.transpose().copy()) + + def reshape(self, new_shape): + """ + Reshapes current matrix. + + Overwrites the current matrix with a new matrix of the + given shape! + + Args: + shape: length 2 tuple or pair of integers + + Raises: + ValueError: if shape is not an integer pair or + if new shape is inconsistent with the total + size of the current matrix. + """ + + # TODO: change this is necessary to make a copy + self.mat = self.mat.reshape(new_shape) + + @staticmethod + def identity(size): + """ + Builds the identity matrix. + + Args: + size: integer, the result matrix is of shape size x size + + Returns: + Identity DenseMatrix. + """ + return DenseMatrix(np.eye(size, size, 0, np.double)) + + def vstack(self, matrix_): + """ + Vertical stack of two matrices. + + Args: + matrix_: a second matrix of type DenseMatrix + + Returns: + A DenseMatrix, vertical stack of the two matrices. + + Raises: + TypeError: if the argument is not of type DenseMatrix + + """ + self._assert_same_type(matrix_) + return DenseMatrix(np.vstack((self.mat, matrix_.mat))) + + def hstack(self, matrix_): + """ + Horizontal stack of two matrices. + + Args: + matrix_: a second matrix of type DenseMatrix + + Returns: + A DenseMatrix, horizontal stack of the two matrices. + + Raises: + TypeError: if the argument is not of type DenseMatrix + + """ + self._assert_same_type(matrix_) + return DenseMatrix(np.hstack((self.mat, matrix_.mat))) + + @classmethod + def nary_vstack(cls, mat_list): + """ + Class method, vertical stack of n matrices. + + Args: + mat_list: a list of matrices of type DenseMatrix + + Returns: + A DenseMatrix, vertical stack of the arguments. + + """ + np_mat_list = [matrix_.mat for matrix_ in mat_list] + return DenseMatrix(np.vstack(np_mat_list)) + + @classmethod + def nary_hstack(cls, mat_list): + """ + Class method, horizontal stack of n matrices. + + Args: + mat_list: a list of matrices of type DenseMatrix + + Returns: + A DenseMatrix, horizontal stack of the arguments. + + """ + + np_mat_list = [matrix_.mat for matrix_ in mat_list] + return DenseMatrix(np.hstack(np_mat_list)) + + + + def scale_rows(self, array_): + """ + Scales each row of the matrix by the values given in an array. + + Args: + array_: ndarray containing the values to scale by + + Returns: + A new DenseMatrix with scaled rows. + """ + self._assert_array(array_) + + x_dim = self.mat.shape[0] + if array_.shape in ((x_dim, 1), (x_dim,)): + if array_.shape == (x_dim,): + array_ = array_.reshape((x_dim, 1)) + return DenseMatrix(np.multiply(self.mat, array_)) + else: + raise ValueError("inconsistent shapes: %s %s" + % (str(self.mat.shape), str(array_.shape))) + + def scale_columns(self, array_): + """ + Scales each column of the matrix by the values given in an array. + + Args: + array_: ndarray containing the values to scale by + + Returns: + A new DenseMatrix with scaled columns. 
+ """ + self._assert_array(array_) + + y_dim = self.mat.shape[1] + if array_.shape in ((1, y_dim), (y_dim,)): + return DenseMatrix(np.multiply(self.mat, array_)) + else: + raise ValueError("inconsistent shapes: %s %s" + % (str(self.mat.shape), str(array_.shape))) + + def plog(self): + """ + Applies positive log to the matrix elements. + + Elements smaller than 1 (leading to not-defined log or negative log) + are set to 0. Log is applied on all other elements. + + Modifies the current matrix. + """ + + #this line uses 3 x size(mat) to run in the worst case + #(if we select the entire matrix - depends on the size of the selection) + self.mat[self.mat < 1.0] = 1 + self.mat = np.log(self.mat) + + + def assert_positive(self): + """ + Asserts that all values are larger or equal to 0. + + Raises: + ValueError if not all values are >= 0. + """ + if not np.all(self.mat >= 0): + raise ValueError("expected non-negative matrix") + + def get_non_negative(self): + """ + Turns negative entries to 0. + + Returns: + A new DenseMatrix matrix in which negative entries are set to 0. + + """ + mat_ = self.mat.copy() + # TODO: time against : mat_.data[mat_.data < 0] = 0 + mat_ = np.where(mat_ > 0, mat_, 0) + return DenseMatrix(mat_) + + def to_non_negative(self): + """ + Turns negative entries to 0. + + Modifies the current matrix: all negative entries are set to 0. + + """ + + self.mat = np.where(self.mat > 0, self.mat, 0) + + def to_ones(self): + """ + Turns strictly positive entries to 1 and negative entries to 0. + + Modifies the current matrix: all strictly positive entries are + set to 1, all negative entries are set to 0. + + """ + + self.mat = np.where(self.mat > 0, 1, 0) + + def remove_small_values(self, epsilon): + """ + Sets values smaller than an epsilon to 0. + + Args: + epsilon: scalar, threshold + Returns: + A DenseMatrix in which all values smaller than epsilon are + set to 0. + + """ + mat_ = self.mat.copy() + mat_ = np.where(mat_ > epsilon, mat_, 0) + return DenseMatrix(mat_) + + def is_mostly_positive(self): + """ + Checks if more than 50% of the non zero elements of a + matrix are positive. + + """ + return self.mat[self.mat > 0].size > self.mat.size/2 + + def all_close(self, matrix_): + """ + Checks of the values in two matrices are all_close. + + Args: + matrix_: input matrix of type DenseMatrix + + Returns: + bool: True if the elements are allclose (using np.allclose). + + """ + return np.allclose(self.mat, matrix_.mat) + + def norm(self, axis = None): + """ + Computes the norms on a certain axis or of the entire matrix. + + Args: + axis: 0/1 or None, if axis is None computes the norm of the + full matrix + Returns: + nd.array containing the norms on a given axis, or a scalar + if the axis is None. + + """ + if axis is None: + return np.linalg.norm(self.mat) + else: + return np.sqrt(self.multiply(self).sum(axis)) + + def to_sparse_matrix(self): + """ + Converts to SparseMatrix. + """ + from composes.matrix.sparse_matrix import SparseMatrix + return SparseMatrix(self.mat) + + def to_dense_matrix(self, copy = False): + """ + Returns a copy is copy=True, returns self otherwise. 
+ """ + + if (copy): + return self.copy() + else: + return self + diff --git a/modules/composes/matrix/linalg.py b/modules/composes/matrix/linalg.py new file mode 100755 index 0000000..b2155a5 --- /dev/null +++ b/modules/composes/matrix/linalg.py @@ -0,0 +1,406 @@ +''' +Created on Oct 4, 2012 + +@author: Georgiana Dinu, Pham The Nghia +''' + +import numpy as np +import logging +import scipy.linalg as splinalg +from sparsesvd import sparsesvd +from warnings import warn +from time import time +from math import sqrt +from composes.matrix.matrix import Matrix +from composes.matrix.dense_matrix import DenseMatrix +from composes.matrix.sparse_matrix import SparseMatrix +from composes.utils.matrix_utils import assert_same_shape +from composes.utils.matrix_utils import padd_matrix +import composes.utils.log_utils as log + +logger = logging.getLogger(__name__) + +class Linalg(object): + """ + Contains a set of liniar algebra utilities defined to work both with sparse and + with dense matrices as an input (i.e. with objects of type SparseMatrix/DenseMatrix). + + Implements: + svd, + nmf (LIN algorithm, add citation here!), + pinv, + ordinary least squares regression, + ridge regression + """ + + _NMF_ALPHA = 1.0 + _NMF_BETA = 0.1 + _NMF_MAX_ITER = 20 + _NMF_MAX_ITER_SUBPROB = 15 + _NMF_MIN_TOL = 0.001 + _NMF_TOL = _NMF_MIN_TOL + _NMF_TOL_DECREASE_FACTOR = 0.5 + _NMF_TIME_LIMIT = 36000 + + _SVD_TOL = 1e-12 + + @staticmethod + def svd(matrix_, reduced_dimension): + """ + Performs SVD decomposition. + + If the rank is smaller than the requested reduced dimension, + reduction to rank is performed. Dense SVD uses Linalg._SVD_TOL to decide + the rank of the matrix. + + + Args: + matrix_: input of type Matrix + reduced_dimension: int, the desired reduced dimension + + Returns: + U,S,V of the decomposition X = USV^T. U, V: Matrix type, + S: ndarray of singular values. + + """ + log.print_info(logger, 4, "In SVD..reducing to dim %d" % reduced_dimension) + log.print_matrix_info(logger, matrix_, 5, "Input matrix:") + + #TODO: IMPORTANT!! do the sign normalization COLUMN-wise!!!not + #for the full matrix at once!! + if reduced_dimension == 0: + raise ValueError("Cannot reduce to dimensionality 0.") + + if isinstance(matrix_, SparseMatrix): + result = Linalg._sparse_svd(matrix_, reduced_dimension) + elif isinstance(matrix_, DenseMatrix): + result = Linalg._dense_svd(matrix_, reduced_dimension) + else: + raise TypeError("expected Matrix type, received %s" % type(matrix_)) + + log.print_matrix_info(logger, result[0], 5, "Resulting matrix U:") + return result + + @staticmethod + def ridge_regression(matrix_a , matrix_b, lambda_, intercept=False): + #log.print_info(logger, "In Ridge regression..", 4) + #log.print_matrix_info(logger, matrix_a, 5, "Input matrix A:") + #log.print_matrix_info(logger, matrix_b, 5, "Input matrix B:") + """ + Performs Ridge Regression. + + This method use the general formula: + ... + to solve the problem: + :math:`X = argmin(||AX - B||_2 + \\lambda||X||_2)` + + Args: + matrix_a: input matrix A, of type Matrix + matrix_b: input matrix A, of type Matrix + lambda_: scalar, lambda parameter + intercept: bool. If True intercept is used. Optional, default False. 
+ + Returns: + solution X of type Matrix + + """ + + matrix_a._assert_same_type(matrix_b) + # TODO: check out where to define this assert + assert_same_shape(matrix_a, matrix_b, 0) + + matrix_type = type(matrix_a) + dim = matrix_a.shape[1] + + if intercept: + matrix_a = matrix_a.hstack(matrix_type(np.ones((matrix_a.shape[0], + 1)))) + lambda_diag = (lambda_ ) * matrix_type.identity(dim) + + if intercept: + lambda_diag = padd_matrix(padd_matrix(lambda_diag, 0, 0.0), 1, 0.0) + + matrix_a_t = matrix_a.transpose() + try: + tmp_mat = Linalg.pinv(((matrix_a_t * matrix_a) + lambda_diag)) + except np.linalg.LinAlgError: + print "Warning! LinAlgError" + tmp_mat = matrix_type.identity(lambda_diag.shape[0]) + + tmp_res = tmp_mat * matrix_a_t + result = tmp_res * matrix_b + + #S: used in generalized cross validation, page 244 7.52 (YZ also used it) + # S is defined in 7.31, page 232 + # instead of computing the matrix and then its trace, we can compute + # its trace directly + # NOTE when lambda = 0 we get out trace(S) = rank(matrix_a) + + dist = (matrix_a * result - matrix_b).norm() + S_trace = matrix_a_t.multiply(tmp_res).sum() + + return result, S_trace, dist + + @classmethod + def lstsq_regression(cls, matrix_a, matrix_b, intercept=False): + """ + Performs Least Squares Regression. + + Solves the problem: + + :math:`X = argmin(||AX - B||_2)` + + Args: + matrix_a: input matrix A, of type Matrix + matrix_b: input matrix A, of type Matrix + intercept: bool. If True intercept is used. Optional, False by default. + + Returns: + solution X of type Matrix + + """ + + matrix_a._assert_same_type(matrix_b) + # TODO: check out where to define this assert + assert_same_shape(matrix_a, matrix_b, 0) + + if intercept: + matrix_a = matrix_a.hstack(type(matrix_a)(np.ones((matrix_a.shape[0], + 1)))) + if isinstance(matrix_a, DenseMatrix): + result = Linalg._dense_lstsq_regression(matrix_a, matrix_b) + else: + result = Linalg._sparse_lstsq_regression(matrix_a, matrix_b) + + return result + + @staticmethod + def _dense_lstsq_regression(matrix_a , matrix_b): + return DenseMatrix(Linalg._numpy_lstsq_regression(matrix_a, matrix_b)) + #return DenseMatrix(Linalg._scipy_lstsq_regression(matrix_a, matrix_b)) + + @staticmethod + def _sparse_lstsq_regression(matrix_a , matrix_b, intercept=False): + return Linalg.ridge_regression(matrix_a, matrix_b, 0.0)[0] + #return SparseMatrix(Linalg._dense_lstsq_regression(DenseMatrix(matrix_a), + # DenseMatrix(matrix_b))) + + @staticmethod + def _numpy_lstsq_regression(matrix_a, matrix_b, rcond=-1): + return np.linalg.lstsq(matrix_a.mat, matrix_b.mat, rcond)[0] + + @staticmethod + def _scipy_lstsq_regression(matrix_a, matrix_b): + return splinalg.lstsq(matrix_a.mat, matrix_b.mat)[0] + + @staticmethod + def _sparse_svd(matrix_, reduced_dimension): + #svds from scipy.sparse.linalg + #RAISES ValueError if the rank is smaller than reduced_dimension + 1 + #TODO : fix this or replace with svdsparse + #??? eIGENVALUES ARE NOT SORTED!!!!!! + #IF EVER USE THIS; FIX THE PROBLEMS + #u, s, vt = svds(matrix_.mat, False, True) + """ + Patch + + Problem: sparsesvd sometimes returns fewer dimensions that requested. + It will be no longer needs when sparsesvd will allow + SVDLIBC parameters as an input (kappa parameter of SVDLIBC has to be + larger than the default. e.g. 1E-05 instead of 1E-06) + + Current fix: ask for more dimensions and remove the unnecessary ones. 
+ """ + + extra_dims = int(reduced_dimension/10) + + ut, s, vt = sparsesvd(matrix_.mat.tocsc(), reduced_dimension + extra_dims) + + u = SparseMatrix(ut.transpose()) + v = SparseMatrix(vt.transpose()) + + no_cols = min(u.shape[1], reduced_dimension) + u = u[:, 0:no_cols] + v = v[:, 0:no_cols] + + Linalg._check_reduced_dim(matrix_.shape[1], u.shape[1], reduced_dimension) + + if not u.is_mostly_positive(): + u = -u + v = -v + + return u, s[0:no_cols], v + + @staticmethod + def _dense_svd(matrix_, reduced_dimension): + + print "Running dense svd" + u, s, vt = np.linalg.svd(matrix_.mat, False, True) + rank = len(s[s > Linalg._SVD_TOL]) + + no_cols = min(u.shape[1], reduced_dimension, rank) + u = DenseMatrix(u[:,0:no_cols]) + s = s[0:no_cols] + v = DenseMatrix(vt[0:no_cols,:].transpose()) + + Linalg._check_reduced_dim(matrix_.shape[1], u.shape[1], reduced_dimension) + + if not u.is_mostly_positive(): + u = -u + v = -v + + return u, s, v + + @staticmethod + def _check_reduced_dim(no_columns, reduced_dim, requested_reduced_dim): + if requested_reduced_dim > no_columns: + warn("Number of columns smaller than the reduced dimensionality requested: %d < %d. Truncating to %d dimensions (rank)." % (no_columns, requested_reduced_dim, reduced_dim)) + elif reduced_dim != requested_reduced_dim: + warn("Returning %d dimensions instead of %d." % (reduced_dim, requested_reduced_dim)) + + @staticmethod + def _nmf_nlssubprob(v, w, w_t, h_init, tol, maxiter): + """ + h, grad: output solution and gradient + iteration: #iterations used + v, w: constant matrices + h_init: initial solution + tol: stopping tolerance + maxiter: limit of iterations + """ + h = h_init + w_t_v = w_t * v + w_t_w = w_t * w + + alpha = Linalg._NMF_ALPHA + beta = Linalg._NMF_BETA + + #sub_loop_time = time() + + for iteration in xrange(1, maxiter): + grad = w_t_w * h - w_t_v + + # search step size + for inner_iter in xrange(1, 20): + hn = h - alpha * grad + hn = hn.get_non_negative() + d = hn - h + gradd = grad.multiply(d).sum() + dQd = (w_t_w * d).multiply(d).sum() + suff_decr = 0.99 * gradd + 0.5 * dQd < 0 + if inner_iter == 1: + decr_alpha = not suff_decr + hp = h + if decr_alpha: + if suff_decr: + h = hn + break + else: + alpha = alpha * beta + else: + if not suff_decr or hp.all_close(hn): + h = hp + break + else: + alpha = alpha / beta + hp = hn + + return h, grad, iteration + + @staticmethod + def nmf(v, w_init, h_init): + """ + Performs Non-negative Matrix Factorization. + + It solves the problem: + :math:`W,H = argmin(||X - WH||_2)` such that W and H are non-negative matrices. + + Args: + w_init: initial value for matrix W, type Matrix + h_init: initial value for matrix H, type Matrix + + Returns: + W, H : where W, H solve the NMF problem stated above. 
+ + """ + + log.print_info(logger, 4, "In NMF..reducing to dim %d" % w_init.shape[1]) + log.print_matrix_info(logger, w_init, 5, "W init matrix:") + log.print_matrix_info(logger, h_init, 5, "H init matrix:") + + if not isinstance(v, Matrix): + raise TypeError("expected Matrix type, received %s" % type(v)) + w = w_init + h = h_init + init_time = time() + + wt = w.transpose() + ht = h.transpose() + vt = v.transpose() + gradW = (w * (h * ht)) - (v * ht) + gradH = ((wt * w) * h) - (wt * v) + + gradW_norm = gradW.norm() + gradH_norm = gradH.norm() + initgrad = sqrt(pow(gradW_norm, 2) + pow(gradH_norm, 2)) + + #print 'Init gradient norm %f' % initgrad + tolW = max(Linalg._NMF_MIN_TOL, Linalg._NMF_TOL) * initgrad + tolH = tolW + + #loop_time = init_time + for iteration in xrange(1, Linalg._NMF_MAX_ITER): + log.print_info(logger, 5, "Iteration: %d(%d)" % (iteration, Linalg._NMF_MAX_ITER)) + + if time() - init_time > Linalg._NMF_TIME_LIMIT: + break + + w, gradW, iterW = Linalg._nmf_nlssubprob(vt, h.transpose(), h, + w.transpose(), tolW, + Linalg._NMF_MAX_ITER_SUBPROB) + old_w = w + w = w.transpose() + gradW = gradW.transpose() + + if iterW == 1: + tolW = Linalg._NMF_TOL_DECREASE_FACTOR * tolW + + h, gradH, iterH = Linalg._nmf_nlssubprob(v, w, old_w, h, tolH, + Linalg._NMF_MAX_ITER_SUBPROB) + + if iterH == 1: + tolH = Linalg._NMF_TOL_DECREASE_FACTOR * tolH + + log.print_matrix_info(logger, w, 5, "Return W matrix:") + log.print_matrix_info(logger, h, 5, "Return H matrix:") + return w, h + + @staticmethod + def pinv(matrix_): + """ + Computes the pseudo-inverse of a matrix. + + Args: + matrix_: input matrix, of type Matrix + + Returns: + Pseudo-inverse of input matrix, of type Matrix + + Raises: + TypeError, if input is not of type Matrix + """ + if isinstance(matrix_, SparseMatrix): + return Linalg._sparse_pinv(matrix_) + elif isinstance(matrix_, DenseMatrix): + return Linalg._dense_pinv(matrix_) + else: + raise TypeError("expected Matrix type, received %s" % type(matrix_)) + + @staticmethod + def _dense_pinv(matrix_): + return DenseMatrix(np.linalg.pinv(matrix_.mat)) + + @staticmethod + def _sparse_pinv(matrix_): + # TODO: implement pinv + return SparseMatrix(np.linalg.pinv(matrix_.mat.todense())) diff --git a/modules/composes/matrix/matrix.py b/modules/composes/matrix/matrix.py new file mode 100755 index 0000000..d987204 --- /dev/null +++ b/modules/composes/matrix/matrix.py @@ -0,0 +1,152 @@ +''' +Created on Sep 17, 2012 + +@author: Georgiana Dinu, Pham The Nghia +''' + +from composes.utils.num_utils import is_numeric +from composes.utils.py_matrix_utils import is_array + +class Matrix(object): + """ + Provides a common interface for matrix implementations. + + Provides a common interface for different matrix implementations + (sparse/dense). In vector space models, a matrix is used to encode + a set of entities such as words or phrases (rows) described in terms + of contextual features (columns). 
+ """ + + def __init__(self, *args, **kwargs): + raise NotImplementedError() + + + def __add__(self, matrix_): + ''' + operation''' + self._assert_same_type(matrix_) + return type(self)(self.mat + matrix_.mat) + + def __sub__(self, matrix_): + ''' - operation''' + self._assert_same_type(matrix_) + return type(self)(self.mat - matrix_.mat) + + def __neg__(self): + ''' - operation''' + return type(self)(-self.mat) + + def __mul__(self, factor): + ''' * operation''' + if is_numeric(factor): + return type(self)(self.mat * factor) + else: + self._assert_same_type(factor) + return type(self)(self.mat * factor.mat) + + def __div__(self, factor): + ''' / operation''' + if is_numeric(factor): + if factor == 0: + raise ZeroDivisionError("Division by zero") + else: + raise TypeError("expected numeric type, received %s" % (type(factor))) + return type(self)(self.mat / float(factor)) + + def __rmul__(self, factor): + ''' * operation''' + if is_numeric(factor): + return self.__mul__(factor) + raise TypeError("expected numeric type, received %s" % (type(factor))) + + + #TODO move all these asserts somewhere else + def _assert_same_type(self, operand): + if type(self) != type(operand): + raise TypeError("expected matrix of type %s, received %s" % + (type(self), type(operand))) + + def assert_same_shape(self, matrix_): + """ + Asserts that the matrix has the same shape as a second matrix. + + Args: + matrix_: A second matrix of type Matrix. + + Raises: + ValueError: If the current matrix and the argument matrix + do not have the same shape. + """ + + if self.mat.shape != matrix_.mat.shape: + raise ValueError("inconsistent shapes: %s %s" + % (str(self.mat.shape), str(matrix_.mat.shape) )) + + #TODO move all these asserts somewhere else + def _assert_array(self, operand): + if not is_array(operand): + raise TypeError("expected array, received %s" % (type(operand))) + + + def sum(self, axis=None): + #return type is dense matrix of shape (1, dimy) or (dimx,1) + #or a number if **kwargs is None + return self.mat.sum(axis) + + def sorted_permutation(self, norm_function, axis_): + """ + Computes the permutation resulted when sorting the matrix + on an axis, according to a function, in descending order. + + Sorts the rows or the columns (as given by axis) + of a matrix according to a norm_function and returns + the permutation of this as a np.array + + Args: + norm_function: One of sum/length. A function that + takes an axis as an argument (i.e. 0 or 1) and + returns an array of values (i.e. sum of all rows + if axis = 0 and norm_function = sum). + + axis_: axis value, one of 0/1 + + Returns: + perm_srtd: np.array containing the permutation of the + sorting + """ + + #norms = norm_function(axis=axis_) + + norms = norm_function(axis_).getA().flatten() + perm_srtd = sorted(range(len(norms)), key = norms.__getitem__, + reverse=True) + + return perm_srtd + + def get_mat(self): + return self._mat + + def set_mat(self, mat_): + self._mat = mat_ + + mat = property(get_mat, set_mat) + """ + Stores the actual matrix structure of the Matrix object. + Of type numpy.matrix for DenseMatrix, and scipy.sparse.csr_matrix + for SparseMatrix. + """ + + def get_shape(self): + return self.mat.shape + + shape = property(get_shape) + """ + Shape of the matrix, tuple with two elements. 
+ """ + + def copy(self): + return type(self)(self.mat.copy()) + + + + + diff --git a/modules/composes/matrix/sparse_matrix.py b/modules/composes/matrix/sparse_matrix.py new file mode 100755 index 0000000..563188b --- /dev/null +++ b/modules/composes/matrix/sparse_matrix.py @@ -0,0 +1,413 @@ +''' +Created on Sep 17, 2012 + +@author: Georgiana Dinu, Pham The Nghia +''' + +import numpy as np +from warnings import warn +from scipy.sparse import issparse +from scipy.sparse import vstack +from scipy.sparse import hstack +from scipy.sparse import csr_matrix +from scipy.sparse.sputils import isintlike +from composes.utils.num_utils import is_numeric +from composes.utils.num_utils import is_integer +from composes.matrix.matrix import Matrix +from composes.utils.py_matrix_utils import array_to_csr_diagonal +from scipy.sparse import identity + +class SparseMatrix(Matrix): + ''' + classdocs + ''' + + + def __init__(self, data): + """ + Constructor, creates a SparseMatrix from a numpy matrix-like + object. + + Matrix-like objects (np.ndarray, np.matrix, scipy.sparse.matrix, + DenseMatrix) are converted into scipy.csr_matrix. + + Args: + data: numpy matrix-like object or Matrix type + + Raises: + TypeError: if input data is not one of scipy.sparse/ + numpy.ndarray/numpy.matrix/Matrix + ValueError: if trying to initialize shape-0 matrix + """ + if issparse(data): + self.mat = data.tocsr() + + elif isinstance(data, np.matrix): + if data.shape[0] == 0 or data.shape[1] == 0: + raise ValueError("cannot initialize matrix with shape 0") + self.mat = csr_matrix(data) + + elif isinstance(data, np.ndarray): + if len(data) == 0: + raise ValueError("cannot initialize matrix with shape 0") + self.mat = csr_matrix(data) + + elif isinstance(data, Matrix): + self.mat = data.to_sparse_matrix().mat + else: + raise TypeError("expected scipy sparse matrix, received %s" + % (type(data))) + + def __str__(self): + return str(self.mat.todense()) + + def __getitem__(self, key): + """ + Overwrites csr_matrix m[i,:], m[i] operations which are faulty in + current scipy.sparse releases. + + """ + def __get_row(row): + start = self.mat.indptr[row] + end = self.mat.indptr[row + 1] + return SparseMatrix(csr_matrix((self.mat.data[start:end], + self.mat.indices[start:end], + [0, end - start]), + shape=(1, self.mat.shape[1]), + copy=True)) + + if isinstance(key, tuple): + row = key[0] + col = key[1] + if isintlike(row) and row >= 0 and isinstance(col, slice): + if col == slice(None, None, None): + return __get_row(row) + + if isintlike(key) and key >= 0: + return __get_row(key) + + result = self.mat[key] + if is_numeric(result): + return result + else: + return SparseMatrix(result) + + def reshape(self, new_shape): + """ + Reshapes current matrix. + + Overwrites the current matrix with a new matrix of the + given shape! + + Args: + shape: length 2 tuple or pair of integers + + Raises: + ValueError: if shape is not an integer pair or + if new shape is inconsistent with the total + size of the current matrix. + """ + + if not isinstance(new_shape, tuple) or len(new_shape) != 2: + raise ValueError("shape must be integer pair") + + no_rows, no_cols = self.mat.shape + new_no_rows, new_no_cols = new_shape + + if not is_integer(new_no_rows) or not is_integer(new_no_cols): + raise ValueError("shape must be integer pair") + if no_rows * no_cols != new_no_rows * new_no_cols: + raise ValueError("total size of new matrix must be unchanged.") + + #TODO: change here if we want a copy!! 
+ mat = self.mat.tocoo(copy=False) + + #upcast mat.row and mat.col + if no_rows * no_cols >= 2**31-1: + linear_pos = np.array(mat.row, dtype=np.int64) * no_cols + mat.col + else: + linear_pos = mat.row * no_cols + mat.col + + mat.row = linear_pos // new_no_cols + mat.col = linear_pos - (mat.row * new_no_cols) + + #NOTE: change here if we want a copy!! + self.mat = csr_matrix((mat.data, (mat.row, mat.col)), shape=new_shape) + + @staticmethod + def identity(size): + """ + Builds the identity matrix. + + Args: + size: integer, the result matrix is of shape size x size + + Returns: + Identity SparseMatrix. + """ + # TODO: should do system-wise + return SparseMatrix(identity(size, dtype = np.double, format = "csr")) + + def transpose(self): + """ + Transposes the current matrix. + + Returns: + SparseMatrix, a transpose of the current matrix. + + """ + return type(self)(self.mat.transpose()) + + def multiply(self, matrix_): + """ + Computes component-wise multiplication of two matrices. + + Args: + matrix_: a second matrix of type SparseMatrix + + Returns: + A SparseMatrix containing the cw multiplication of the two. + + Raises: + TypeError: if the argument is not of type SparseMatrix + ValueError: if the two matrices don t have the same shape. + """ + self._assert_same_type(matrix_) + if self.mat.shape != matrix_.mat.shape: + raise ValueError("inconsistent shapes: %s %s" + % (str(self.mat.shape), str(matrix_.mat.shape) )) + + return SparseMatrix(self.mat.multiply(matrix_.mat)) + + def vstack(self, matrix_): + """ + Vertical stack of two matrices. + + Args: + matrix_: a second matrix of type SparseMatrix + + Returns: + A SparseMatrix, vertical stack of the two matrices. + + Raises: + TypeError: if the argument is not of type SparseMatrix + + """ + self._assert_same_type(matrix_) + return SparseMatrix(vstack([self.mat, matrix_.mat], format = "csr")) + + + def hstack(self, matrix_): + """ + Horizontal stack of two matrices. + + Args: + matrix_: a second matrix of type SparseMatrix + + Returns: + A SparseMatrix, horizontal stack of the two matrices. + + Raises: + TypeError: if the argument is not of type SparseMatrix + + """ + self._assert_same_type(matrix_) + return SparseMatrix(hstack([self.mat, matrix_.mat], format = "csr")) + + + @classmethod + def nary_vstack(cls, mat_list): + """ + Class method, vertical stack of n matrices. + + Args: + mat_list: a list of matrices of type SparseMatrix + + Returns: + A SparseMatrix, vertical stack of the arguments. + + """ + np_mat_list = [matrix_.mat for matrix_ in mat_list] + return SparseMatrix(vstack(np_mat_list)) + + @classmethod + def nary_hstack(cls, mat_list): + """ + Class method, horizontal stack of n matrices. + + Args: + mat_list: a list of matrices of type SparseMatrix + + Returns: + A SparseMatrix, horizontal stack of the arguments. + + """ + + np_mat_list = [matrix_.mat for matrix_ in mat_list] + return SparseMatrix(hstack(np_mat_list)) + + def scale_rows(self, array_): + """ + Scales each row of the matrix by the values given in an array. + + Args: + array_: ndarray containing the values to scale by + + Returns: + A new SparseMatrix with scaled rows. + """ + + self._assert_array(array_) + + diag_matrix = array_to_csr_diagonal(array_) + return SparseMatrix(diag_matrix * self.mat) + + def scale_columns(self, array_): + """ + Scales each column of the matrix by the values given in an array. + + Args: + array_: ndarray containing the values to scale by + + Returns: + A new SparseMatrix with scaled columns. 
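`scale_rows` and `scale_columns` implement scaling as multiplication by a sparse diagonal matrix (built by `array_to_csr_diagonal`), so the result stays sparse. A rough plain-SciPy equivalent of the same trick, with toy data:

```
import numpy as np
from scipy.sparse import csr_matrix, diags

X = csr_matrix(np.array([[1., 2.],
                         [3., 4.]]))
weights = np.array([10., 0.1])

scaled_rows = diags(weights).dot(X)      # D * X scales the rows
scaled_cols = X.dot(diags(weights))      # X * D scales the columns
print(scaled_rows.toarray())
```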
+ """ + self._assert_array(array_) + + diag_matrix = array_to_csr_diagonal(array_) + return SparseMatrix(self.mat * diag_matrix) + + def plog(self): + """ + Applies positive log to the matrix elements. + + Elements smaller than 1 (leading to not-defined log or negative log) + are set to 0. Log is applied on all other elements. + + Modifies the current matrix. + """ + + self.mat.data[self.mat.data <= 1] = 1 + self.mat.data = np.log(self.mat.data) + self.mat.eliminate_zeros() + + def get_non_negative(self): + """ + Turns negative entries to 0. + + Returns: + A new SparseMatrix matrix in which negative entries are set to 0. + + """ + mat_ = self.mat.copy() + #TODO: time against : mat_.data[mat_.data < 0] = 0 + mat_.data = np.where(mat_.data > 0, mat_.data, 0) + mat_.eliminate_zeros() + return SparseMatrix(mat_) + + def to_non_negative(self): + """ + Turns negative entries to 0. + + Modifies the current matrix: all negative entries are set to 0. + + """ + self.mat.data.clip(0, out=self.mat.data) + self.mat.eliminate_zeros() + + def to_ones(self): + """ + Turns strictly positive entries to 1 and negative entries to 0. + + Modifies the current matrix: all strictly positive entries are + set to 1, all negative entries are set to 0. + + """ + self.mat.data = np.where(self.mat.data > 0, 1, 0) + self.mat.eliminate_zeros() + + def remove_small_values(self, epsilon): + """ + Sets values smaller than an epsilon to 0. + + Args: + epsilon: scalar, threshold + Returns: + A SparseMatrix in which all values smaller than epsilon are + set to 0. + + """ + mat_ = self.mat.copy() + mat_.data = np.where(mat_.data > epsilon, mat_.data, 0) + mat_.eliminate_zeros() + return SparseMatrix(mat_) + + def assert_positive(self): + """ + Asserts that all values are larger or equal to 0. + + Raises: + ValueError if not all values are >= 0. + """ + if not np.all(self.mat.data >= 0): + raise ValueError("expected non-negative matrix") + + def is_mostly_positive(self): + """ + Checks if more than 50% of the non zero elements of a + matrix are positive. + + """ + return self.mat.data[self.mat.data > 0].size > self.mat.data.size/2 + + def all_close(self, matrix_): + """ + Checks of the values in two matrices are all_close. + + Args: + matrix_: input matrix of type SparseMatrix + + Returns: + bool: True if the elements are allclose (using np.allclose). + + """ + diff = self.mat - matrix_.mat + return np.allclose(diff.data, np.zeros(len(diff.data))) + + def norm(self, axis = None): + """ + Computes the norms on a certain axis or of the entire matrix. + + Args: + axis: 0/1 or None, if axis is None computes the norm of the + full matrix + Returns: + nd.array containing the norms on a given axis, or a scalar + if the axis is None. + + """ + if axis is None: + return np.linalg.norm(self.mat.data) + else: + return np.sqrt(self.multiply(self).sum(axis)) + + def to_dense_matrix(self): + """ + Converts to DenseMatrix. + """ + from composes.matrix.dense_matrix import DenseMatrix + return DenseMatrix(self.mat) + + def to_sparse_matrix(self, copy = False): + """ + Returns a copy is copy=True, returns self otherwise. 
+ """ + if (copy): + return self.copy() + else: + return self + + + + + diff --git a/modules/composes/semantic_space/__init__.py b/modules/composes/semantic_space/__init__.py new file mode 100755 index 0000000..e69de29 diff --git a/modules/composes/semantic_space/operation.py b/modules/composes/semantic_space/operation.py new file mode 100755 index 0000000..a9f9dbc --- /dev/null +++ b/modules/composes/semantic_space/operation.py @@ -0,0 +1,248 @@ +''' +Created on Jun 6, 2012 + +@author: thenghia.pham +''' + +from composes.matrix.dense_matrix import DenseMatrix +from composes.exception.illegal_state_error import IllegalStateError +from composes.utils.matrix_utils import resolve_type_conflict +from warnings import warn + +class Operation(object): + """ + This class implements both the application, and the projection of a + transformation on a semantic space. + + An operation object can be used to apply or to project a specific + transformation on a semantic space. + After a transformation is applied, for example on a core space, the operation + class stores the information required to further project this same + operation onto a space peripheral to the core space. + """ + + def __init__(self): + """ + Constructor + """ + pass + + def _raise_projection_error(self, transformation): + raise IllegalStateError("Illegal projection of %s. Attempting\ + projection before application." + % (transformation)) + + def _raise_double_application_error(self, transformation): + raise IllegalStateError("Illegal application of %s. Attempting\ + double application." % (transformation)) + +class ScalingOperation(Operation): + """ + This class implements the application and the projection of scaling + transformations. + """ + + def __init__(self, scaling): + self.__scaling = scaling + self.__column_stats = None + + def apply(self, matrix_): + """ + Applies a scaling operation. + + Args: + matrix_: matrix on which the scaling is applied, of type Matrix + + Returns: + the scaled matrix + + The column statistics computed by the scaling transformation, if any, + is stored in the current operation object. For example, PPMI scaling + needs column sums in order to be projected on peripheral spaces, + while PLOG scaling does not require this. + + """ + + if not self.__column_stats is None: + self._raise_double_application_error(self.__scaling) + + result_matrix = self.__scaling.apply(matrix_) + + if self.__scaling.uses_column_stats: + self.__column_stats = self.__scaling.get_column_stats(matrix_) + + return result_matrix + + def project(self, matrix_): + """ + Projects a scaling operation. + + Args: + matrix_: matrix on which the scaling is projected, of type Matrix + + Returns: + the scaled matrix + + If the current operation object has column_stats, this structure is + used in the projection. + """ + + if self.__column_stats is None and self.__scaling.uses_column_stats: + self._raise_projection_error(self.__scaling) + + if self.__scaling.uses_column_stats: + return self.__scaling.apply(matrix_, self.__column_stats) + else: + return self.__scaling.apply(matrix_) + + def __str__(self): + return str(self.__scaling) + + +class DimensionalityReductionOperation(Operation): + """ + This class implements the application and the projection of dimensionality + reduction transformations. + """ + + def __init__(self, dim_reduction): + self.__dim_reduction = dim_reduction + self.__transmat = None + + def apply(self, matrix_): + """ + Applies a dim. reduction operation. 
+ + Args: + matrix_: matrix on which the reduction is applied, of type Matrix + + Returns: + the reduced matrix + + The transformation matrix obtained in the reduction (specific to each + reduction method) is stored in the operation object. This transformation + matrix is further used for projecting the dim. reduction method on + a space peripheral to the space on which it has been originally applied. + """ + + if not self.__transmat is None: + self._raise_double_application_error(self.__dim_reduction) + + res_mat, self.__transmat = self.__dim_reduction.apply(matrix_) + + return DenseMatrix(res_mat) + + def project(self, matrix_): + """ + Projects a dim. reduction operation. + + Args: + matrix_: matrix on which the reduction is projected, of type Matrix + + Returns: + the reduced matrix + + Uses the transformation matrix stored in the operation object to project + the dimensionality reduction method on a new space, peripheral to the + original one. + """ + + if self.__transmat is None: + self._raise_projection_error(self.__dim_reduction) + + if self.__dim_reduction.name == "nmf": + matrix_.assert_positive() + + if not isinstance(matrix_, type(self.__transmat)): + warn("WARNING: peripheral matrix type (dense/sparse) should be the same as the core space matrix type!!") + + [matrix_, transmat] = resolve_type_conflict([matrix_, self.__transmat], + type(matrix_)) + + result_mat = matrix_ * transmat + + if self.__dim_reduction.name == "nmf": + result_mat.to_non_negative() + + return DenseMatrix(result_mat) + + def __str__(self): + return str(self.__dim_reduction) + + +class FeatureSelectionOperation(Operation): + """ + This class implements the application and the projection of feature + selection transformations. + """ + + def __init__(self, feat_selection): + self.__feat_selection = feat_selection + self.__selected_columns = None + self.__original_columns = None + + def apply(self, matrix_): + """ + Applies a dim. feature selection operation. + + Args: + matrix_: matrix on which the reduction is applied, of type Matrix + + Returns: + the reduced matrix + + The columns selected are stored in the operation object. These are + further used for projecting the feature selection method on + a space peripheral to the original space on which it has been applied. + """ + + if not self.__selected_columns is None: + self._raise_double_application_error(self.__feat_selection) + + res_mat, self.__selected_columns = self.__feat_selection.apply(matrix_) + return res_mat + + def project(self, matrix_): + """ + Projects a feature selection operation. + + Args: + matrix_: matrix on which the selection is applied, of type Matrix + + Returns: + the reduced matrix + + Uses the information on selected columns stored in the operation + object to project the feature selection method on a new space, + peripheral to the original one. + """ + + if self.__selected_columns is None: + self._raise_projection_error(self.__dim_reduction) + + res_mat = matrix_[:, self.__selected_columns] + return res_mat + + + def __str__(self): + return str(self.__feat_selection) + + + def get_selected_columns(self): + return self.__selected_columns + + def get_original_columns(self): + return self.__original_columns + + def set_original_columns(self, original_columns): + self.__original_columns = original_columns + + selected_columns = property(get_selected_columns) + """ + List of integers, indices of the columns selected. 
+ """ + original_columns = property(get_original_columns, set_original_columns) + """ + List of strings, the id2column of the space before applying the + feature selection. + """ diff --git a/modules/composes/semantic_space/peripheral_space.py b/modules/composes/semantic_space/peripheral_space.py new file mode 100755 index 0000000..a553f5c --- /dev/null +++ b/modules/composes/semantic_space/peripheral_space.py @@ -0,0 +1,160 @@ +''' +Created on Sep 26, 2012 + +@author: Georgiana Dinu, Pham The Nghia +''' + +from space import Space +from numpy import array +from composes.utils.space_utils import list2dict +from composes.utils.space_utils import assert_dict_match_list +from composes.utils.space_utils import assert_shape_consistent +from composes.utils.space_utils import add_items_to_dict +from composes.semantic_space.operation import FeatureSelectionOperation +from composes.semantic_space.operation import DimensionalityReductionOperation +from composes.utils.gen_utils import assert_is_instance +from composes.matrix.matrix import Matrix + +class PeripheralSpace(Space): + ''' + classdocs + ''' + + + def __init__(self, core_space, matrix_, id2row, row2id=None): + """ + Constructor. + + Args: + core_space: Space type, the core space that this is peripheral to. + matrix_: Matrix type, the data matrix of the space + id2row: list, the row elements + row2id: dictionary, maps row strings to ids. Optional, built from + id2row by default. + + Returns: + A peripheral semantic space (type PeripheralSpace) on which the + core space operations have been projected. Column indexing structures + and operations are taken over from the core space. + + Raises: + TypeError: if matrix_ or core_space are not of the correct type + ValueError: if element shape is not consistent with + the size of matrix rows + if the matrix and the provided row and column + indexing structures are not of consistent shapes. + """ + assert_is_instance(matrix_, Matrix) + assert_is_instance(core_space, Space) + assert_is_instance(id2row, list) + # TODO: assert it is not a peripheral space here! + + if row2id is None: + row2id = list2dict(id2row) + else: + assert_dict_match_list(row2id, id2row) + + column2id = core_space.column2id + id2column = core_space.id2column + + self._operations = list(core_space.operations) + self._row2id = row2id + self._id2row = id2row + self._column2id = column2id + self._id2column = id2column + + self._cooccurrence_matrix = self._project_core_operations(matrix_) + assert_shape_consistent(self.cooccurrence_matrix, self._id2row, + self._id2column, self._row2id, self._column2id) + + self._element_shape = (self._cooccurrence_matrix.shape[1],) + + + def _project_core_operations(self, matrix_): + + for operation in self._operations: + if isinstance(operation, DimensionalityReductionOperation): + self._id2column, self._column2id = [], {} + + if isinstance(operation, FeatureSelectionOperation): + if operation.original_columns: + self._id2column = list(array(operation.original_columns)[operation.selected_columns]) + self._column2id = list2dict(self._id2column) + else: + self._id2column, self._column2id = [],{} + + matrix_ = operation.project(matrix_) + return matrix_ + + + def add_rows(self, matrix_, id2row): + """ + Adds rows to a peripheral space. + + Args: + matrix_: Matrix type, the matrix of the elements to be added. + id2row: list, string identifiers of the rows to be added. + + Modifies the current space by appending the new rows. + All operations of the core space are projected to the new rows. 
+ + Raises: + ValueError: if attempting to add row strings which are already + in the space. + matrix of the new data is not consistent in shape + with the current data matrix. + """ + + try: + self._row2id = add_items_to_dict(self.row2id, id2row) + except ValueError: + raise ValueError("Found duplicate keys when appending rows to\ + peripheral space.") + + if matrix_.mat.shape[0] != len(id2row): + raise ValueError("Matrix shape inconsistent with no. of rows:%s %s" + % (matrix_.mat.shape, len(id2row))) + + self._id2row = self.id2row + id2row + matrix_ = self._project_core_operations(matrix_) + + self._cooccurrence_matrix = self._cooccurrence_matrix.vstack(matrix_) + assert_shape_consistent(self.cooccurrence_matrix, self.id2row, + self.id2column, self.row2id, self.column2id) + + @classmethod + def build(cls, core_space, **kwargs): + """ + Reads in data files and extracts the data to construct a semantic space. + + If the data is read in dense format and no columns are provided, + the column indexing structures are set to empty. + + Args: + data: file containing the counts + format: format on the input data file: one of sm/dm + rows: file containing the row elements. Optional, if not provided, + extracted from the data file. + cols: file containing the column elements + + Returns: + A semantic space build from the input data files. + + Raises: + ValueError: if one of data/format arguments is missing. + if cols is missing and format is "sm" + if the input columns provided are not consistent with + the shape of the matrix (for "dm" format) + + """ + + sp = Space.build(**kwargs) + + mat = sp._cooccurrence_matrix + id2row = sp.id2row + row2id = sp.row2id + return PeripheralSpace(core_space, mat, id2row, row2id) + + + + diff --git a/modules/composes/semantic_space/space.py b/modules/composes/semantic_space/space.py new file mode 100755 index 0000000..df29e04 --- /dev/null +++ b/modules/composes/semantic_space/space.py @@ -0,0 +1,649 @@ +''' +Created on Sep 21, 2012 + +@author: Georgiana Dinu, Pham The Nghia +''' + +import time +import logging +from numpy import array +from numpy import prod +from composes.utils.space_utils import list2dict +from composes.utils.space_utils import assert_dict_match_list +from composes.utils.space_utils import assert_shape_consistent +from composes.utils.gen_utils import assert_is_instance +from composes.utils.space_utils import add_items_to_dict +from composes.utils.matrix_utils import resolve_type_conflict +from composes.utils.matrix_utils import get_type_of_largest +from composes.matrix.matrix import Matrix +from composes.matrix.dense_matrix import DenseMatrix +from composes.matrix.sparse_matrix import SparseMatrix +from composes.semantic_space.operation import FeatureSelectionOperation +from composes.semantic_space.operation import DimensionalityReductionOperation +from composes.similarity.similarity import Similarity +from composes.transformation.scaling.scaling import Scaling +from composes.transformation.dim_reduction.dimensionality_reduction import DimensionalityReduction +from composes.transformation.feature_selection.feature_selection import FeatureSelection +from composes.exception.illegal_state_error import IllegalOperationError +from composes.utils import log_utils as log +from composes.utils.io_utils import read_sparse_space_data +from composes.utils.io_utils import extract_indexing_structs +from composes.utils.io_utils import read_dense_space_data +from composes.utils.io_utils import create_parent_directories +from composes.utils.io_utils import 
print_list +from composes.utils.io_utils import print_cooc_mat_dense_format +from composes.utils.io_utils import print_cooc_mat_sparse_format + + +logger = logging.getLogger(__name__) + + +class Space(object): + """ + This class implements semantic spaces. + + A semantic space describes a list of targets (words, phrases, etc.) + in terms of co-occurrence with contextual features. + + It contains a matrix storing (some type of) co-occurrence + strength values between targets and contextual features: by convention, + targets are rows and features are columns. The space also stores structures + that encode the mappings between the matrix row/column indices and the + associated target/context-feature strings. + + Transformations which rescale the matrix elements can be applied + to a semantic space. A semantic also space allows for similarity + computations between row elements of the space. + + """ + + def __init__(self, matrix_, id2row, id2column, row2id=None, column2id=None, + operations=[], element_shape=None): + """ + Constructor. + + Args: + matrix_: Matrix type, the data matrix of the space + id2row: list, the row elements + id2column: list, the column elements + row2id: dictionary, maps row strings to ids. Optional, built from + id2row by default. + column2id: dictionary, maps col strings to ids. Optional, built + from id2column by default + operations: list of operations already performed on the input + matrix, Optional, by default set to empty. + element_shape: tuple of int, the shape on row elements. Optional, + by default row elements are one-dimensional and element_shape is + (no_cols, ). Used in 3D composition. + + Returns: + A semantic space (type Space) + + Raises: + TypeError: if matrix_ is not of the correct type + ValueError: if element shape is not consistent with + the size of matrix rows + if the matrix and the provided row and column + indexing structures are not of consistent shapes. + + """ + assert_is_instance(matrix_, Matrix) + assert_is_instance(id2row, list) + assert_is_instance(id2column, list) + + if row2id is None: + row2id = list2dict(id2row) + else: + assert_dict_match_list(row2id, id2row) + + if column2id is None: + column2id = list2dict(id2column) + else: + assert_dict_match_list(column2id, id2column) + + assert_shape_consistent(matrix_, id2row, id2column, row2id, column2id) + + self._cooccurrence_matrix = matrix_ + self._row2id = row2id + self._id2row = id2row + self._column2id = column2id + self._id2column = id2column + self._operations = operations + + if element_shape: + if prod(element_shape) != self._cooccurrence_matrix.shape[1]: + raise ValueError("Trying to assign invalid element shape:\ + element_shape: %s, matrix columns: %s" + % (str(element_shape), + str(self._cooccurrence_matrix.shape[1]))) + + # NOTE: watch out here, can cause bugs, if we change the dimension + # of a regular space and we do not create a new space + self._element_shape = element_shape + else: + self._element_shape = (self._cooccurrence_matrix.shape[1],) + + + def apply(self, transformation): + """ + Applies a transformation on the current space. + + All transformations affect the data matrix. If the transformation + reduces the dimensionality of the space, the column indexing + structures are also updated. The operation applied is appended + to the list of operations that the space holds. + + Args: + transformation: of type Scaling, DimensionalityReduction or + FeatureSelection + + Returns: + A new space on which the transformation has been applied. 
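Besides `Space.build` (shown further down), a `Space` can be constructed directly from a matrix plus the row and column string lists, which is convenient when the matrix has already been manipulated at the numpy/scipy level. A minimal sketch with toy data (assumes `modules/` is on the path):

```
import numpy as np
from composes.matrix.sparse_matrix import SparseMatrix
from composes.semantic_space.space import Space

counts = SparseMatrix(np.array([[2., 0., 1.],
                                [0., 3., 1.]]))
space = Space(counts, ["walk", "run"], ["leg", "arm", "fast"])
print(space.cooccurrence_matrix.shape)   # (2, 3)
print(space.id2row)                      # ['walk', 'run']
```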
+ + """ + start = time.time() + #TODO , FeatureSelection, DimReduction .. + assert_is_instance(transformation, (Scaling, DimensionalityReduction, + FeatureSelection)) + op = transformation.create_operation() + new_matrix = op.apply(self.cooccurrence_matrix) + + new_operations = list(self.operations) + new_operations.append(op) + + id2row, row2id = list(self.id2row), self.row2id.copy() + + + if isinstance(op, DimensionalityReductionOperation): + self.assert_1dim_element() + id2column, column2id = [], {} + elif isinstance(op, FeatureSelectionOperation): + self.assert_1dim_element() + op.original_columns = self.id2column + + if op.original_columns: + id2column = list(array(op.original_columns)[op.selected_columns]) + column2id = list2dict(id2column) + else: + id2column, column2id = [],{} + else: + id2column, column2id = list(self.id2column), self.column2id.copy() + + log.print_transformation_info(logger, transformation, 1, + "\nApplied transformation:") + log.print_matrix_info(logger, self.cooccurrence_matrix, 2, + "Original semantic space:") + log.print_matrix_info(logger, new_matrix, 2, "Resulted semantic space:") + log.print_time_info(logger, time.time(), start, 2) + + return Space(new_matrix, id2row, id2column, + row2id, column2id, operations = new_operations) + + def get_sim(self, word1, word2, similarity, space2=None): + """ + Computes the similarity between two targets in the semantic + space. + + If one of the two targets to be compared is not found, it returns 0.. + + Args: + word1: string + word2: string + similarity: of type Similarity, the similarity measure to be used + space2: Space type, Optional. If provided, word2 is interpreted in + this space, rather than the current space. Default, both words + are interpreted in the current space. + Returns: + scalar, similarity score + + """ + + assert_is_instance(similarity, Similarity) + + try: + v1 = self.get_row(word1) + except KeyError: + print "Row string %s not found, returning 0.0" % (word1) + return 0.0 + try: + if space2 is None: + v2 = self.get_row(word2) + else: + v2 = space2.get_row(word2) + except KeyError: + print "Row string %s not found, returning 0.0" % (word2) + return 0.0 + + [v1, v2] = resolve_type_conflict([v1, v2], DenseMatrix) + return similarity.get_sim(v1, v2) + + def get_sims(self, word_pair_list, similarity, space2=None): + """ + Computes the similarity between two LIST of targets in the semantic + space. + + If one of the two targets to be compared is not found, it returns 0.. + + Args: + word_pair_list: list of (string, string) tuples. Words to be compared. + similarity: of type Similarity, the similarity measure to be used + space2: Space type, Optional. If provided, the second word of the word pairs + is interpreted in this space, rather than the current space. + Default, both words are interpreted in the current space. + Returns: + list, list of similarity scores + + """ + sims = [] + + for word1, word2 in word_pair_list: + sims.append(self.get_sim(word1, word2, similarity, space2)) + + return sims + + def get_neighbours(self, word, no_neighbours, similarity, + space2=None): + """ + Computes the neighbours of a word in the semantic space. + + Args: + word: string, target word + no_neighbours: int, the number of neighbours desired + similarity: of type Similarity, the similarity measure to be used + space2: Space type, Optional. If provided, the neighbours are + retrieved from this space, rather than the current space. + Default, neighbours are retrieved from the current space. 
+ + Returns: + list of (neighbour_string, similarity_value) tuples. + + Raises: + KeyError: if the word is not found in the semantic space. + + """ + + start = time.time() + assert_is_instance(similarity, Similarity) + vector = self.get_row(word) + + if space2 is None: + id2row = self.id2row + sims_to_matrix = similarity.get_sims_to_matrix(vector, + self.cooccurrence_matrix) + else: + mat_type = type(space2.cooccurrence_matrix) + if not isinstance(vector, mat_type): + vector = mat_type(vector) + + sims_to_matrix = similarity.get_sims_to_matrix(vector, + space2.cooccurrence_matrix) + id2row = space2.id2row + + sorted_perm = sims_to_matrix.sorted_permutation(sims_to_matrix.sum, 1) + no_neighbours = min(no_neighbours, len(id2row)) + result = [] + + for count in range(no_neighbours): + i = sorted_perm[count] + result.append((id2row[i], sims_to_matrix[i,0])) + + log.print_info(logger, 1, "\nGetting neighbours of:%s" % (word)) + log.print_name(logger, similarity, 1, "Similarity:") + log.print_time_info(logger, time.time(), start, 2) + return result + + @classmethod + def vstack(cls, space1, space2): + """ + Classmethod. Stacks two semantic spaces. + + The rows in the two spaces are concatenated. + + Args: + space1, space2: spaces to be stacked, of type Space + + Returns: + Stacked space, type Space. + + Raises: + ValueError: if the spaces have different number of columns + or their columns are not identical + + """ + if space1.cooccurrence_matrix.shape[1] != space2.cooccurrence_matrix.shape[1]: + raise ValueError("Inconsistent shapes: %s, %s" + % (space1.cooccurrence_matrix.shape[1], + space2.cooccurrence_matrix.shape[1])) + + if space1.id2column != space2.id2column: + raise ValueError("Identical columns required") + + new_row2id = add_items_to_dict(space1.row2id.copy(), space2.id2row) + new_id2row = space1.id2row + space2.id2row + + matrix_type = get_type_of_largest([space1.cooccurrence_matrix, + space2.cooccurrence_matrix]) + [new_mat1, new_mat2] = resolve_type_conflict([space1.cooccurrence_matrix, + space2.cooccurrence_matrix], + matrix_type) + + new_mat = new_mat1.vstack(new_mat2) + + log.print_info(logger, 1, "\nVertical stack of two spaces") + log.print_matrix_info(logger, space1.cooccurrence_matrix, 2, + "Semantic space 1:") + log.print_matrix_info(logger, space2.cooccurrence_matrix, 2, + "Semantic space 2:") + log.print_matrix_info(logger, new_mat, 2, "Resulted semantic space:") + + return Space(new_mat, new_id2row, list(space1.id2column), new_row2id, + space1.column2id.copy(), operations=[]) + + def to_dense(self): + """ + Converts the matrix of the current space to DenseMatrix + """ + self._cooccurrence_matrix = DenseMatrix(self.cooccurrence_matrix) + + def to_sparse(self): + """ + Converts the matrix of the current space to SparseMatrix + """ + self._cooccurrence_matrix = SparseMatrix(self.cooccurrence_matrix) + + def get_row(self, word): + """ + Returns the row vector of a word. + + Args: + word: string + + Returns: Matrix type (of shape (1, no_cols)), the row of the word argument. + + Raises: + KeyError: if the word is not found in the space + """ + return self.cooccurrence_matrix[self.row2id[word],:] + + def get_rows(self, words): + """ + Returns the sub-matrix corresponding to a list of words. + + Args: + words: list of strings + + Returns: Matrix type (of shape (len(words), no_cols)), + the sub-matrix containing the words given as an input. 
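`Space.vstack` concatenates the rows of two spaces whose column lists are identical, keeping the column indexing of the first. A toy sketch:

```
import numpy as np
from composes.matrix.sparse_matrix import SparseMatrix
from composes.semantic_space.space import Space

s1 = Space(SparseMatrix(np.array([[1., 2.]])), ["walk"], ["c1", "c2"])
s2 = Space(SparseMatrix(np.array([[3., 4.]])), ["run"],  ["c1", "c2"])

merged = Space.vstack(s1, s2)
print(merged.id2row)           # ['walk', 'run']
print(merged.get_row("run"))   # the row vector contributed by s2
```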
+ + Raises: + KeyError: if one of words is not found in the space + """ + assert_is_instance(words, list) + row_ids = [] + for word in words: + row_ids.append(self.row2id[word]) + + return self.cooccurrence_matrix[row_ids,:] + + def set_cooccurrence_matrix(self, matrix_): + assert_is_instance(matrix_, Matrix) + assert_shape_consistent(matrix_, self.row2id, self.id2row, + self.column2id, self.id2column) + self._cooccurrence_matrix = matrix_ + + def get_cooccurrence_matrix(self): + return self._cooccurrence_matrix + + cooccurrence_matrix = property(get_cooccurrence_matrix) + """ + Co-occurrence matrix associated to the semantic space, of type Matrix. + + """ + def get_row2id(self): + return self._row2id + + row2id = property(get_row2id) + """ + Dictionary, maps row strings to integer ids. + """ + + def get_id2row(self): + return self._id2row + + id2row = property(get_id2row) + """ + List of strings, the row elements. + """ + def get_column2id(self): + return self._column2id + + column2id = property(get_column2id) + """ + Dictionary, maps column strings to integer ids. + """ + + def get_id2column(self): + return self._id2column + + id2column = property(get_id2column) + """ + List of strings, the column elements. + """ + + def get_element_shape(self): + return self._element_shape + + element_shape = property(get_element_shape) + """ + Shape of row elements, of type tuple. By default, in standard spaces, + element_shape=(no_cols,). + + Used in composition models which build + word representations which are matrices or higher order tensors, instead + of simple vectors. If the representation of a word is a matrix of shape + (2,2) for example, then element_shape=(2,2). The actual space matrix + stores each element as a linearized vector, just as in standard spaces. + """ + + def get_operations(self): + return self._operations + + operations = property(get_operations) + """ + List of operations which have been applied on the semantic space. List of + Operation type objects. + + The operations, together with their associated side information, are stored + because they may need to be projected on peripheral data. + """ + + def assert_1dim_element(self): + """ + Asserts that the elements of the space are one dimensional. + + """ + if len(self.element_shape) > 1: + raise IllegalOperationError("Operation not allowed on spaces with\ + element shape: %s" % self.element_shape) + + @classmethod + def build(cls, **kwargs): + """ + Reads in data files and extracts the data to construct a semantic space. + + If the data is read in dense format and no columns are provided, + the column indexing structures are set to empty. + + Args: + data: file containing the counts + format: format on the input data file: one of sm/dm + rows: file containing the row elements. Optional, if not provided, + extracted from the data file. + cols: file containing the column elements + + Returns: + A semantic space build from the input data files. + + Raises: + ValueError: if one of data/format arguments is missing. 
+ if cols is missing and format is "sm" + if the input columns provided are not consistent with + the shape of the matrix (for "dm" format) + + """ + start = time.time() + id2row = None + id2column = None + + if "data" in kwargs: + data_file = kwargs["data"] + else: + raise ValueError("Space data file needs to be specified") + + if "format" in kwargs: + format_ = kwargs["format"] + if not format_ in ["dm","sm"]: + raise ValueError("Unrecognized format: %s" % format_) + else: + raise ValueError("Format of input files needs to be specified") + + if "rows" in kwargs and not kwargs["rows"] is None: + [id2row], [row2id] = extract_indexing_structs(kwargs["rows"], [0]) + + if "cols" in kwargs and not kwargs["cols"] is None: + [id2column], [column2id] = extract_indexing_structs(kwargs["cols"], [0]) + elif format_ == "sm": + raise ValueError("Need to specify column file when input format is sm!") + + if format_ == "sm": + if id2row is None: + [id2row], [row2id] = extract_indexing_structs(data_file, [0]) + mat = read_sparse_space_data(data_file, row2id, column2id) + + else: + if id2row is None: + [id2row],[row2id] = extract_indexing_structs(data_file, [0]) + mat = read_dense_space_data(data_file, row2id) + + if id2column and len(id2column) != mat.shape[1]: + raise ValueError("Columns provided inconsistent with shape of input matrix!") + + if id2column is None: + id2column, column2id = [], {} + + log.print_matrix_info(logger, mat, 1, "Built semantic space:") + log.print_time_info(logger, time.time(), start, 2) + return Space(mat, id2row, id2column, row2id, column2id) + + def export(self, file_prefix, **kwargs): + """ + Exports the current space to disk. + If the space has no column information, it cannot be exported in + sparse format (sm). + + Args: + file_prefix: string, prefix of the files to be exported + format: string, one of dm/sm + + Prints: + - matrix in file_prefix. + - row elements in file_prefix. + - col elements in file_prefix. 
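`export` writes a space back to disk next to its row and column files; the `.rows` and `.cols` suffixes come from `_export_row_column` below, while the matrix file itself is produced by the io_utils printers (its exact suffix is not visible in this diff). A toy sketch with a hypothetical output prefix:

```
import numpy as np
from composes.matrix.sparse_matrix import SparseMatrix
from composes.semantic_space.space import Space

space = Space(SparseMatrix(np.array([[2., 1.]])), ["walk"], ["c1", "c2"])
space.export("output/toy_space", format="sm")
# -> output/toy_space.rows, output/toy_space.cols, plus the sparse matrix file
```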
+ + Raises: + ValueError: if the space has no column info and "sm" exporting + is attempted + NotImplementedError: the space matrix is dense and "sm" exporting + is attempted + + """ + + start = time.time() + create_parent_directories(file_prefix) + format_ = "dm" + if "format" in kwargs: + format_ = kwargs["format"] + if not format_ in ["dm","sm"]: + raise ValueError("Unrecognized format: %s" %format_) + elif format_ == "dm": + print_cooc_mat_dense_format(self.cooccurrence_matrix, + self.id2row, file_prefix) + else: + print_cooc_mat_sparse_format(self.cooccurrence_matrix, + self.id2row, + self.id2column, file_prefix) + self._export_row_column(file_prefix) + + log.print_matrix_info(logger, self.cooccurrence_matrix, 1, + "Printed semantic space:") + log.print_time_info(logger, time.time(), start, 2) + + def _export_row_column(self, file_prefix): + row_file = "%s.%s" %(file_prefix, "rows") + column_file = "%s.%s" %(file_prefix, "cols") + + if self.column2id: + print_list(self.id2column, column_file) + + print_list(self.id2row, row_file) + + + +""" +def build(cls, **kwargs): +FANCY BUILD + start = time.time() + id2row = None + id2column = None + + if "data" in kwargs: + data_file = kwargs["data"] + else: + raise ValueError("Space data file needs to be specified") + + if "format" in kwargs: + format_ = kwargs["format"] + if not format_ in ["dm","sm"]: + raise ValueError("Unrecognized format: %s" % format_) + else: + raise ValueError("Format of input files needs to be specified") + + if "rows" in kwargs and not kwargs["rows"] is None: + [id2row], [row2id] = extract_indexing_structs(kwargs["rows"], [0]) + + if "cols" in kwargs and not kwargs["cols"] is None: + [id2column], [column2id] = extract_indexing_structs(kwargs["cols"], [0]) + + if format_ == "sm": + if id2row is None and id2column is None: + ([id2row, id2column], + [row2id, column2id]) = extract_indexing_structs(data_file, [0, 1]) + if id2row is None: + [id2row], [row2id] = extract_indexing_structs(data_file, [0]) + if id2column is None: + [id2column], [column2id] = extract_indexing_structs(data_file, [1]) + + mat = read_sparse_space_data(data_file, row2id, column2id) + else: + if id2row is None: + [id2row],[row2id] = extract_indexing_structs(data_file, [0]) + if id2column is None: + id2column, column2id = [], {} + + mat = read_dense_space_data(data_file, row2id) + + if id2column and len(id2column) != mat.shape[1]: + raise ValueError("Columns provided inconsistent with shape of input matrix!") + + log.print_matrix_info(logger, mat, 1, "Built semantic space:") + log.print_time_info(logger, time.time(), start, 2) + return Space(mat, id2row, id2column, row2id, column2id) +FANCY BUILD + + + + Some transformations, such as weighings, only scale the values + in the space matrix, while others, such as dimensionality + reduction, or feature selection, alter the set of + contextual features. 
+""" diff --git a/modules/composes/similarity/__init__.py b/modules/composes/similarity/__init__.py new file mode 100755 index 0000000..e69de29 diff --git a/modules/composes/similarity/cos.py b/modules/composes/similarity/cos.py new file mode 100755 index 0000000..b4f6038 --- /dev/null +++ b/modules/composes/similarity/cos.py @@ -0,0 +1,39 @@ +""" +Created on Oct 2, 2012 + +@author: Georgiana Dinu, Pham The Nghia +""" +import numpy as np + +from composes.utils.py_matrix_utils import nonzero_invert + +from composes.similarity.similarity import Similarity +from composes.similarity.dot_prod import DotProdSimilarity + + +class CosSimilarity(Similarity): + """ + Computes the cosine similarity of two vectors. + + :math:`sim(\\vec{u},\\vec{v}) = \\frac{<\\vec{u},\\vec{v}>}{\\sqrt{||\\vec{u}||||\\vec{v}||}}` + + """ + + def _sim(self, v1, v2): + if v1.norm() == 0 or v2.norm() == 0: + return 0.0 + s = DotProdSimilarity()._sim(v1, v2) / np.double(v1.norm() * v2.norm()) + return s + + def _sims_to_matrix(self, vector, matrix_): + sims = DotProdSimilarity()._sims_to_matrix(vector, matrix_) + + vector_norm = vector.norm() + row_norms = vector_norm * matrix_.norm(1) + row_norms = nonzero_invert(row_norms) + + return sims.scale_rows(row_norms) + + + + diff --git a/modules/composes/similarity/dot_prod.py b/modules/composes/similarity/dot_prod.py new file mode 100755 index 0000000..9323e9a --- /dev/null +++ b/modules/composes/similarity/dot_prod.py @@ -0,0 +1,20 @@ +""" +Created on Oct 2, 2012 + +@author: Georgiana Dinu, Pham The Nghia +""" +from composes.similarity.similarity import Similarity + + +class DotProdSimilarity(Similarity): + """ + Computes the scalar product (dot product) of two vectors. + + :math:`sim(\\vec{u},\\vec{v}) = <\\vec{u},\\vec{v}> = \\sum_iu_iv_i` + + """ + def _sim(self, v1, v2): + return v1.multiply(v2).sum() + + def _sims_to_matrix(self, vector, matrix_): + return matrix_ * vector.transpose() diff --git a/modules/composes/similarity/euclidean.py b/modules/composes/similarity/euclidean.py new file mode 100755 index 0000000..1a307bf --- /dev/null +++ b/modules/composes/similarity/euclidean.py @@ -0,0 +1,18 @@ +""" +Created on Oct 2, 2012 + +@author: Georgiana Dinu, Pham The Nghia +""" +from composes.similarity.similarity import Similarity + + +class EuclideanSimilarity(Similarity): + """ + Computes the euclidean similarity of two vectors as the inverse of their + euclidean distance. + + :math:`sim(\\vec{u},\\vec{v}) = \\frac{1}{||\\vec{u}-\\vec{v}|| + 1}` + """ + + def _sim(self, v1, v2): + return 1 / (1 + (v1 - v2).norm()) diff --git a/modules/composes/similarity/lin.py b/modules/composes/similarity/lin.py new file mode 100755 index 0000000..604a804 --- /dev/null +++ b/modules/composes/similarity/lin.py @@ -0,0 +1,33 @@ +""" +Created on Oct 2, 2012 + +@author: Georgiana Dinu, Pham The Nghia +""" +import numpy as np + +from composes.similarity.similarity import Similarity + + +class LinSimilarity(Similarity): + """ + Computes the Lin similarity of two vectors. + + :math:`sim(\\vec{u},\\vec{v}) = \\frac{\\sum_{i \\in I}(u_i+v_i)}{\\sum_iu_i + \\sum_iv_i}` + + Where :math:`I=\\{i | u_i > 0 \\text{ and } v_i > 0\\}`, the set of components + on which both vectors are strictly positive. 
+ + """ + + def _sim(self, v1, v2): + + common = v1.multiply(v2) + common.to_ones() + denom = v1.sum() + v2.sum() + + if denom == 0: + return 0 + else: + return common.multiply(v1 + v2).sum() / np.double(denom) + + diff --git a/modules/composes/similarity/similarity.py b/modules/composes/similarity/similarity.py new file mode 100755 index 0000000..3d003fe --- /dev/null +++ b/modules/composes/similarity/similarity.py @@ -0,0 +1,46 @@ +""" +Created on Oct 2, 2012 + +@author: Georgiana Dinu, Pham The Nghia +""" +import numpy as np + +from composes.utils.matrix_utils import ( + assert_is_array_or_matrix, + to_compatible_matrix_types, +) + + +class Similarity(object): + + def get_sim(self, v1, v2): + + assert_is_array_or_matrix(v1) + assert_is_array_or_matrix(v2) + + # TODO: figure out where these asserts belong!! + v1, v2 = to_compatible_matrix_types(v1, v2) + v1.assert_same_shape(v2) + + return self._sim(v1, v2) + + def get_sims_to_matrix(self, vector, matrix_): + + assert_is_array_or_matrix(vector) + assert_is_array_or_matrix(matrix_) + + vector, matrix_ = to_compatible_matrix_types(vector, matrix_) + + if vector.shape[1] != matrix_.shape[1] or vector.shape[0] != 1: + raise ValueError( + 'Inconsistent shapes {0} and {1}'.format(vector.shape, matrix_.shape) + ) + + return self._sims_to_matrix(vector, matrix_) + + def _sims_to_matrix(self, vector, matrix_): + + result = np.zeros(shape=(matrix_.shape[0], 1)) + for i in range(matrix_.shape[0]): + result[i] = self._sim(vector, matrix_[i, :]) + return type(matrix_)(result) diff --git a/modules/composes/transformation/__init__.py b/modules/composes/transformation/__init__.py new file mode 100755 index 0000000..e69de29 diff --git a/modules/composes/transformation/dim_reduction/__init__.py b/modules/composes/transformation/dim_reduction/__init__.py new file mode 100755 index 0000000..e69de29 diff --git a/modules/composes/transformation/dim_reduction/dimensionality_reduction.py b/modules/composes/transformation/dim_reduction/dimensionality_reduction.py new file mode 100755 index 0000000..5b2776a --- /dev/null +++ b/modules/composes/transformation/dim_reduction/dimensionality_reduction.py @@ -0,0 +1,37 @@ +''' +Created on Sep 28, 2012 + +@author: Georgiana Dinu, Pham The Nghia +''' +from composes.semantic_space.operation import DimensionalityReductionOperation + +class DimensionalityReduction(object): + ''' + classdocs + ''' + + _name = "we are NOT stupid" + + def __init__(self, reduced_dimension): + ''' + Constructor + ''' + if reduced_dimension <= 0: + raise ValueError("Cannot reduce to non-positive dimensionality: %d" + % reduced_dimension) + self._reduced_dimension = reduced_dimension + + def create_operation(self): + return DimensionalityReductionOperation(self) + + def get_reduced_dimension(self): + return self._reduced_dimension + + def get_name(self): + return self._name + + def __str__(self): + return self._name + + name = property(get_name) + reduced_dimension = property(get_reduced_dimension) diff --git a/modules/composes/transformation/dim_reduction/nmf.py b/modules/composes/transformation/dim_reduction/nmf.py new file mode 100755 index 0000000..b7251db --- /dev/null +++ b/modules/composes/transformation/dim_reduction/nmf.py @@ -0,0 +1,136 @@ +''' +Created on Oct 1, 2012 + +@author: Georgiana Dinu, Pham The Nghia +''' + +import numpy as np +from dimensionality_reduction import DimensionalityReduction +from composes.matrix.linalg import Linalg +from math import sqrt + +class Nmf(DimensionalityReduction): + """ + Performs Non-negative Matrix 
Factorization to reduced dimension :math:`k`. + + Given an input non-negative matrix :math:`X`, it computes the decomposition: + + :math:`X \\approx WH` where W and H are non-negative matrices which minimize + :math:`||X-WH||_{2}` + + It returns the matrix W. + """ + + _name = "nmf" + + def __init__(self, reduced_dimension): + ''' + Constructor + ''' + super(Nmf, self).__init__(reduced_dimension) + + def apply(self, matrix_): + + matrix_.assert_positive() + #w_init, h_init = self.nndsvd_init(matrix_) + w_init, h_init = self.v_col_init(matrix_) + #w_init, h_init = self.random_init(matrix_) + w, h = Linalg.nmf(matrix_, w_init, h_init) + return w, Linalg.pinv(h) + + def random_init(self, matrix_): + + # TODO: implement the fancier but still fast init (from nimfa: v_col) + rndcol = np.random.random_integers(0, matrix_.shape[1] - 1, + self._reduced_dimension) + + rndrow = np.random.random_integers(0, matrix_.shape[0] - 1, + self._reduced_dimension) + + #otherwise we would have had to convert to DenseMatrix/SparseMatrix + #type(matrix_)(result) + w = matrix_[:, rndcol] + h = matrix_[rndrow, :] + + return w, h + + def v_col_init(self, matrix_): + w = np.zeros((matrix_.shape[0], self._reduced_dimension)) + h = np.zeros((self._reduced_dimension, matrix_.shape[1])) + + #in case there are less than 5 rows or columns + p_col = matrix_.shape[1]//5 + 1 + p_row = matrix_.shape[0]//5 + 1 + for i in range(self._reduced_dimension): + + rndcol = np.random.random_integers(0, matrix_.shape[1] - 1, + p_col) + + rndrow = np.random.random_integers(0, matrix_.shape[0] - 1, + p_row) + + w[:, i] = (matrix_[:, rndcol].sum(1)/float(p_col)).flatten() + h[i, :] = (matrix_[rndrow, :].sum(0)/float(p_row)).flatten() + + w = type(matrix_)(w) + h = type(matrix_)(h) + + return w, h + + def nndsvd_init(self,matrix_): + def matrix_abs(mat_): + mat_p = mat_.get_non_negative() + mat_n_abs = mat_p - mat_ + return mat_p + mat_n_abs + + def padd_zeros(matrix_, axis, thickness): + matrix_type = type(matrix_) + if axis == 0: + append_mat = matrix_type(np.zeros((thickness, matrix_.shape[1]))) + return matrix_.vstack(append_mat) + elif axis == 1: + append_mat = matrix_type(np.zeros((matrix_.shape[0], thickness))) + return matrix_.hstack(append_mat) + + u, s, v = Linalg.svd(matrix_, self._reduced_dimension); + + rank = u.shape[1] + w = [[]]*rank + h = [[]]*rank + + vt = v.transpose() + + w[0] = sqrt(s[0]) * matrix_abs(u[:,0]) + h[0] = sqrt(s[0]) * matrix_abs(vt[0,:]) + + for i in range(1,rank): + uu = u[:,i] + vv = vt[i,:] + uup = uu.get_non_negative() + uun = uup - uu + vvp = vv.get_non_negative() + vvn = vvp - vv + + n_uup = uup.norm() + n_uun = uun.norm() + n_vvp = vvp.norm() + n_vvn = vvn.norm() + + termp = n_uup * n_vvp; termn = n_uun * n_vvn + if (termp >= termn): + w[i] = sqrt(s[i] * termp) * uup / n_uup + h[i] = sqrt(s[i] * termp) * vvp / n_vvp + else: + w[i] = sqrt(s[i] * termn) * uun / n_uun + h[i] = sqrt(s[i] * termn) * vvn / n_vvn + + w = matrix_.nary_hstack(w) + h = matrix_.nary_vstack(h) + + w.remove_small_values(0.0000000001) + h.remove_small_values(0.0000000001) + + if (rank < self._reduced_dimension): + w = padd_zeros(w, 1, self._reduced_dimension - rank) + h = padd_zeros(h, 0, self._reduced_dimension - rank) + return w,h diff --git a/modules/composes/transformation/dim_reduction/svd.py b/modules/composes/transformation/dim_reduction/svd.py new file mode 100755 index 0000000..417a588 --- /dev/null +++ b/modules/composes/transformation/dim_reduction/svd.py @@ -0,0 +1,33 @@ +''' +Created on Sep 28, 2012 + +@author: 
Georgiana Dinu, Pham The Nghia +''' + +from dimensionality_reduction import DimensionalityReduction +from composes.matrix.linalg import Linalg + +class Svd(DimensionalityReduction): + """ + Performs truncated Singular Value Decomposition to a reduced dimension :math:`k`. + + Given an input matrix :math:`X`, it computes the decomposition: + + :math:`X = U \\Sigma V^{T}` + + It returns :math:`U \\Sigma` truncated to dimension :math:`min(k,rank(X))` + """ + + _name = "svd" + + def __init__(self, reduced_dimension): + ''' + Constructor + ''' + super(Svd, self).__init__(reduced_dimension) + + def apply(self, matrix_): + + u, s, v = Linalg.svd(matrix_, self._reduced_dimension) + return u.scale_columns(s), v + diff --git a/modules/composes/transformation/feature_selection/__init__.py b/modules/composes/transformation/feature_selection/__init__.py new file mode 100755 index 0000000..e69de29 diff --git a/modules/composes/transformation/feature_selection/feature_selection.py b/modules/composes/transformation/feature_selection/feature_selection.py new file mode 100755 index 0000000..2e9a86e --- /dev/null +++ b/modules/composes/transformation/feature_selection/feature_selection.py @@ -0,0 +1,27 @@ +''' +Created on Oct 5, 2012 + +@author: Georgiana Dinu, Pham The Nghia +''' +from composes.semantic_space.operation import FeatureSelectionOperation + +class FeatureSelection(object): + ''' + classdocs + ''' + + + def __init__(self, reduced_dimension): + + if reduced_dimension <= 0: + raise ValueError("Cannot reduce to non-positive dimensionality: %d" + % reduced_dimension) + self._reduced_dimension = reduced_dimension + + def create_operation(self): + return FeatureSelectionOperation(self) + + def get_reduced_dimension(self): + return self._reduced_dimension + + reduced_dimension = property(get_reduced_dimension) \ No newline at end of file diff --git a/modules/composes/transformation/feature_selection/top_feature_selection.py b/modules/composes/transformation/feature_selection/top_feature_selection.py new file mode 100755 index 0000000..1b42eb9 --- /dev/null +++ b/modules/composes/transformation/feature_selection/top_feature_selection.py @@ -0,0 +1,54 @@ +''' +Created on Oct 5, 2012 + +@author: Georgiana Dinu, Pham The Nghia +''' +from warnings import warn +from feature_selection import FeatureSelection + +class TopFeatureSelection(FeatureSelection): + """ + Sorts the columns of a space according to some criterion and returns a space + containing only the top :math:`k` ones. + + Available criteria: + + sum: Default. Ranks columns according to the sum on their elements. + + length: Ranks columns according to their vector length. 
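Both reductions are meant to be passed to `Space.apply`: `Svd(k)` replaces the columns by at most `k` latent dimensions (rows become U * Sigma), while `TopFeatureSelection(k, criterion=...)` keeps the `k` best original columns. A hedged sketch on a toy dense space (assumes `modules/` is on the path):

```
import numpy as np
from composes.matrix.dense_matrix import DenseMatrix
from composes.semantic_space.space import Space
from composes.transformation.dim_reduction.svd import Svd
from composes.transformation.feature_selection.top_feature_selection import TopFeatureSelection

space = Space(DenseMatrix(np.array([[1., 2., 0.],
                                    [0., 1., 4.]])),
              ["walk", "run"], ["c1", "c2", "c3"])

svd_space = space.apply(Svd(2))                   # 2 latent dimensions, columns dropped
top_space = space.apply(TopFeatureSelection(2))   # the 2 highest-sum original columns
print(svd_space.cooccurrence_matrix.shape)        # (2, 2)
print(top_space.id2column)                        # e.g. ['c3', 'c2']
```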
+ + """ + + _name = "top_feature_selection" + _valid_criteria = {"sum", "length"} + + def __init__(self, reduced_dimension, criterion='sum'): + ''' + Constructor + ''' + super(TopFeatureSelection, self).__init__(reduced_dimension) + + if criterion: + if criterion not in self._valid_criteria: + raise ValueError("Unrecognized criterion: %s" % criterion) + self.criterion = criterion + + def apply(self, matrix_): + + if self.criterion == "sum": + norm_function = matrix_.sum + else: + norm_function = matrix_.norm + + if self._reduced_dimension >= matrix_.shape[1]: + warn("Reduced dimension larger than number of columns!") + + no_columns = min(self._reduced_dimension, matrix_.shape[1]) + sorted_perm = matrix_.sorted_permutation(norm_function, 0) + + sorted_perm = sorted_perm[0:no_columns] + matrix_ = matrix_[:, sorted_perm] + + return matrix_, sorted_perm + + diff --git a/modules/composes/transformation/scaling/__init__.py b/modules/composes/transformation/scaling/__init__.py new file mode 100755 index 0000000..e69de29 diff --git a/modules/composes/transformation/scaling/epmi_weighting.py b/modules/composes/transformation/scaling/epmi_weighting.py new file mode 100755 index 0000000..e9bd407 --- /dev/null +++ b/modules/composes/transformation/scaling/epmi_weighting.py @@ -0,0 +1,52 @@ + +from scaling import Scaling +from composes.utils.py_matrix_utils import nonzero_invert + +class EpmiWeighting(Scaling): + """ + Exponential Point-wise Mutual Information. + + :math:`epmi(r,c) = \\frac{P(r,c)}{P(r)P(c)}` + + """ + + _name = 'epmi' + _uses_column_stats = True + + def apply(self, matrix_, column_marginal=None): + """ + Performs epmi weighting. + + Args: + matrix_ (Matrix): Input matrix + + column_marginal (np.ndarray): column marginals of the + core matrix if the matrix is a peripheral matrix + + Returns: + Matrix: the matrix after applying epmi. + + """ + + matrix_.assert_positive() + row_sum = matrix_.sum(axis = 1) + + if not column_marginal is None: + col_sum = column_marginal + else: + col_sum = matrix_.sum(axis = 0) + + total = col_sum.sum() + + row_sum = nonzero_invert(row_sum) + col_sum = nonzero_invert(col_sum) + col_sum = col_sum * total + + matrix_ = matrix_.scale_rows(row_sum) + matrix_ = matrix_.scale_columns(col_sum) + + return matrix_ + + def get_column_stats(self, matrix_): + return matrix_.sum(0) + diff --git a/modules/composes/transformation/scaling/normalization.py b/modules/composes/transformation/scaling/normalization.py new file mode 100755 index 0000000..13c5767 --- /dev/null +++ b/modules/composes/transformation/scaling/normalization.py @@ -0,0 +1,55 @@ +''' +Created on Oct 4, 2012 + +@author: Georgiana Dinu, Pham The Nghia +''' +from numpy import double +from warnings import warn +from scaling import Scaling + +class Normalization(Scaling): + """ + Normalizes the a space according to a some criterion. + + Available criteria: + + sum: Default. 
The result matrix :math:`X` will satisfy: :math:`\\sum_{i,j} X_{ij}=1` + + length: The result matrix :math:`X` will satisfy: :math:`\\sqrt{\\sum_{i,j} X_{ij}^2}=1` + + """ + _name = "row_normalization" + _valid_criteria = ["sum", "length"] + _uses_column_stats = True + + def __init__(self, criterion='sum'): + ''' + Constructor + ''' + if criterion: + if criterion not in self._valid_criteria: + raise ValueError("Unrecognized criterion: %s" % criterion) + self.criterion = criterion + + + def apply(self, matrix_, total=None): + + if total is None: + if self.criterion == "length": + total = matrix_.norm() + else: + total = matrix_.sum() + + if total == 0: + warn("Could not normalize: sum/length of matrix is 0.") + return matrix_ + + matrix_ = (1 / double(total)) * matrix_ + return matrix_ + + def get_column_stats(self, matrix_): + + if self.criterion == "length": + return matrix_.norm() + else: + return matrix_.sum() diff --git a/modules/composes/transformation/scaling/plmi_weighting.py b/modules/composes/transformation/scaling/plmi_weighting.py new file mode 100755 index 0000000..39f759b --- /dev/null +++ b/modules/composes/transformation/scaling/plmi_weighting.py @@ -0,0 +1,22 @@ + +from scaling import Scaling +from ppmi_weighting import PpmiWeighting + +class PlmiWeighting(Scaling): + """ + Positive Local Mutual Information. + + :math:`plmi(r,c)=ppmi(r,c)count(r,c)` + + """ + + _name = "plmi" + _uses_column_stats = True + + def apply(self, matrix_, column_marginal=None): + return matrix_.multiply(PpmiWeighting().apply(matrix_, + column_marginal)) + + + def get_column_stats(self, matrix_): + return matrix_.sum(0) \ No newline at end of file diff --git a/modules/composes/transformation/scaling/plog_weighting.py b/modules/composes/transformation/scaling/plog_weighting.py new file mode 100755 index 0000000..478102e --- /dev/null +++ b/modules/composes/transformation/scaling/plog_weighting.py @@ -0,0 +1,29 @@ + +from scaling import Scaling + +class PlogWeighting(Scaling): + """ + Positive Log Weighting + + :math:`plog(r,c)= log(r,c) \\text{ if } log(r,c) \\geq 0 \\text{ else } 0` + """ + + _name = "plog" + + def apply(self, matrix_): + ''' + Performs positive log weighting. + + Args: + matrix_ (Matrix): Input matrix + column_marginal (array): column marginals of the core matrix if the matrix is a peripheral matrix + + Returns: + Matrix: the matrix after applying plog + + ''' + matrix_ = matrix_.copy() + matrix_.plog() + return matrix_ + + diff --git a/modules/composes/transformation/scaling/ppmi_weighting.py b/modules/composes/transformation/scaling/ppmi_weighting.py new file mode 100755 index 0000000..b171a48 --- /dev/null +++ b/modules/composes/transformation/scaling/ppmi_weighting.py @@ -0,0 +1,30 @@ + +from scaling import Scaling +from epmi_weighting import EpmiWeighting + +class PpmiWeighting(Scaling): + """ + Positive Point-wise Mutual Information. 
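The weighting classes in this package fit together as follows; the sketch below is a NumPy-only illustration on made-up counts, not part of DISSECT. EPMI divides the joint probability by the product of the marginals, PPMI keeps the positive part of its logarithm (which is exactly what `PpmiWeighting` does by chaining `EpmiWeighting` with `plog`), and PLMI multiplies the PPMI value back by the raw count.

```python
# Illustrative NumPy sketch of the EPMI / PPMI / PLMI weighting schemes
# (toy counts, not part of DISSECT).
import numpy as np

counts = np.array([[10., 0., 2.],
                   [ 3., 5., 0.]])

total = counts.sum()
p_rc = counts / total                              # P(r,c)
p_r = counts.sum(axis=1, keepdims=True) / total    # P(r)
p_c = counts.sum(axis=0, keepdims=True) / total    # P(c)

epmi = p_rc / (p_r * p_c)                          # epmi(r,c) = P(r,c) / (P(r)P(c))

ppmi = np.zeros_like(epmi)
pos = epmi > 1.0                                   # pmi(r,c) > 0  <=>  epmi(r,c) > 1
ppmi[pos] = np.log(epmi[pos])                      # ppmi = max(log(epmi), 0)

plmi = ppmi * counts                               # plmi(r,c) = ppmi(r,c) * count(r,c)
print(ppmi.round(2))
```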
+ + + :math:`pmi(r,c) = log\\frac{P(r,c)}{P(r)P(c)}` + + :math:`ppmi(r,c)= pmi(r,c) \\text{ if } pmi(r,c)\\geq 0 \\text{ else } 0` + """ + + _name = "ppmi" + _uses_column_stats = True + + def apply(self, matrix_, column_marginal=None): + + matrix_ = EpmiWeighting().apply(matrix_, column_marginal) + matrix_.plog() + return matrix_ + + def get_column_stats(self, matrix_): + return matrix_.sum(0) + + """ + :math:`ppmi(r,c)=\\begin{cases}pmi(rc) & \\text{if }pmi(r,c)\\geq0 + 0 & \\text{otherwise}\\end{cases}` + """ \ No newline at end of file diff --git a/modules/composes/transformation/scaling/row_normalization.py b/modules/composes/transformation/scaling/row_normalization.py new file mode 100755 index 0000000..b6145d2 --- /dev/null +++ b/modules/composes/transformation/scaling/row_normalization.py @@ -0,0 +1,46 @@ +''' +Created on Oct 4, 2012 + +@author: Georgiana Dinu, Pham The Nghia +''' + +from scaling import Scaling +from composes.utils.py_matrix_utils import nonzero_invert + +class RowNormalization(Scaling): + """ + Normalizes the rows of a space according to a some criterion. + + Available criteria: + + length: Default. Each row :math:`X_i` of the result matrix will satisfy: :math:`\\sqrt{\\sum_j X_{ij}^2}=1` + + + sum: Each row :math:`X_i` of the result matrix will satisfy: :math:`\\sum_j X_{ij}=1` + + """ + _name = "row_normalization" + _valid_criteria = ["sum", "length"] + + def __init__(self, criterion='length'): + ''' + Constructor + ''' + if criterion: + if criterion not in self._valid_criteria: + raise ValueError("Unrecognized criterion: %s" % criterion) + self.criterion = criterion + + + def apply(self, matrix_): + + if self.criterion == "length": + row_norms = matrix_.norm(axis=1) + else: + row_norms = matrix_.sum(axis=1) + + inv_row_norm = nonzero_invert(row_norms) + matrix_ = matrix_.scale_rows(inv_row_norm) + return matrix_ + + diff --git a/modules/composes/transformation/scaling/scaling.py b/modules/composes/transformation/scaling/scaling.py new file mode 100755 index 0000000..52765a7 --- /dev/null +++ b/modules/composes/transformation/scaling/scaling.py @@ -0,0 +1,29 @@ +''' +Created on Sep 20, 2012 + +@author: Georgiana Dinu, Pham The Nghia +''' + +from composes.semantic_space.operation import ScalingOperation + +class Scaling(object): + ''' + classdocs + ''' + _name = "we are NOT stupid" + _uses_column_stats = False + + def get_name(self): + return self._name + + def get_uses_column_stats(self): + return self._uses_column_stats + + def create_operation(self): + return ScalingOperation(self) + + def __str__(self): + return self._name + + name = property(get_name) + uses_column_stats = property(get_uses_column_stats) \ No newline at end of file diff --git a/modules/composes/utils/__init__.py b/modules/composes/utils/__init__.py new file mode 100755 index 0000000..e69de29 diff --git a/modules/composes/utils/crossvalidation_utils.py b/modules/composes/utils/crossvalidation_utils.py new file mode 100755 index 0000000..c0a0432 --- /dev/null +++ b/modules/composes/utils/crossvalidation_utils.py @@ -0,0 +1,35 @@ +''' +Created on Oct 9, 2012 + +@author: Georgiana Dinu, Pham The Nghia +''' + +from random import shuffle + +def get_split_indices(range_len, fold): + + if fold <= 0: + raise ValueError("Strictly positive number of folds required, received %s:" + % fold) + + indices_list = [] + if range_len < fold: + return get_split_indices(range_len, range_len) + + range_ = range(range_len) + shuffle(range_) + current_index = 0 + for i in range(fold): + if i < len(range_)%fold: + 
slice_length = range_len // fold + 1 + else: + slice_length = range_len // fold + + indices_list.append(range_[current_index:current_index + slice_length]) + current_index += slice_length + + return indices_list + +def get_submatrix_list(matrix_, indices_list): + return [matrix_[indices, :] for indices in indices_list] + diff --git a/modules/composes/utils/gen_utils.py b/modules/composes/utils/gen_utils.py new file mode 100755 index 0000000..877280a --- /dev/null +++ b/modules/composes/utils/gen_utils.py @@ -0,0 +1,29 @@ +''' +Created on May 21, 2013 + +@author: Georgiana Dinu, Pham The Nghia +''' +from composes.exception.invalid_argument_error import InvalidArgumentError + + +def assert_is_instance(object_, class_): + if not isinstance(object_, class_): + raise TypeError("expected %s, received %s" % (class_, type(object_))) + + +def get_partitions(sorted_list, min_samples): + prev_idx = 0 + range_list = [] + for i in range(1, len(sorted_list)): + if sorted_list[i] != sorted_list[i - 1]: + if i - prev_idx >= min_samples: + range_list.append((prev_idx, i)) + + prev_idx = i + + if len(sorted_list) - prev_idx >= min_samples: + range_list.append((prev_idx, len(sorted_list))) + + keys = [sorted_list[range_list[i][0]] for i in xrange(len(range_list))] + + return keys, range_list \ No newline at end of file diff --git a/modules/composes/utils/io_utils.py b/modules/composes/utils/io_utils.py new file mode 100755 index 0000000..69f7f4b --- /dev/null +++ b/modules/composes/utils/io_utils.py @@ -0,0 +1,272 @@ +''' +Created on Oct 16, 2012 + +@author: nghia +''' + +import numpy as np +try: + import cPickle as pickle +except ImportError: + import pickle +import os +import gzip as gzip +from warnings import warn +from scipy.sparse import csr_matrix +from composes.matrix.dense_matrix import DenseMatrix +from composes.matrix.sparse_matrix import SparseMatrix +from composes.utils.gen_utils import assert_is_instance +import struct + + +def save(object_, file_name): + create_parent_directories(file_name) + try: + with open(file_name, 'wb') as f: + pickle.dump(object_, f, 2) + except struct.error: + warn("object is too big, using pickle with protocol 0") + with open(file_name, 'wb') as f: + pickle.dump(object_, f, 0) + + +def load(file_name, data_type=None): + with open(file_name, 'rb') as f: + result = pickle.load(f) + + if not data_type is None: + assert_is_instance(result, data_type) + + return result + + +def create_directories(directory): + if not os.path.exists(directory): + os.makedirs(directory) + + +def create_parent_directories(file_name): + parent_dir = os.path.dirname(file_name) + + if parent_dir != "" and not os.path.exists(parent_dir): + os.makedirs(parent_dir) + + +def extract_indexing_structs(filename, field_list): + str2id = {} + id2str = [] + no_fields = len(field_list) + + str2id_list = [str2id.copy() for i in xrange(no_fields)] + id2str_list = [list(id2str) for i in xrange(no_fields)] + index_list = [0 for i in xrange(no_fields)] + max_field = max(field_list) + + if filename.endswith(".gz"): + input_stream = gzip.open(filename, "rb") + else: + input_stream = open(filename, "rb") + + for line in input_stream: + if line.strip() != "": + elements = line.strip().split() + if len(elements) <= max_field: + warn("Invalid input line:%s. 
Skipping it" % line.strip()) + else: + for field_idx, field in enumerate(field_list): + current_str = elements[field] + if not current_str in str2id_list[field_idx]: + str2id_list[field_idx][current_str] = index_list[field_idx] + id2str_list[field_idx].append(current_str) + index_list[field_idx] += 1 + + for id2str in id2str_list: + if not id2str: + raise ValueError("Found no valid data in file: %s!" % filename) + return (id2str_list, str2id_list) + + +def read_tuple_list(data_file, fields=None): + field_list = [] + result = [] + + if fields: + field_list = fields + + with open(data_file) as f: + for line in f: + line = line.strip() + if line != "": + elements = line.split() + if field_list: + try: + elements = np.array(elements)[field_list] + except IndexError: + raise IndexError("Cannot extract fields:%s from %s!" + % (field_list, data_file)) + + result.append(tuple(elements)) + + return result + + +def read_list(file_name, **kwargs): + field = None + result = [] + if "field" in kwargs: + field = kwargs["field"] + + with open(file_name) as f: + for line in f: + line = line.strip() + if line != "": + if not field is None: + try: + result.append(line.split()[field]) + except IndexError: + raise IndexError("Cannot extract field:%s from %s!" + % (field, file_name)) + else: + result.append(line) + return result + + +def read_sparse_space_data(matrix_file, row2id, column2id, dtype=np.double): + if matrix_file.endswith(".gz"): + f = gzip.open(matrix_file, "rb") + else: + f = open(matrix_file, "rb") + + no_lines = sum(1 for line in f if line.strip() != "") + f.close() + + row = np.zeros(no_lines, dtype=np.int32) + col = np.zeros(no_lines, dtype=np.int32) + + data = np.zeros(no_lines, dtype=dtype) + + if matrix_file.endswith(".gz"): + f = gzip.open(matrix_file, "rb") + else: + f = open(matrix_file, "rb") + + i = 0 + for line in f: + if line.strip() != "": + line_elements = line.strip().split() + if len(line_elements) >= 3: + [word1, word2, count] = line_elements[0:3] + if word1 in row2id and word2 in column2id: + row[i] = row2id[word1] + col[i] = column2id[word2] + data[i] = dtype(count) + i += 1 + if i % 1000000 == 0: + print "Progress...%d" % i + #if len(line_elements) > 3: + # warn("Invalid input line:%s. Expected 3 fields, ignoring additional ones!" % line.strip()) + else: + raise ValueError("Invalid row: %s, expected at least %d fields" + % (line.strip(), 3)) + + f.close() + # eliminate the extra zeros created when word1 or word2 is not row2id or col2id!! + data = data[0:i] + row = row[0:i] + col = col[0:i] + + m = SparseMatrix(csr_matrix((data, (row, col)), shape=(len(row2id), len(column2id)))) + if m.mat.nnz != i: + warn("Found 0-counts or duplicate row,column pairs. 
(Duplicate entries are summed up.)") + + return m + + +def read_dense_space_data(matrix_file, row2id, element_type=np.double): + #get number of rows and columns + if matrix_file.endswith(".gz"): + f = gzip.open(matrix_file, "rb") + else: + f = open(matrix_file, "rb") + + first_line = f.next() + no_cols = len(first_line.strip().split()) - 1 + if no_cols <= 0: + raise ValueError("Invalid row: %s, expected at least %d fields" % (first_line.strip(), 2)) + f.close() + + no_rows = len(row2id) + row_string_set = set([]) + + m = np.mat(np.zeros(shape=(no_rows, no_cols), dtype=element_type)) + + if matrix_file.endswith(".gz"): + f = gzip.open(matrix_file, "rb") + else: + f = open(matrix_file, "rb") + + for line in f: + if not line.strip() == "": + elements = line.strip().split() + if len(elements) != no_cols + 1: + raise ValueError("Invalid row: %s, expected %d fields" + % (line.strip(), no_cols + 1)) + word = elements[0] + if word in row2id: + i = row2id[word] + if word in row_string_set != 0: + warn("Found duplicate row: %s. Ignoring it." % word) + else: + m[i, :] = elements[1:] + row_string_set.add(word) + + f.close() + + return DenseMatrix(m) + + +def print_list(list_, file_name): + with open(file_name, 'w') as f: + for item in list_: + f.write(item + "\n") + + +def print_cooc_mat_sparse_format(matrix_, id2row, id2column, file_prefix): + matrix_file = "%s.%s" % (file_prefix, "sm") + if not id2column: + raise ValueError("Cannot print matrix with no column info in sparse format!") + + mat = matrix_.mat + with open(matrix_file, 'w') as f: + if isinstance(matrix_, SparseMatrix): + + data = mat.data + row_indices = mat.indptr + col_indices = mat.indices + + row_index = 0 + next_row = row_indices[1] + row = id2row[0] + for i in xrange(len(data)): + while i == next_row: + row_index += 1 + next_row = row_indices[row_index + 1] + row = id2row[row_index] + col = id2column[col_indices[i]] + f.write("%s\t%s\t%f\n" % (row, col, data[i])) + else: + for i in range(mat.shape[0]): + for j in range(mat.shape[1]): + if mat[i, j] != 0: + f.write("%s\t%s\t%f\n" % (id2row[i], id2column[j], mat[i, j])) + + +def print_cooc_mat_dense_format(matrix_, id2row, file_prefix): + matrix_file = "%s.%s" % (file_prefix, "dm") + + with open(matrix_file, 'w') as f: + for i, row in enumerate(id2row): + v = DenseMatrix(matrix_[i]).mat.flat + line = "\t".join([row] + [repr(v[j]) for j in range(len(v))]) + f.write("%s\n" % (line)) + diff --git a/modules/composes/utils/log_utils.py b/modules/composes/utils/log_utils.py new file mode 100755 index 0000000..94b3346 --- /dev/null +++ b/modules/composes/utils/log_utils.py @@ -0,0 +1,110 @@ +''' +Created on Oct 15, 2012 + +@author: Georgiana Dinu, Pham The Nghia +''' + +from numpy import double +import logging +from composes.utils.io_utils import create_parent_directories + +def config_logging(file_name, level = logging.INFO, format_ =""): + if not file_name is None: + create_parent_directories(file_name) + logging.basicConfig(filename=file_name, level=level, format=format_) + logging.debug("start logging") + + +def get_ident(delim, ident_level): + return delim * ident_level + +def print_matrix_info(logger_, matrix_, ident_level, intro_string): + delim = " " + ident = get_ident(delim, ident_level) + logger_string = ident + intro_string + ident = ident + delim + + logger_string += ("\n%sMatrix type:%s" % (ident, type(matrix_).__name__)) + logger_string += ("\n%sMatrix shape:%sx%s" % (ident, matrix_.shape[0], + matrix_.shape[1])) + + if type(matrix_).__name__ == "SparseMatrix": + perc_nnz = 
100 * matrix_.mat.nnz/double(matrix_.shape[0]*matrix_.shape[1]) + logger_string += ("\n%sPerc. non-zero entries:%d" % (ident, perc_nnz)) + + logger_.info(logger_string) + + +def get_learner_info(learner, ident): + logger_string = "" + + if hasattr(learner, '_intercept'): + logger_string += ("\n%sUsing intercept:%s" % (ident, learner._intercept)) + + if hasattr(learner, '_crossvalidation'): + logger_string += ("\n%sUsing crossvalidation:%s" % (ident, learner._crossvalidation)) + + if learner._crossvalidation and hasattr(learner, '_folds'): + logger_string += ("\n%sUsing number of folds:%s" % (ident, learner._folds)) + + return logger_string + +def print_composition_model_info(logger_, model, ident_level, intro_string): + + delim = " " + ident = get_ident(delim, ident_level) + logger_string = ident + intro_string + ident = ident + delim + + logger_.info(logger_string) + + print_name(logger_, model, ident_level, "Composition model type:") + + logger_string = "" + if hasattr(model, '_regression_learner'): + logger_string += ("\n%sUsing regression:%s" % (ident, + type(model.regression_learner).__name__)) + logger_string += get_learner_info(model.regression_learner, ident + delim) + + logger_.info(logger_string) + +def print_transformation_info(logger_, trans, ident_level, intro_string): + delim = " " + ident = get_ident(delim, ident_level) + logger_string = ident + intro_string + ident = ident + delim + + logger_string += ("\n%sTransformation type:%s" % (ident, type(trans).__name__)) + + if hasattr(trans, '_reduced_dimension'): + logger_string += ("\n%sReduced dimension:%s" % (ident, trans.reduced_dimension)) + + + logger_.info(logger_string) + +def print_info(logger_, ident_level, text): + delim = " " + ident = get_ident(delim, ident_level) + logger_string = ident + "" + + logger_string += "\n%s%s" % (ident, text) + logger_.info(logger_string) + +def print_name(logger_, object_, ident_level, intro_string): + delim = " " + ident = get_ident(delim, ident_level) + logger_string = ident + intro_string + ident = ident + delim + + logger_string += ("\n%s%s" % (ident, type(object_).__name__)) + + logger_.info(logger_string) + +def print_time_info(logger_, end, beg, ident_level): + delim = " " + ident = get_ident(delim, ident_level) + logger_string = ident + logger_string += ("\n%sTiming:%s seconds" % (ident, end - beg)) + + logger_.info(logger_string) + diff --git a/modules/composes/utils/matrix_utils.py b/modules/composes/utils/matrix_utils.py new file mode 100755 index 0000000..3b5c9e6 --- /dev/null +++ b/modules/composes/utils/matrix_utils.py @@ -0,0 +1,103 @@ + +import numpy as np +from composes.matrix.sparse_matrix import SparseMatrix +from composes.matrix.dense_matrix import DenseMatrix +from composes.matrix.matrix import Matrix +from scipy.sparse import issparse +from py_matrix_utils import is_array +from warnings import warn + +def to_matrix(matrix_): + """ + Converts an array-like structure to a DenseMatrix/SparseMatrix + """ + if issparse(matrix_): + return SparseMatrix(matrix_) + else: + return DenseMatrix(matrix_) + +def is_array_or_matrix(data): + return is_array(data) or isinstance(data, Matrix) + + +def assert_is_array_or_matrix(data): + if not is_array_or_matrix(data): + raise TypeError("expected array-like or matrix, received %s" + % (type(data))) + +def padd_matrix(matrix_, axis, value=1): + matrix_type = type(matrix_) + if axis == 0: + append_mat = matrix_type(np.ones((1, matrix_.shape[1]))*value) + return matrix_.vstack(append_mat) + elif axis == 1: + append_mat = 
matrix_type(np.ones((matrix_.shape[0], 1))*value) + return matrix_.hstack(append_mat) + else: + raise ValueError("Invalid axis value:%s" % axis) + + +def assert_same_shape(matrix1, matrix2, axis=None): + + if axis is None: + if matrix1.shape != matrix2.shape: + raise ValueError("Inconsistent shapes") + else: + if not axis in [0, 1]: + raise ValueError("Invalid axis value: %s, expected 0 or 1." % axis) + if matrix1.shape[axis] != matrix2.shape[axis]: + raise ValueError("Inconsistent shapes") + + +def to_compatible_matrix_types(v1, v2): + + if isinstance(v1, Matrix) and isinstance(v2, Matrix): + v2 = type(v1)(v2) + elif not isinstance(v1, Matrix) and isinstance(v2, Matrix): + v1 = type(v2)(v1) + elif not isinstance(v2, Matrix) and isinstance(v1, Matrix): + v2 = type(v1)(v2) + else: + v1 = to_matrix(v1) + v2 = type(v1)(v2) + + return v1, v2 + + + +def get_type_of_largest(matrix_list): + max_dim = 0 + max_type = None + for matrix_ in matrix_list: + if matrix_.shape[0] * matrix_.shape[1] > max_dim: + max_type = type(matrix_) + max_dim = matrix_.shape[0] * matrix_.shape[1] + + return max_type + +def resolve_type_conflict(matrix_list, matrix_type): + new_matrix_list = [] + + if matrix_type_conflict(matrix_list): + warn("Efficiency warning: matrices should have the same dense/sparse type!") + for matrix_ in matrix_list: + new_matrix_list.append(matrix_type(matrix_)) + return new_matrix_list + + return list(matrix_list) + + +def matrix_type_conflict(matrix_list): + + if not matrix_list: + return False + + matrix_type = type(matrix_list[0]) + for matrix_ in matrix_list: + if not isinstance(matrix_, matrix_type): + return True + + return False + + + diff --git a/modules/composes/utils/mem_utils.py b/modules/composes/utils/mem_utils.py new file mode 100755 index 0000000..db1b474 --- /dev/null +++ b/modules/composes/utils/mem_utils.py @@ -0,0 +1,16 @@ +''' +Created on Sep 21, 2012 + +@author: Georgiana Dinu, Pham The Nghia +''' + +""" +Wrappers around psutil functions that display memory usage information. +""" +import numpy as np +from os import getpid +import psutil + +def get_mem_usage(): + p = psutil.Process(getpid()) + return p.get_memory_info()[0]/np.double(1024*1024) \ No newline at end of file diff --git a/modules/composes/utils/num_utils.py b/modules/composes/utils/num_utils.py new file mode 100755 index 0000000..c9cb215 --- /dev/null +++ b/modules/composes/utils/num_utils.py @@ -0,0 +1,15 @@ +''' +Created on Sep 18, 2012 + +@author: Georgiana Dinu, Pham The Nghia +''' + +from numbers import Number +from numbers import Integral +import numpy as np + +def is_numeric(operand): + return isinstance(operand, (Number, np.number)) + +def is_integer(operand): + return isinstance(operand, Integral) diff --git a/modules/composes/utils/py_matrix_utils.py b/modules/composes/utils/py_matrix_utils.py new file mode 100755 index 0000000..172e1d3 --- /dev/null +++ b/modules/composes/utils/py_matrix_utils.py @@ -0,0 +1,35 @@ +''' +Created on Sep 19, 2012 + +@author: Georgiana Dinu, Pham The Nghia +''' +import numpy as np +from scipy.sparse import spdiags + + +def array_to_csr_diagonal(array_): + #array_ can't be a sparse matrix, if it is dense, it has to be a row matrix + #(i.e. 
shape = (1, x)) + + flat_array = array_.flatten() + array_size = flat_array.size + csr_diag = spdiags(flat_array, [0], array_size, array_size, format = 'csr') + return csr_diag + +def is_array(operand): + return hasattr(operand, 'dtype') and hasattr(operand, 'shape') + + +def nonzero_invert(matrix_): + ''' + Performs 1/x for all x, non-zero elements of the matrix. + + Params: + matrix_: np.matrix + ''' + + matrix_ = matrix_.astype(np.double) + matrix_[matrix_ != 0] = np.array(1.0/matrix_[matrix_ != 0]).flatten() + return matrix_ + + diff --git a/modules/composes/utils/regression_learner.py b/modules/composes/utils/regression_learner.py new file mode 100755 index 0000000..fd7b641 --- /dev/null +++ b/modules/composes/utils/regression_learner.py @@ -0,0 +1,106 @@ +import numpy as np +from composes.matrix.linalg import Linalg + + +class RegressionLearner(object): + """ + Implements a set of regression methods. + + Supported regression methods are least squares regression and + ridge regression. Ridge regression can be used with generalized + cross validation. (Hastie, Tibshirani and Friedman, Second edition, + page 244) + """ + + + def __init__(self): + ''' + Constructor + ''' + + def has_intercept(self): + return self._intercept + + +class LstsqRegressionLearner(RegressionLearner): + """ + This class performs Least Squares Regression. + + It finds the matrix X which solves: + + :math:`X = argmin(||AX - B||_2)` + + It can be used with intercept or without (by default intercept=True). + + """ + + def __init__(self, intercept=True): + self._intercept = intercept + + def train(self, matrix_a, matrix_b): + return Linalg.lstsq_regression(matrix_a, matrix_b, self._intercept) + + +class RidgeRegressionLearner(RegressionLearner): + """ + This class performs Ridge Regression. + + It finds the matrix X which solves: + + :math:`X = argmin(||AX - B||_2 + \\lambda||X||_2)` + + It can be used with intercept or without (by default intercept=True). + Cross validation can be used with default :math:`\\lambda` range of + :math:`linspace(0, 5, 11)`. By default Generalized cross validation is performed. + If cross validation is set False it requires the input of a :math:`\\lambda` value. + + """ + + def __init__(self, intercept=True, param_range=None, crossvalidation=True, param=None): + self._intercept = intercept + self._param_range = param_range if param_range is not None else np.linspace(0.0, 5, 11) + + self._param = param + self._crossvalidation = crossvalidation + + if param: + self._crossvalidation = False + self._param = param + + if not self._crossvalidation and self._param is None: + raise ValueError("Cannot run (no-crossvalidation) RidgeRegression with no lambda value!") + + + def train(self, matrix_a, matrix_b): + """ + If cross validation is set to True, it performs generalized + cross validation. (Hastie, Tibshirani and Friedman, Second edition, + page 244). 
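For dense data, the generalized cross-validation criterion used by `RidgeRegressionLearner` can be written out directly. The following is an illustrative NumPy-only sketch (random toy data, no intercept column, same default lambda grid), not a drop-in replacement for `Linalg.ridge_regression`:

```python
# Illustrative GCV sketch for ridge regression (toy data, no intercept).
# GCV(lambda) = ||B - A X_lambda||^2 / (N * (1 - trace(S_lambda)/N)^2),
# where S_lambda = A (A^T A + lambda I)^-1 A^T is the hat matrix.
import numpy as np

rng = np.random.RandomState(0)
A = rng.randn(50, 5)
B = A.dot(rng.randn(5, 1)) + 0.1 * rng.randn(50, 1)
N = A.shape[0]

best_param, best_gcv = None, np.inf
for lam in np.linspace(0.0, 5.0, 11):              # the default parameter range
    inv = np.linalg.inv(A.T.dot(A) + lam * np.eye(A.shape[1]))
    X = inv.dot(A.T).dot(B)                        # ridge solution for this lambda
    S_trace = np.trace(A.dot(inv).dot(A.T))        # trace of the hat matrix
    err = np.linalg.norm(B - A.dot(X))
    gcv = err ** 2 / (N * (1.0 - S_trace / N) ** 2)
    if gcv < best_gcv:
        best_gcv, best_param = gcv, lam

print(best_param)
```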
+ """ + + if not self._crossvalidation: + return Linalg.ridge_regression(matrix_a, matrix_b, self._param, + self._intercept)[0] + + else: + min_err_param = 0 + min_err = np.Inf + gcv_err = np.Inf + + N = matrix_a.shape[0] + for param in self._param_range: + + mat_x, S_trace, err1 = Linalg.ridge_regression(matrix_a, matrix_b, param, + self._intercept) + + nom = pow(1 - S_trace / N, 2) * N + if nom != 0: + gcv_err = (err1 * err1) / nom + + if gcv_err < min_err: + min_err = gcv_err + min_err_param = param + + #print "lambda:", min_err_param + return Linalg.ridge_regression(matrix_a, matrix_b, min_err_param, + self._intercept)[0] diff --git a/modules/composes/utils/scoring_utils.py b/modules/composes/utils/scoring_utils.py new file mode 100755 index 0000000..64dc787 --- /dev/null +++ b/modules/composes/utils/scoring_utils.py @@ -0,0 +1,64 @@ +''' +Created on Oct 17, 2012 + +@author: Georgiana Dinu, Pham The Nghia +''' + +import numpy as np +from scipy import stats + + +def score(gold, prediction, method): + if len(gold) != len(prediction): + raise ValueError("The two arrays must have the same length!") + + gold = np.array(gold, dtype=np.double) + prediction = np.array(prediction, dtype=np.double) + + if method == "pearson": + return pearson(gold, prediction)[0] + elif method == "spearman": + return spearman(gold, prediction)[0] + elif method == "auc": + return auc(gold, prediction) + else: + raise NotImplementedError("Unknown scoring measure:%s" % method) + +def pearson(gold, prediction): + return stats.pearsonr(gold, prediction) + +def spearman(gold, prediction): + return stats.spearmanr(gold, prediction, None) + +def auc(gold, prediction): + + positive = float(gold[gold == 1].size) + negative = float(gold.size - positive) + + total_count = gold.size + point_set = np.empty(total_count, dtype = [('gold',float),('score',float)]) + for i in range(total_count): + if not gold[i] in (0,1): + raise ValueError("For evaluating AUC, gold scores are required to be 0 or 1.") + point_set[i]=(gold[i], prediction[i]) + + point_set.sort(order = 'score') + + xi = 1.0 + yi = 1.0 + xi_old = 1.0 + true_positive = positive + false_positive = negative + auc = 0 + + for i in range(total_count): + if (point_set[i][0] == 1): + true_positive -= 1 + yi = true_positive / positive + else: + false_positive -= 1 + xi = false_positive / negative + auc += (xi_old - xi) * yi + xi_old = xi + + return auc diff --git a/modules/composes/utils/space_utils.py b/modules/composes/utils/space_utils.py new file mode 100755 index 0000000..6cf36ae --- /dev/null +++ b/modules/composes/utils/space_utils.py @@ -0,0 +1,56 @@ +''' +Created on Sep 26, 2012 + +@author: Georgiana Dinu, Pham The Nghia +''' + + +def list2dict(list_): + return_dict = {} + + for idx, word in enumerate(list_): + if word in return_dict: + raise ValueError("duplicate string found in list: %s" % (word)) + return_dict[word] = idx + + return return_dict + +def add_items_to_dict(dict_, list_): + + no_els = len(dict_) + for idx, el in enumerate(list_): + if el in dict_: + raise ValueError("Found duplicate keys when appending elements to\ + dictionary.") + dict_[el] = no_els + idx + return dict_ + +def assert_dict_match_list(dict_, list_): + + match_err = ValueError("expected matching dictionary and list structures.") + + if not len(list_) == len(dict_): + raise match_err + for (k, v) in dict_.iteritems(): + if not list_[v] == k: + raise match_err + + +def assert_shape_consistent(matrix_, id2row, id2column, row2id, column2id): + + no_rows = matrix_.mat.shape[0] + no_cols = 
matrix_.mat.shape[1] + + has_column_maps = column2id or id2column + + if not no_rows == len(id2row) or not no_rows == len(row2id): + raise ValueError("expected consistent shapes: %d %d %d" + % (no_rows, len(id2row), len(row2id))) + + if (has_column_maps and + (not no_cols == len(id2column) or not no_cols == len(column2id))): + raise ValueError("expected consistent shapes: %d %d %d" + % (no_cols, len(id2column), len(column2id))) + + + diff --git a/modules/cupy_utils.py b/modules/cupy_utils.py new file mode 100644 index 0000000..a0240d9 --- /dev/null +++ b/modules/cupy_utils.py @@ -0,0 +1,43 @@ +# Copyright (C) 2018 Mikel Artetxe +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +import numpy + +try: + import cupy +except ImportError: + cupy = None + + +def supports_cupy(): + return cupy is not None + + +def get_cupy(): + return cupy + + +def get_array_module(x): + if cupy is not None: + return cupy.get_array_module(x) + else: + return numpy + + +def asnumpy(x): + if cupy is not None: + return cupy.asnumpy(x) + else: + return numpy.asarray(x) diff --git a/modules/dsm.py b/modules/dsm.py new file mode 100644 index 0000000..734483b --- /dev/null +++ b/modules/dsm.py @@ -0,0 +1,171 @@ +import sys +sys.path.append('../') + +import logging +import os +import itertools +from gensim import utils +try: + from gensim.models.word2vec_inner import MAX_WORDS_IN_BATCH +except ImportError: + # failed... fall back to plain numpy (20-80x slower training than the above) + MAX_WORDS_IN_BATCH = 10000 + +import gzip +import bz2 +import pickle +import numpy as np + +from composes.semantic_space.space import Space +from collections import defaultdict +from composes.utils import io_utils +from scipy.sparse import coo_matrix, csr_matrix +from composes.matrix.sparse_matrix import SparseMatrix +from composes.matrix.dense_matrix import DenseMatrix + + +# To-do: should be renamed and restructured +def save_pkl_files(dsm, dsm_prefix, save_in_one_file=False, save_as_w2v=False): + """ + Save semantic space (from DISSECT package) to different formats. 
+ :param dsm: the semantic space + :param dsm_prefix: the prefix for the output files + :param save_in_one_file: whether to save as one file (pkl or w2v) or separate files (npz for matrix and pkl for rows and columns) + :param save_as_w2v: given save_in_one_file=True, whether to save it in w2v format or pkl + """ + + # Save in a single file (for small spaces) + if save_in_one_file: + # only useful for dense spaces + if save_as_w2v: + rows = np.array(dsm.cooccurrence_matrix.get_mat()).astype(object) + id2row = np.array([word.decode('utf-8') for word in dsm.get_id2row()]) + r, d = rows.shape + id2row = id2row.reshape(-1,1) + rows = np.concatenate((id2row, rows), axis=1) + np.savetxt(dsm_prefix + '.w2v', rows, fmt=["%s"] + ['%.16g',]*d, delimiter=' ', newline='\n', header='%d %d' %(r, d), comments='', encoding='utf-8') + else: + io_utils.save(dsm, dsm_prefix + '.pkl') + + # Save in multiple files: npz for the matrix and pkl for the other data members of Space + else: + mat = coo_matrix(dsm.cooccurrence_matrix.get_mat()) + np.savez_compressed(dsm_prefix + '.npz', data=mat.data, row=mat.row, col=mat.col, shape=mat.shape) + + with open(dsm_prefix + '_row2id.pkl', 'wb') as f_out: + pickle.dump(dsm._row2id, f_out, 2) + + with open(dsm_prefix + '_id2row.pkl', 'wb') as f_out: + pickle.dump(dsm._id2row, f_out, 2) + + with open(dsm_prefix + '_column2id.pkl', 'wb') as f_out: + pickle.dump(dsm._column2id, f_out, 2) + + with open(dsm_prefix + '_id2column.pkl', 'wb') as f_out: + pickle.dump(dsm._id2column, f_out, 2) + + +def load_pkl_files(dsm_prefix): + """ + Load the space from either a single pkl file or numerous files. + :param dsm_prefix: the prefix of the input files (.pkl, .rows, .cols) + """ + + # Check whether there is a single pickle file for the Space object + if os.path.isfile(dsm_prefix + '.pkl'): + return io_utils.load(dsm_prefix + '.pkl') + + # Load the multiple files: npz for the matrix and pkl for the other data members of Space + if os.path.isfile(dsm_prefix + '.npz'): + with np.load(dsm_prefix + '.npz') as loader: + coo = coo_matrix((loader['data'], (loader['row'], loader['col'])), shape=loader['shape']) + + cooccurrence_matrix = SparseMatrix(csr_matrix(coo)) + + with open(dsm_prefix + '_row2id.pkl', 'rb') as f_in: + row2id = pickle.load(f_in) + + with open(dsm_prefix + '_id2row.pkl', 'rb') as f_in: + id2row = pickle.load(f_in) + + with open(dsm_prefix + '_column2id.pkl', 'rb') as f_in: + column2id = pickle.load(f_in) + + with open(dsm_prefix + '_id2column.pkl', 'rb') as f_in: + id2column = pickle.load(f_in) + + return Space(cooccurrence_matrix, id2row, id2column, row2id=row2id, column2id=column2id) + + if os.path.isfile(dsm_prefix + '.tsv'): + values = np.loadtxt(dsm_prefix + '.tsv', dtype=float, delimiter='\t', skiprows=0, comments='', encoding='utf-8') + targets = np.loadtxt(dsm_prefix + '.rows', dtype=str, skiprows=0, comments='', encoding='utf-8') + # Convert to space in sparse matrix format + return Space(SparseMatrix(values), list(targets), []) + + # If everything fails try to load it as single w2v file + space_array = np.loadtxt(dsm_prefix + '.w2v', dtype=object, delimiter=' ', skiprows=1, comments='', encoding='utf-8') + targets = space_array[:,0].flatten() + values = space_array[:,1:].astype(np.float) + # Convert to space and sparse matrix format + return Space(SparseMatrix(values), list(targets), []) + + +class PathLineSentences_mod(object): + """ + Simple format: date\tsentence = one line; words already preprocessed and separated by whitespace. 
+ Like LineSentence, but will process all files in a directory in alphabetical order by filename + """ + + def __init__(self, source, max_sentence_length=MAX_WORDS_IN_BATCH, limit=None, lowerBound=-9999, upperBound=9999): + """ + `source` should be a path to a directory (as a string) where all files can be opened by the + LineSentence class. Each file will be read up to + `limit` lines (or no clipped if limit is None, the default). + + Example:: + + sentences = LineSentencePath_mod(os.getcwd() + '\\corpus\\') + + The files in the directory should be either text files, .bz2 files, or .gz files. + + """ + self.source = source + self.max_sentence_length = max_sentence_length + self.limit = limit + self.lowerBound = lowerBound + self.upperBound = upperBound + self.corpusSize = 0 + + + if os.path.isfile(self.source): + logging.warning('single file read, better to use models.word2vec.LineSentence') + self.input_files = [self.source] # force code compatibility with list of files + elif os.path.isdir(self.source): + self.source = os.path.join(self.source, '') # ensures os-specific slash at end of path + logging.debug('reading directory ' + self.source) + self.input_files = os.listdir(self.source) + self.input_files = [self.source + file for file in self.input_files] # make full paths + self.input_files.sort() # makes sure it happens in filename order + else: # not a file or a directory, then we can't do anything with it + raise ValueError('input is neither a file nor a path') + + logging.info('files read into PathLineSentences_mod:' + '\n'.join(self.input_files)) + + def __iter__(self): + '''iterate through the files''' + for file_name in self.input_files: + if '.DS_Store' in file_name: + continue + logging.info('reading file ' + file_name) + with utils.smart_open(file_name) as fin: + for line in itertools.islice(fin, self.limit): + lineSplit = line.split("\t") + date, line = int(lineSplit[0]), utils.to_unicode(lineSplit[1]).split() # Get date and sentence + if not self.lowerBound <= date <= self.upperBound: # skip every sentence which is not in timeframe + continue + self.corpusSize+=len(line) + i = 0 + while i < len(line): + yield line[i:i + self.max_sentence_length] + i += self.max_sentence_length + diff --git a/modules/dsm.pyc b/modules/dsm.pyc new file mode 100644 index 0000000..79efef5 Binary files /dev/null and b/modules/dsm.pyc differ diff --git a/modules/embeddings.py b/modules/embeddings.py new file mode 100644 index 0000000..9a407a5 --- /dev/null +++ b/modules/embeddings.py @@ -0,0 +1,80 @@ +# Copyright (C) 2016-2018 Mikel Artetxe +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . 
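A short usage sketch for the space helpers defined in `modules/dsm.py` above (illustrative only: toy matrix, made-up words and output prefix; it assumes the script is run from the repository root, as recommended in the usage note):

```python
# Illustrative round trip with the save/load helpers from modules/dsm.py.
import sys
sys.path.append('./modules/')

import numpy as np
from scipy.sparse import csr_matrix
from composes.semantic_space.space import Space
from composes.matrix.sparse_matrix import SparseMatrix
from dsm import save_pkl_files, load_pkl_files

mat = csr_matrix(np.array([[1.0, 0.0], [0.0, 2.0]]))
space = Space(SparseMatrix(mat), ['word1', 'word2'], ['ctx1', 'ctx2'])

# Written as an .npz file (matrix) plus four .pkl files (row/column mappings)
save_pkl_files(space, 'toy_space', save_in_one_file=False)

space2 = load_pkl_files('toy_space')
print(space2.get_id2row())   # ['word1', 'word2']
```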
+ +from cupy_utils import * + +import numpy as np + + +def read(file, threshold=0, vocabulary=None, dtype='float'): + header = file.readline().split(' ') + count = int(header[0]) if threshold <= 0 else min(threshold, int(header[0])) + dim = int(header[1]) + words = [] + matrix = np.empty((count, dim), dtype=dtype) if vocabulary is None else [] + for i in range(count): + word, vec = file.readline().split(' ', 1) + if vocabulary is None: + words.append(word) + matrix[i] = np.fromstring(vec, sep=' ', dtype=dtype) + elif word in vocabulary: + words.append(word) + matrix.append(np.fromstring(vec, sep=' ', dtype=dtype)) + return (words, matrix) if vocabulary is None else (words, np.array(matrix, dtype=dtype)) + + +def write(words, matrix, file): + m = asnumpy(matrix) + print('%d %d' % m.shape, file=file) + for i in range(len(words)): + print(words[i] + ' ' + ' '.join(['%.6g' % x for x in m[i]]), file=file) + + +def length_normalize(matrix): + xp = get_array_module(matrix) + norms = xp.sqrt(xp.sum(matrix**2, axis=1)) + norms[norms == 0] = 1 + matrix /= norms[:, xp.newaxis] + + +def mean_center(matrix): + xp = get_array_module(matrix) + avg = xp.mean(matrix, axis=0) + matrix -= avg + + +def length_normalize_dimensionwise(matrix): + xp = get_array_module(matrix) + norms = xp.sqrt(xp.sum(matrix**2, axis=0)) + norms[norms == 0] = 1 + matrix /= norms + + +def mean_center_embeddingwise(matrix): + xp = get_array_module(matrix) + avg = xp.mean(matrix, axis=1) + matrix -= avg[:, xp.newaxis] + + +def normalize(matrix, actions): + for action in actions: + if action == 'unit': + length_normalize(matrix) + elif action == 'center': + mean_center(matrix) + elif action == 'unitdim': + length_normalize_dimensionwise(matrix) + elif action == 'centeremb': + mean_center_embeddingwise(matrix) diff --git a/representations/count.py b/representations/count.py new file mode 100644 index 0000000..4063540 --- /dev/null +++ b/representations/count.py @@ -0,0 +1,100 @@ +import sys +sys.path.append('./modules/') + +from collections import defaultdict +from docopt import docopt +import logging +import time +import numpy as np +from dsm import save_pkl_files, PathLineSentences_mod +from scipy.sparse import dok_matrix, csr_matrix, linalg +from composes.semantic_space.space import Space +from composes.matrix.sparse_matrix import SparseMatrix + + +def main(): + """ + Make count-based vector space from corpus. + """ + + # Get the arguments + args = docopt("""Make count-based vector space from corpus. + + Usage: + count.py [-l] + + Arguments: + + = path to corpus directory with zipped files, each sentence in form 'year\tword1 word2 word3...' 
+ = output path for vectors + = the linear distance of context words to consider in each direction + = lower bound for time period + = upper bound for time period + + Options: + -l, --len normalize final vectors to unit length + + """) + + is_len = args['--len'] + corpDir = args[''] + outPath = args[''] + windowSize = int(args['']) + lowerBound = int(args['']) + upperBound = int(args['']) + + logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO) + logging.info(__file__.upper()) + start_time = time.time() + + # Build vocabulary + logging.info("Building vocabulary") + sentences = PathLineSentences_mod(corpDir, lowerBound=lowerBound, upperBound=upperBound) + vocabulary = list(set([word for sentence in sentences for word in sentence if len(sentence)>1])) # Skip one-word sentences to avoid zero-vectors + w2i = {w: i for i, w in enumerate(vocabulary)} + + # Initialize co-occurrence matrix as dictionary + cooc_mat = defaultdict(lambda: 0) + + # Get counts from corpus + sentences = PathLineSentences_mod(corpDir, lowerBound=lowerBound, upperBound=upperBound) + logging.info("Counting context words") + for sentence in sentences: + for i, word in enumerate(sentence): + lowerWindowSize = max(i-windowSize, 0) + upperWindowSize = min(i+windowSize, len(sentence)) + window = sentence[lowerWindowSize:i] + sentence[i+1:upperWindowSize+1] + if len(window)==0: # Skip one-word sentences + continue + windex = w2i[word] + for contextWord in window: + cooc_mat[(windex,w2i[contextWord])] += 1 + + + # Convert dictionary to sparse matrix + logging.info("Converting dictionary to matrix") + cooc_mat_sparse = dok_matrix((len(vocabulary),len(vocabulary)), dtype=float) + try: + cooc_mat_sparse.update(cooc_mat) + except NotImplementedError: + cooc_mat_sparse._update(cooc_mat) + + if is_len: + # L2-normalize vectors + l2norm1 = linalg.norm(cooc_mat_sparse, axis=1, ord=2) + l2norm1[l2norm1==0.0] = 1.0 # Convert 0 values to 1 + cooc_mat_sparse /= l2norm1.reshape(len(l2norm1),1) + + # Make space + vocabulary = [v.encode('utf-8') for v in vocabulary] + countSpace = Space(SparseMatrix(cooc_mat_sparse), vocabulary, vocabulary) + + # Save the Space object in pickle format + save_pkl_files(countSpace, outPath, save_in_one_file=False) + + logging.info("Corpus has size %d" % sentences.corpusSize) + logging.info("--- %s seconds ---" % (time.time() - start_time)) + + +if __name__ == '__main__': + main() diff --git a/representations/ppmi.py b/representations/ppmi.py new file mode 100644 index 0000000..9652dec --- /dev/null +++ b/representations/ppmi.py @@ -0,0 +1,103 @@ +import sys +sys.path.append('./modules/') + +import numpy as np +from docopt import docopt +from scipy.sparse import csc_matrix, coo_matrix, linalg +from composes.utils import io_utils +from composes.semantic_space.space import Space +from composes.utils.py_matrix_utils import nonzero_invert +from composes.transformation.scaling.ppmi_weighting import PpmiWeighting +from composes.matrix.sparse_matrix import SparseMatrix +from dsm import save_pkl_files, load_pkl_files +import logging +import time + + +def main(): + """ + Compute the smoothed and shifted (P)PMI matrix from a co-occurrence matrix. Smoothing is performed as described in + + Omer Levy, Yoav Goldberg, and Ido Dagan. 2015. Improving distributional similarity with lessons learned from word embeddings. Trans. ACL, 3. + + """ + + # Get the arguments + args = docopt('''Compute the smoothed and shifted (P)PMI matrix from a co-occurrence matrix and save it in pickle format. 
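The heart of `representations/count.py` above is the symmetric window count. Below is a stripped-down sketch on a single toy sentence (illustrative only; it keeps word pairs directly instead of the index-based `dok_matrix` used in the script):

```python
# Illustrative sketch of the windowed co-occurrence count in count.py
# (toy sentence; the real script maps words to indices and fills a dok_matrix).
from collections import defaultdict

sentence = ['the', 'cat', 'sat', 'on', 'the', 'mat']
windowSize = 2
cooc = defaultdict(int)

for i, word in enumerate(sentence):
    lowerWindowSize = max(i - windowSize, 0)
    upperWindowSize = min(i + windowSize, len(sentence))
    window = sentence[lowerWindowSize:i] + sentence[i + 1:upperWindowSize + 1]
    for contextWord in window:
        cooc[(word, contextWord)] += 1

print(cooc[('the', 'sat')])   # 2: 'sat' lies within two tokens of both occurrences of 'the'
```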
+ + Usage: + ppmi.py [-l] + + = the prefix for the input files (.sm for the matrix, .rows and .cols) and output files (.ppmi) + = shifting parameter + = smoothing parameter + = output path for space + + Options: + -l, --len normalize final vectors to unit length + + ''') + + is_len = args['--len'] + dsm_prefix = args[''] + k = int(args['']) + alpha = float(args['']) + outPath = args[''] + + logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO) + logging.info(__file__.upper()) + start_time = time.time() + + # Get space with sparse matrix + dsm = load_pkl_files(dsm_prefix) + id2row = dsm.get_id2row() + id2column = dsm.get_id2column() + + # Get probabilities + matrix_ = dsm.cooccurrence_matrix + + matrix_.assert_positive() + row_sum = matrix_.sum(axis = 1) + col_sum = matrix_.sum(axis = 0) + + # Compute smoothed P_alpha(c) + smooth_col_sum = np.power(col_sum, alpha) + col_sum = smooth_col_sum/smooth_col_sum.sum() + + # Compute P(w) + row_sum = nonzero_invert(row_sum) + col_sum = nonzero_invert(col_sum) + + # Apply epmi weighting (without log) + matrix_ = matrix_.scale_rows(row_sum) + matrix_ = matrix_.scale_columns(col_sum) + + # Apply log weighting + matrix_.mat.data = np.log(matrix_.mat.data) + + # Shift values + matrix_.mat.data -= np.log(k) + + # Eliminate negative counts + matrix_.mat.data[matrix_.mat.data <= 0] = 0.0 + + # Eliminate zero counts + matrix_.mat.eliminate_zeros() + + matrix_ = matrix_.get_mat() + + if is_len: + # L2-normalize vectors + l2norm1 = linalg.norm(matrix_, axis=1, ord=2) + l2norm1[l2norm1==0.0] = 1.0 # Convert 0 values to 1 + matrix_ /= l2norm1.reshape(len(l2norm1),1) + + dsm = Space(SparseMatrix(matrix_), id2row, id2column) + + # Save the Space object in pickle format + save_pkl_files(dsm, outPath + ".ppmi.sm", save_in_one_file=False) + logging.info("--- %s seconds ---" % (time.time() - start_time)) + + +if __name__ == '__main__': + main() diff --git a/representations/ri.py b/representations/ri.py new file mode 100644 index 0000000..89bc4bb --- /dev/null +++ b/representations/ri.py @@ -0,0 +1,151 @@ +import sys +sys.path.append('./modules/') + +import os +from os.path import basename +from docopt import docopt +from dsm import load_pkl_files, save_pkl_files +import logging +import time +import codecs +import numpy as np +from composes.semantic_space.space import Space +from composes.matrix.dense_matrix import DenseMatrix +from composes.matrix.sparse_matrix import SparseMatrix +from sklearn.random_projection import sparse_random_matrix +from scipy.sparse import lil_matrix, csr_matrix, csc_matrix + + +def main(): + """ + Create low-dimensional vector space by sparse random indexing from co-occurrence matrix. + """ + + # Get the arguments + args = docopt('''Create low-dimensional vector space by sparse random indexing from co-occurrence matrix. + + Usage: + reduce_matrix_ri.py [-l] (-s | -a) + + = number of non-zero values in each random vector + = number of dimensions for random vectors + = threshold for downsampling (if t=None, no subsampling is applied) + = output path for reduced space + = output path for elemental space (context vectors) + = path to pickled space without suffix + + Options: + -l, --len normalize final vectors to unit length + -s, --see specify number of seeds manually + -a, --aut calculate number of seeds automatically as proposed in [1,2] + + References: + [1] Ping Li, T. Hastie and K. W. Church, 2006, + "Very Sparse Random Projections". + http://web.stanford.edu/~hastie/Papers/Ping/KDD06_rp.pdf + [2] D. 
Achlioptas, 2001, "Database-friendly random projections", + http://www.cs.ucsc.edu/~optas/papers/jl.pdf + + ''') + + is_len = args['--len'] + is_seeds = args['--see'] + if is_seeds: + seeds = int(args['']) + is_aut = args['--aut'] + dim = int(args['']) + if args['']=='None': + t = None + else: + t = float(args['']) + outPath = args[''] + outPathElement = args[''] + spacePrefix = args[''] + + logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO) + logging.info(__file__.upper()) + start_time = time.time() + + # Load input space + space1 = load_pkl_files(spacePrefix) + matrix1 = space1.get_cooccurrence_matrix() + + # Get mappings between rows/columns and words + id2row1 = space1.get_id2row() + id2column1 = space1.get_id2column() + column2id1 = space1.get_column2id() + + ## Generate ternary random vectors + if is_seeds: + elementalMatrix = np.zeros((len(id2column1),dim)) + # Generate base vector for random vectors + baseVector = np.zeros(dim) # Note: Make sure that number of seeds is not greater than dimensions + for i in range(0,seeds/2): + baseVector[i] = 1.0 + for i in range(seeds/2,seeds): + baseVector[i] = -1.0 + for i in range(len(id2column1)): + np.random.shuffle(baseVector) + elementalMatrix[i] = baseVector + if is_aut: + elementalMatrix = sparse_random_matrix(dim,len(id2column1)).toarray().T + + elementalMatrix = csc_matrix(elementalMatrix) + # to-do: get rid of transformation into sparse matrices by initializing them as such + + # Initialize target vectors + reducedMatrix1 = np.zeros((len(id2row1),dim)) + + # Get number of total occurrences of any word + totalOcc = np.sum(matrix1.get_mat()) + + # Define function for downsampling + downsample = lambda f: np.sqrt(float(t)/f) if f>t else 1.0 + downsample = np.vectorize(downsample) + + # Get total normalized co-occurrence frequency of all contexts in space + context_freqs = np.array(matrix1.sum(axis=0))/totalOcc + + #to-do: matrix multiplication is done row-wise, do this matrix-wise + # Iterate over rows of space, find context words and update reduced matrix with low-dimensional random vectors of these context words + for (space,matrix,id2row,id2column,column2id,reducedMatrix) in [(space1,matrix1,id2row1,id2column1,column2id1,reducedMatrix1)]: + # Iterate over targets + for i, target in enumerate(id2row): + # Get co-occurrence values as matrix + m = space.get_row(target).get_mat() + # Get nonzero indexes and data + nonzeros = m.nonzero() + data = m.data + # Smooth context distribution + pos_context_vectors = elementalMatrix[nonzeros[1]] + if t!=None: + # Apply subsampling + rfs = context_freqs[0,nonzeros[1]] + rfs = downsample(rfs) + data *= rfs + data = csc_matrix(data) + # Weight context vectors by occurrence frequency + pos_context_vectors = pos_context_vectors.multiply(data.reshape(-1,1)) + pos_context_vectors = np.sum(pos_context_vectors, axis=0) + # Add up context vectors and store as row for target + reducedMatrix[i] = pos_context_vectors + + if is_len: + # L2-normalize vectors + l2norm1 = np.linalg.norm(reducedMatrix1, axis=1, ord=2) + l2norm1[l2norm1==0.0] = 1.0 # Convert 0 values to 1 + reducedMatrix1 /= l2norm1.reshape(len(l2norm1),1) + + # Make spaces + reducedSpace1 = Space(DenseMatrix(reducedMatrix1), id2row1, []) + elementalSpace = Space(SparseMatrix(elementalMatrix), id2column1, []) + + # Save the Space objects in pickle format + save_pkl_files(reducedSpace1, outPath + '.ri.dm', save_in_one_file=True, save_as_w2v=True) + save_pkl_files(elementalSpace, outPathElement + '.sm', 
save_in_one_file=False) + + logging.info("--- %s seconds ---" % (time.time() - start_time)) + + +if __name__ == '__main__': + main() diff --git a/representations/sgns.py b/representations/sgns.py new file mode 100644 index 0000000..e179f3b --- /dev/null +++ b/representations/sgns.py @@ -0,0 +1,95 @@ +import sys +sys.path.append('./modules/') + +import codecs +from collections import defaultdict +import os +from os.path import basename +import zipfile +from docopt import docopt +import logging +import logging.config +import time +import gensim +from dsm import PathLineSentences_mod + + + +def main(): + """ + Make embedding vector space with Negative Sampling from corpus. + """ + + # Get the arguments + args = docopt("""Make embedding vector space with Skip-Gram with Negative Sampling from corpus. + + Usage: + sgns.py [-l] + + Arguments: + + = the linear distance of context words to consider in each direction + = dimensionality of embeddings + = number of negative samples parameter (equivalent to shifting parameter for PPMI) + = threshold for subsampling + = number of occurrences for a word to be included in the vocabulary + = number of iterations + = path to corpus directory with zipped files, each sentence in form 'year\tword1 word2 word3...' + = output path for vectors + = lower bound for time period + = upper bound for time period + + Options: + -l, --len normalize final vectors to unit length + + """) + + is_len = args['--len'] + windowSize = int(args['']) + dim = int(args['']) + k = int(args['']) + if args['']=='None': + t = None + else: + t = float(args['']) + minCount = int(args['']) + itera = int(args['']) + corpDir = args[''] + outPath = args[''] + lowerBound = int(args['']) + upperBound = int(args['']) + + logging.config.dictConfig({'version': 1, 'disable_existing_loggers': True,}) + logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO) + logging.info(__file__.upper()) + start_time = time.time() + + # Initialize model + model = gensim.models.Word2Vec(sg=1, # skipgram + hs=0, # negative sampling + negative=k, # number of negative samples + sample=t, # threshold for subsampling, if None, no subsampling is performed + size=dim, window=windowSize, min_count=minCount, iter=itera, workers=20) + + # Initialize vocabulary + vocab_sentences = PathLineSentences_mod(corpDir, lowerBound=lowerBound, upperBound=upperBound) + model.build_vocab(vocab_sentences) + + # Train + sentences = PathLineSentences_mod(corpDir, lowerBound=lowerBound, upperBound=upperBound) + model.train(sentences, total_examples=model.corpus_count, epochs=model.iter) + + if is_len: + # L2-normalize vectors + model.init_sims(replace=True) + + # Save the vectors and the model + model.wv.save_word2vec_format(outPath + '.w2v') + #model.save(outPath + '.model') + + logging.info("Corpus has size %d" % vocab_sentences.corpusSize) + logging.info("--- %s seconds ---" % (time.time() - start_time)) + + +if __name__ == '__main__': + main() diff --git a/representations/svd.py b/representations/svd.py new file mode 100644 index 0000000..6794135 --- /dev/null +++ b/representations/svd.py @@ -0,0 +1,84 @@ +import sys +sys.path.append('./modules/') + +import numpy as np +from docopt import docopt +from composes.utils import io_utils +from composes.semantic_space.space import Space +from composes.matrix.dense_matrix import DenseMatrix +from sklearn.utils.extmath import randomized_svd +from dsm import save_pkl_files, load_pkl_files +import logging +import time + + +def main(): + """ + Perform 
dimensionality reduction on a (normally PPMI) matrix by applying truncated SVD as described in + + Omer Levy, Yoav Goldberg, and Ido Dagan. 2015. Improving distributional similarity with lessons learned from word embeddings. Trans. ACL, 3. + + """ + + # Get the arguments + args = docopt('''Perform dimensionality reduction on a (normally PPMI) matrix by applying truncated SVD and save it in pickle format. + + Usage: + svd.py [-l] + + = the prefix for the input files (.sm for the matrix, .rows and .cols) and output files (.svd) + = dimensionality of low-dimensional output vectors + = eigenvalue weighting parameter + = output path for space + + Options: + -l, --len normalize final vectors to unit length + + ''') + + is_len = args['--len'] + dsm_prefix = args[''] + dim = int(args['']) + gamma = float(args['']) + outPath = args[''] + + logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO) + logging.info(__file__.upper()) + start_time = time.time() + + # Get space with sparse matrix + dsm = load_pkl_files(dsm_prefix) + + id2row = dsm.get_id2row() + + # Get matrix from space + matrix_ = dsm.get_cooccurrence_matrix() + + # Apply SVD + u, s, v = randomized_svd(matrix_.get_mat(), n_components=dim, n_iter=5, transpose=False) + + # Weight matrix + if gamma == 0.0: + matrix_ = u + elif gamma == 1.0: + #matrix_ = np.dot(u, np.diag(s)) # This is equivalent to the below formula (because s is a flattened diagonal matrix) + matrix_ = s * u + else: + #matrix_ = np.dot(u, np.power(np.diag(s), gamma)) # This is equivalent to the below formula + matrix_ = np.power(s, gamma) * u + + if is_len: + # L2-normalize vectors + l2norm1 = np.linalg.norm(matrix_, axis=1, ord=2) + l2norm1[l2norm1==0.0] = 1.0 # Convert 0 values to 1 + matrix_ /= l2norm1.reshape(len(l2norm1),1) + + dsm = Space(DenseMatrix(matrix_), id2row, []) + + # Save the Space object in pickle format + save_pkl_files(dsm, outPath + ".svd.dm", save_in_one_file=True, save_as_w2v=True) + logging.info("--- %s seconds ---" % (time.time() - start_time)) + + +if __name__ == '__main__': + main() diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..f911d47 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,3 @@ +# packages to install: +docopt==0.6.2 +gensim==3.7.3 diff --git a/scripts/make_results_disp.sh b/scripts/make_results_disp.sh new file mode 100644 index 0000000..025c9cb --- /dev/null +++ b/scripts/make_results_disp.sh @@ -0,0 +1,118 @@ +### THIS SCRIPT PRODUCES RESULTS FOR DISPERSION MEASURES (FD, TD, HD) ON COUNT SPACES ### + +### Define global parameters ### +# Test parameters +declare -a windowSizes=(1) +declare -a globalmatrixfolderprefix=matrices/test_disp +declare -a globalresultfolderprefix=results/test_disp +declare -a parameterfile=scripts/parameters_test.sh + +# DURel parameters +#declare -a windowSizes=(1) +#declare -a globalmatrixfolderprefix=matrices/durel_disp +#declare -a globalresultfolderprefix=results/durel_disp +#declare -a parameterfile=scripts/parameters_durel.sh + +# SURel parameters +#declare -a windowSizes=(1) +#declare -a globalmatrixfolderprefix=matrices/surel_disp +#declare -a globalresultfolderprefix=results/surel_disp +#declare -a parameterfile=scripts/parameters_surel.sh + + +# Get corpus- and testset-specific parameters +source $parameterfile + +# Overwrite any specific parameters here +declare -a ks=(1) +declare -a ts=(None) +declare -a iterations=(1) +declare -a dim=5 +declare -a matrixfolder1=$globalmatrixfolder1 +declare -a 
matrixfolder2=$globalmatrixfolder2 +declare -a matrixfoldercomb=$globalmatrixfoldercomb +matrixfolders=($globalmatrixfolder1 $globalmatrixfolder2) + +# Run model code +lowerBound=$lowerBound1 +upperBound=$upperBound1 +outfolder=$countmatrixfolder1 +source scripts/run_CNT.sh # Raw Count for first time period +lowerBound=$lowerBound2 +upperBound=$upperBound2 +outfolder=$countmatrixfolder2 +source scripts/run_CNT.sh # Raw Count for second time period + +# Get frequencies +lowerBound=$lowerBound1 +upperBound=$upperBound1 +outfolder=$freqresultfolder1 +source scripts/run_FREQ.sh # Raw token frequency in first time period +norm=$freqnorm1 +source scripts/run_NFREQ.sh # Normalized frequency +lowerBound=$lowerBound2 +upperBound=$upperBound2 +outfolder=$freqresultfolder2 +source scripts/run_FREQ.sh # Raw token frequency in second time period +norm=$freqnorm2 +source scripts/run_NFREQ.sh +infolder=$freqresultfolder1 +outfolder=$freqresultfolder1 +source scripts/run_TRSF.sh # Log transformation +infolder=$freqresultfolder2 +outfolder=$freqresultfolder2 +source scripts/run_TRSF.sh +# Subtract values +infolder1=$freqresultfolder1 +infolder2=$freqresultfolder2 +outfolder=$resultfolder +source scripts/run_SBTR.sh # Subtract frequencies (Frequency Difference) + +# Get types +matrixfolder=$countmatrixfolder1 +outfolder=$typesresultfolder1 +source scripts/run_TYPE.sh # Number of context types in first time period +norm=$typesnorm1 +source scripts/run_NTYPE.sh # Normalized number of context types +matrixfolder=$countmatrixfolder2 +outfolder=$typesresultfolder2 +source scripts/run_TYPE.sh # Number of context types in second time period +norm=$typesnorm2 +source scripts/run_NTYPE.sh +infolder=$typesresultfolder1 +outfolder=$typesresultfolder1 +source scripts/run_TRSF.sh # Log transformation +infolder=$typesresultfolder2 +outfolder=$typesresultfolder2 +source scripts/run_TRSF.sh +# Subtract values +infolder1=$typesresultfolder1 +infolder2=$typesresultfolder2 +outfolder=$resultfolder +source scripts/run_SBTR.sh # Subtract types (Type Difference) + +# Get entropies +matrixfolder=$countmatrixfolder1 +outfolder=$entropyresultfolder1 +source scripts/run_ENTR.sh # Entropy in first time period +source scripts/run_NENTR.sh # Normalized Entropy, by number of context types +matrixfolder=$countmatrixfolder2 +outfolder=$entropyresultfolder2 +source scripts/run_ENTR.sh # Entropy in second time period +source scripts/run_NENTR.sh +infolder=$entropyresultfolder1 +outfolder=$entropyresultfolder1 +source scripts/run_TRSF.sh # Log transformation +infolder=$entropyresultfolder2 +outfolder=$entropyresultfolder2 +source scripts/run_TRSF.sh +# Subtract values +infolder1=$entropyresultfolder1 +infolder2=$entropyresultfolder2 +outfolder=$resultfolder +source scripts/run_SBTR.sh # Subtract entropy (Entropy Difference) + +# Evaluate results +resultfolder=$resultfolder +outfolder=$globalresultfolder +source scripts/run_SPR.sh # Get Spearman correlation of measure predictions with gold scores diff --git a/scripts/make_results_sim.sh b/scripts/make_results_sim.sh new file mode 100644 index 0000000..c4c8bfc --- /dev/null +++ b/scripts/make_results_sim.sh @@ -0,0 +1,110 @@ +### THIS SCRIPT PRODUCES RESULTS FOR SIMILARITY MEASURES (CD, LND) ON ALL VECTOR SPACE AND ALIGNMENT TYPES EXCEPT WORD INJECTION ### + +## Define global parameters ## +# Test parameters +declare -a windowSizes=(1) # Window sizes for all models +declare -a globalmatrixfolderprefix=matrices/test_sim # parent folder for matrices +declare -a 
globalresultfolderprefix=results/test_sim # parent folder for results +declare -a parameterfile=scripts/parameters_test.sh # corpus- and testset-specific parameter specifications + +# DURel parameters +#declare -a windowSizes=(1) +#declare -a globalmatrixfolderprefix=matrices/durel_sim +#declare -a globalresultfolderprefix=results/durel_sim +#declare -a parameterfile=scripts/parameters_durel.sh + +# SURel parameters +#declare -a windowSizes=(1) +#declare -a globalmatrixfolderprefix=matrices/surel_sim +#declare -a globalresultfolderprefix=results/surel_sim +#declare -a parameterfile=scripts/parameters_surel.sh + + +# Get corpus- and testset-specific parameters +source $parameterfile + +# Overwrite any specific parameters here +declare -a ks=(1) +declare -a ts=(None) +declare -a iterations=(1) +declare -a dim=5 +declare -a matrixfolder1=$globalmatrixfolder1 +declare -a matrixfolder2=$globalmatrixfolder2 +declare -a matrixfoldercomb=$globalmatrixfoldercomb +matrixfolders=($globalmatrixfolder1 $globalmatrixfolder2) + +# Run model code +lowerBound=$lowerBound1 +upperBound=$upperBound1 +outfolder=$sgnsmatrixfolder1 +source scripts/run_SGNS.sh # Skip-Gram with Negative Sampling for first time period +lowerBound=$lowerBound2 +upperBound=$upperBound2 +outfolder=$sgnsmatrixfolder2 +source scripts/run_SGNS.sh # for second time period +infolder=$sgnsmatrixfolder1 +outfolder=$sgnsmatrixfolder2 +source scripts/run_SGNS_VI.sh # Skip-Gram with Negative Sampling aligned by Vector Initialization +lowerBound=$lowerBound1 +upperBound=$upperBound1 +outfolder=$countmatrixfolder1 +source scripts/run_CNT.sh # Raw Count +lowerBound=$lowerBound2 +upperBound=$upperBound2 +outfolder=$countmatrixfolder2 +source scripts/run_CNT.sh +matrixfolder=$countmatrixfolder1 +outfolder=$rimatrixfolder1 +source scripts/run_RI.sh # Random Indexing +matrixfolder=$countmatrixfolder2 +outfolder=$rimatrixfolder2 +source scripts/run_RI.sh +matrixfolder=$countmatrixfolder1 +outfolder=$ppmimatrixfolder1 +source scripts/run_PPMI.sh # PPMI weighting of count matrix +matrixfolder=$countmatrixfolder2 +outfolder=$ppmimatrixfolder2 +source scripts/run_PPMI.sh +matrixfolder=$ppmimatrixfolder1 +outfolder=$svdmatrixfolder1 +source scripts/run_SVD.sh # SVD on PPMI matrix +matrixfolder=$ppmimatrixfolder2 +outfolder=$svdmatrixfolder2 +source scripts/run_SVD.sh + +# Align matrices +outfolder1=$alignedmatrixfolder1 +outfolder2=$alignedmatrixfolder2 + +matrixfolder1=$countmatrixfolder1 +matrixfolder2=$countmatrixfolder2 +source scripts/run_CI.sh # Column Intersection alignment of count matrices +matrixfolder1=$countmatrixfolder1 +matrixfolder2=$countmatrixfolder2 +source scripts/run_SRV.sh # Shared Random Vector alignment + +# Align matrices +matrixfolder1=$ppmimatrixfolder1 +matrixfolder2=$ppmimatrixfolder2 +source scripts/run_CI.sh # Column Intersection alignment of PPMI matrices +matrixfolder1=$sgnsmatrixfolder1 +matrixfolder2=$sgnsmatrixfolder2 +source scripts/run_OP.sh # Orthogonal Procrustes alignment for SGNS +matrixfolder1=$rimatrixfolder1 +matrixfolder2=$rimatrixfolder2 +source scripts/run_OP.sh # Orthogonal Procrustes alignment for RI +matrixfolder1=$svdmatrixfolder1 +matrixfolder2=$svdmatrixfolder2 +source scripts/run_OP.sh # Orthogonal Procrustes alignment for SVD + +# Measure change scores from aligned matrices +matrixfolder1=$alignedmatrixfolder1 +matrixfolder2=$alignedmatrixfolder2 +outfolder=$resultfolder +source scripts/run_CD.sh # Cosine Distance +source scripts/run_LND.sh # Local Neighborhood Distance + +# Evaluate results 
+resultfolder=$resultfolder +outfolder=$globalresultfolder +source scripts/run_SPR.sh # Get Spearman correlation of measure predictions with gold scores diff --git a/scripts/make_results_wi.sh b/scripts/make_results_wi.sh new file mode 100644 index 0000000..b84b416 --- /dev/null +++ b/scripts/make_results_wi.sh @@ -0,0 +1,65 @@ +### THIS SCRIPT PRODUCES RESULTS FOR SIMILARITY MEASURES (CD, LND) ON ALL VECTOR SPACE TYPES WITH WORD INJECTION ### + +## Define global parameters ## +# Test parameters +declare -a windowSizes=(1) +declare -a globalmatrixfolderprefix=matrices/test_wi +declare -a globalresultfolderprefix=results/test_wi +declare -a parameterfile=scripts/parameters_test.sh + +# DURel parameters +#declare -a windowSizes=(1) +#declare -a globalmatrixfolderprefix=matrices/durel_wi +#declare -a globalresultfolderprefix=results/durel_wi +#declare -a parameterfile=scripts/parameters_durel.sh + +# SURel parameters +#declare -a windowSizes=(1) +#declare -a globalmatrixfolderprefix=matrices/surel_wi +#declare -a globalresultfolderprefix=results/surel_wi +#declare -a parameterfile=scripts/parameters_surel.sh + + +# Get corpus- and testset-specific parameters +source $parameterfile + +# Overwrite any specific parameters here +declare -a ks=(1) +declare -a ts=(None) +declare -a iterations=(1) +declare -a dim=5 +testset=$testsetwi + +declare -a matrixfolder=$globalmatrixfolderwi +matrixfolders=($sgnsmatrixfolderwi $countmatrixfolderwi $rimatrixfolderwi $ppmimatrixfolderwi $svdmatrixfolderwi) + +# Run model code +outfolder=$sgnsmatrixfolderwi +source scripts/run_SGNS_WI.sh # Skip-Gram with Negative Sampling for Word Injection +outfolder=$countmatrixfolderwi +source scripts/run_CNT_WI.sh # Raw Count +matrixfolder=$countmatrixfolderwi +outfolder=$rimatrixfolderwi +source scripts/run_RI.sh # Random Indexing +matrixfolder=$countmatrixfolderwi +outfolder=$ppmimatrixfolderwi +source scripts/run_PPMI.sh # PPMI +matrixfolder=$ppmimatrixfolderwi +outfolder=$svdmatrixfolderwi +source scripts/run_SVD.sh # SVD + +# Get Predictions +for matrixfolder in "${matrixfolders[@]}" +do + # Measure change scores from common Word Injection matrix + matrixfolder1=$matrixfolder + matrixfolder2=$matrixfolder + outfolder=$resultfolder + source scripts/run_CD.sh # Cosine Distance + source scripts/run_LND.sh # Local Neighborhood Distance +done + +# Evaluate predictions +resultfolder=$resultfolder +outfolder=$globalresultfolder +source scripts/run_SPR.sh # Get Spearman correlation of measure predictions with gold scores diff --git a/scripts/parameters_durel.sh b/scripts/parameters_durel.sh new file mode 100644 index 0000000..e70f606 --- /dev/null +++ b/scripts/parameters_durel.sh @@ -0,0 +1,88 @@ +shopt -s extglob # For more powerful regular expressions in shell + +### Define parameters ### +declare -a corpDir="corpora/test/" # directory for corpus files (all files in directory will be read) +declare -a wiCorpDir="corpora/test_wi/" # directory for word-injected corpus (only needed for Word Injection) +declare -a bounds1=(1750 1799) # lower and upper bound for first time period +declare -a bounds2=(1850 1899) # lower and upper bound for second time period +declare -a freqnorms=(26650530 40323497) # normalization constants for token frequency (total number of tokens in first and second time period) +declare -a typesnorms=(252437 796365) # normalization constants for number of context types (total number of types in first and second time period) +declare -a ks=(5 1) # values for shifting parameter k +declare -a ts=(0.001 None) # 
values for subsampling parameter t +declare -a iterations=(1 2 3 4 5) # list of iterations, each item is one iteration, for five iterations define: iterations=(1 2 3 4 5) +declare -a dim=300 # dimensionality of low-dimensional matrices (SVD/RI/SGNS) +declare -a testset="testsets/durel/targets.tsv" # target words for which change scores should be predicted (one target per line repeated twice with tab-separation, i.e., 'word\tword') +declare -a testsetwi="testsets/durel/targets_wi.tsv" # target words for Word Injection (one target per line, injected version in first column, non-injected version in second column, i.e., 'word_\tword') +declare -a goldscorefile="testsets/durel/gold.tsv" # file with gold scores for target words in same order as targets in testsets + + +### No changes needed after this line ### + +# Get time bounds for corpora +lowerBound1=${bounds1[0]} +upperBound1=${bounds1[1]} +lowerBound2=${bounds2[0]} +upperBound2=${bounds2[1]} + +# Get normalization constants for dispersion measures +declare -a freqnorm1=${freqnorms[0]} +declare -a freqnorm2=${freqnorms[1]} +declare -a typesnorm1=${typesnorms[0]} +declare -a typesnorm2=${typesnorms[1]} + + +## Make result folder structure +declare -a globalresultfolder=$globalresultfolderprefix/$(basename "$corpDir") +mkdir --parents $globalresultfolder +declare -a globalresultfolder=$globalresultfolderprefix/$(basename "$corpDir") +mkdir --parents $globalresultfolder +declare -a resultfolder=$globalresultfolder/$(basename "${testset%.*}") +mkdir --parents $resultfolder +# For dispersion measures +declare -a resultfolder1=$resultfolder/$lowerBound1-$upperBound1 +mkdir --parents $resultfolder1 +declare -a resultfolder2=$resultfolder/$lowerBound2-$upperBound2 +mkdir --parents $resultfolder2 +declare -a resultfolders=($resultfolder1:1 $resultfolder2:2) +declare -a measures=(entropy types freq) +for folder2suffix in "${resultfolders[@]}" +do + folder="$(cut -d':' -f1 <<<"$folder2suffix")" + suffix="$(cut -d':' -f2 <<<"$folder2suffix")" + for measure in "${measures[@]}" + do + declare -a $measure\resultfolder$suffix=$folder/$measure + mkdir --parents $( eval "echo $"$measure\resultfolder$suffix"" ) + done +done + + +# Make matrix folder structure +declare -a globalmatrixfolder=$globalmatrixfolderprefix/$(basename "$corpDir") +declare -a globalmatrixfolder1=$globalmatrixfolder/$lowerBound1-$upperBound1 +declare -a globalmatrixfolder2=$globalmatrixfolder/$lowerBound2-$upperBound2 +declare -a globalmatrixfolderwi=$globalmatrixfolder/wi +mkdir --parents $globalmatrixfolder +mkdir --parents $globalmatrixfolder1 +mkdir --parents $globalmatrixfolder2 +mkdir --parents $globalmatrixfolderwi + +declare -a matrixfolders=($globalmatrixfolder1:1 $globalmatrixfolder2:2 $globalmatrixfolderwi:wi) +for matrixfolder2suffix in "${matrixfolders[@]}" +do + matrixfolder="$(cut -d':' -f1 <<<"$matrixfolder2suffix")" + suffix="$(cut -d':' -f2 <<<"$matrixfolder2suffix")" + + declare -a countmatrixfolder$suffix=$matrixfolder/count + declare -a ppmimatrixfolder$suffix=$matrixfolder/ppmi + declare -a svdmatrixfolder$suffix=$matrixfolder/svd + declare -a rimatrixfolder$suffix=$matrixfolder/ri + declare -a sgnsmatrixfolder$suffix=$matrixfolder/sgns + declare -a alignedmatrixfolder$suffix=$matrixfolder/aligned + mkdir --parents $( eval "echo $"countmatrixfolder$suffix"" ) + mkdir --parents $( eval "echo $"ppmimatrixfolder$suffix"" ) + mkdir --parents $( eval "echo $"svdmatrixfolder$suffix"" ) + mkdir --parents $( eval "echo $"rimatrixfolder$suffix"" ) + mkdir --parents $( eval 
"echo $"sgnsmatrixfolder$suffix"" ) + mkdir --parents $( eval "echo $"alignedmatrixfolder$suffix"" ) +done diff --git a/scripts/parameters_surel.sh b/scripts/parameters_surel.sh new file mode 100644 index 0000000..ea41b66 --- /dev/null +++ b/scripts/parameters_surel.sh @@ -0,0 +1,88 @@ +shopt -s extglob # For more powerful regular expressions in shell + +### Define parameters ### +declare -a corpDir="corpora/test/" # directory for corpus files (all files in directory will be read) +declare -a wiCorpDir="corpora/test_wi/" # directory for word-injected corpus (only needed for Word Injection) +declare -a bounds1=(2006 2006) # lower and upper bound for first time period +declare -a bounds2=(2020 2020) # lower and upper bound for second time period +declare -a freqnorms=(109731661 1049573) # normalization constants for token frequency (total number of tokens in first and second time period) +declare -a typesnorms=(2417171 49187) # normalization constants for number of context types (total number of types in first and second time period) +declare -a ks=(5 1) # values for shifting parameter k +declare -a ts=(0.001 None) # values for subsampling parameter t +declare -a iterations=(1 2 3 4 5) # list of iterations, each item is one iteration, for five iterations define: iterations=(1 2 3 4 5) +declare -a dim=300 # dimensionality of low-dimensional matrices (SVD/RI/SGNS) +declare -a testset="testsets/surel/targets.tsv" # target words for which change scores should be predicted (one target per line repeated twice with tab-separation, i.e., 'word\tword') +declare -a testsetwi="testsets/surel/targets_wi.tsv" # target words for Word Injection (one target per line, injected version in first column, non-injected version in second column, i.e., 'word_\tword') +declare -a goldscorefile="testsets/surel/gold.tsv" # file with gold scores for target words in same order as targets in testsets + + +### No changes needed after this line ### + +# Get time bounds for corpora +lowerBound1=${bounds1[0]} +upperBound1=${bounds1[1]} +lowerBound2=${bounds2[0]} +upperBound2=${bounds2[1]} + +# Get normalization constants for dispersion measures +declare -a freqnorm1=${freqnorms[0]} +declare -a freqnorm2=${freqnorms[1]} +declare -a typesnorm1=${typesnorms[0]} +declare -a typesnorm2=${typesnorms[1]} + + +## Make result folder structure +declare -a globalresultfolder=$globalresultfolderprefix/$(basename "$corpDir") +mkdir --parents $globalresultfolder +declare -a globalresultfolder=$globalresultfolderprefix/$(basename "$corpDir") +mkdir --parents $globalresultfolder +declare -a resultfolder=$globalresultfolder/$(basename "${testset%.*}") +mkdir --parents $resultfolder +# For dispersion measures +declare -a resultfolder1=$resultfolder/$lowerBound1-$upperBound1 +mkdir --parents $resultfolder1 +declare -a resultfolder2=$resultfolder/$lowerBound2-$upperBound2 +mkdir --parents $resultfolder2 +declare -a resultfolders=($resultfolder1:1 $resultfolder2:2) +declare -a measures=(entropy types freq) +for folder2suffix in "${resultfolders[@]}" +do + folder="$(cut -d':' -f1 <<<"$folder2suffix")" + suffix="$(cut -d':' -f2 <<<"$folder2suffix")" + for measure in "${measures[@]}" + do + declare -a $measure\resultfolder$suffix=$folder/$measure + mkdir --parents $( eval "echo $"$measure\resultfolder$suffix"" ) + done +done + + +# Make matrix folder structure +declare -a globalmatrixfolder=$globalmatrixfolderprefix/$(basename "$corpDir") +declare -a globalmatrixfolder1=$globalmatrixfolder/$lowerBound1-$upperBound1 +declare -a 
globalmatrixfolder2=$globalmatrixfolder/$lowerBound2-$upperBound2 +declare -a globalmatrixfolderwi=$globalmatrixfolder/wi +mkdir --parents $globalmatrixfolder +mkdir --parents $globalmatrixfolder1 +mkdir --parents $globalmatrixfolder2 +mkdir --parents $globalmatrixfolderwi + +declare -a matrixfolders=($globalmatrixfolder1:1 $globalmatrixfolder2:2 $globalmatrixfolderwi:wi) +for matrixfolder2suffix in "${matrixfolders[@]}" +do + matrixfolder="$(cut -d':' -f1 <<<"$matrixfolder2suffix")" + suffix="$(cut -d':' -f2 <<<"$matrixfolder2suffix")" + + declare -a countmatrixfolder$suffix=$matrixfolder/count + declare -a ppmimatrixfolder$suffix=$matrixfolder/ppmi + declare -a svdmatrixfolder$suffix=$matrixfolder/svd + declare -a rimatrixfolder$suffix=$matrixfolder/ri + declare -a sgnsmatrixfolder$suffix=$matrixfolder/sgns + declare -a alignedmatrixfolder$suffix=$matrixfolder/aligned + mkdir --parents $( eval "echo $"countmatrixfolder$suffix"" ) + mkdir --parents $( eval "echo $"ppmimatrixfolder$suffix"" ) + mkdir --parents $( eval "echo $"svdmatrixfolder$suffix"" ) + mkdir --parents $( eval "echo $"rimatrixfolder$suffix"" ) + mkdir --parents $( eval "echo $"sgnsmatrixfolder$suffix"" ) + mkdir --parents $( eval "echo $"alignedmatrixfolder$suffix"" ) +done diff --git a/scripts/parameters_test.sh b/scripts/parameters_test.sh new file mode 100644 index 0000000..972379c --- /dev/null +++ b/scripts/parameters_test.sh @@ -0,0 +1,88 @@ +shopt -s extglob # For more powerful regular expressions in shell + +### Define parameters ### +declare -a corpDir="corpora/test/" # directory for corpus files (all files in directory will be read) +declare -a wiCorpDir="corpora/test_wi/" # directory for word-injected corpus (only needed for Word Injection) +declare -a bounds1=(1750 1799) # lower and upper bound for first time period +declare -a bounds2=(1850 1899) # lower and upper bound for second time period +declare -a freqnorms=(73314 110409) # normalization constants for token frequency (total number of tokens in first and second time period) +declare -a typesnorms=(9658 14177) # normalization constants for number of context types (total number of types in first and second time period) +declare -a ks=(1) # values for shifting parameter k +declare -a ts=(None) # values for subsampling parameter t +declare -a iterations=(1) # list of iterations, each item is one iteration, for five iterations define: iterations=(1 2 3 4 5) +declare -a dim=30 # dimensionality of low-dimensional matrices (SVD/RI/SGNS) +declare -a testset="testsets/test/targets.tsv" # target words for which change scores should be predicted (one target per line repeated twice with tab-separation, i.e., 'word\tword') +declare -a testsetwi="testsets/test/targets_wi.tsv" # target words for Word Injection (one target per line, injected version in first column, non-injected version in second column, i.e., 'word_\tword') +declare -a goldscorefile="testsets/test/gold.tsv" # file with gold scores for target words in same order as targets in testsets + + +### No changes needed after this line ### + +# Get time bounds for corpora +lowerBound1=${bounds1[0]} +upperBound1=${bounds1[1]} +lowerBound2=${bounds2[0]} +upperBound2=${bounds2[1]} + +# Get normalization constants for dispersion measures +declare -a freqnorm1=${freqnorms[0]} +declare -a freqnorm2=${freqnorms[1]} +declare -a typesnorm1=${typesnorms[0]} +declare -a typesnorm2=${typesnorms[1]} + + +## Make result folder structure +declare -a globalresultfolder=$globalresultfolderprefix/$(basename "$corpDir") +mkdir 
--parents $globalresultfolder +declare -a globalresultfolder=$globalresultfolderprefix/$(basename "$corpDir") +mkdir --parents $globalresultfolder +declare -a resultfolder=$globalresultfolder/$(basename "${testset%.*}") +mkdir --parents $resultfolder +# For dispersion measures +declare -a resultfolder1=$resultfolder/$lowerBound1-$upperBound1 +mkdir --parents $resultfolder1 +declare -a resultfolder2=$resultfolder/$lowerBound2-$upperBound2 +mkdir --parents $resultfolder2 +declare -a resultfolders=($resultfolder1:1 $resultfolder2:2) +declare -a measures=(entropy types freq) +for folder2suffix in "${resultfolders[@]}" +do + folder="$(cut -d':' -f1 <<<"$folder2suffix")" + suffix="$(cut -d':' -f2 <<<"$folder2suffix")" + for measure in "${measures[@]}" + do + declare -a $measure\resultfolder$suffix=$folder/$measure + mkdir --parents $( eval "echo $"$measure\resultfolder$suffix"" ) + done +done + + +# Make matrix folder structure +declare -a globalmatrixfolder=$globalmatrixfolderprefix/$(basename "$corpDir") +declare -a globalmatrixfolder1=$globalmatrixfolder/$lowerBound1-$upperBound1 +declare -a globalmatrixfolder2=$globalmatrixfolder/$lowerBound2-$upperBound2 +declare -a globalmatrixfolderwi=$globalmatrixfolder/wi +mkdir --parents $globalmatrixfolder +mkdir --parents $globalmatrixfolder1 +mkdir --parents $globalmatrixfolder2 +mkdir --parents $globalmatrixfolderwi + +declare -a matrixfolders=($globalmatrixfolder1:1 $globalmatrixfolder2:2 $globalmatrixfolderwi:wi) +for matrixfolder2suffix in "${matrixfolders[@]}" +do + matrixfolder="$(cut -d':' -f1 <<<"$matrixfolder2suffix")" + suffix="$(cut -d':' -f2 <<<"$matrixfolder2suffix")" + + declare -a countmatrixfolder$suffix=$matrixfolder/count + declare -a ppmimatrixfolder$suffix=$matrixfolder/ppmi + declare -a svdmatrixfolder$suffix=$matrixfolder/svd + declare -a rimatrixfolder$suffix=$matrixfolder/ri + declare -a sgnsmatrixfolder$suffix=$matrixfolder/sgns + declare -a alignedmatrixfolder$suffix=$matrixfolder/aligned + mkdir --parents $( eval "echo $"countmatrixfolder$suffix"" ) + mkdir --parents $( eval "echo $"ppmimatrixfolder$suffix"" ) + mkdir --parents $( eval "echo $"svdmatrixfolder$suffix"" ) + mkdir --parents $( eval "echo $"rimatrixfolder$suffix"" ) + mkdir --parents $( eval "echo $"sgnsmatrixfolder$suffix"" ) + mkdir --parents $( eval "echo $"alignedmatrixfolder$suffix"" ) +done diff --git a/scripts/run_CD.sh b/scripts/run_CD.sh new file mode 100644 index 0000000..9aada86 --- /dev/null +++ b/scripts/run_CD.sh @@ -0,0 +1,8 @@ + +matrices=($matrixfolder1/!(*@(|row2id*|id2row*|id2column*|column2id*))) + +for matrix in "${matrices[@]}" +do + python -u measures/cd.py -s "${matrix%.*}" $matrixfolder2/$(basename "${matrix%.*}") $outfolder/CD-$(basename "$testset")-$(basename "$matrix") $testset # cosine distance +done + diff --git a/scripts/run_CI.sh b/scripts/run_CI.sh new file mode 100644 index 0000000..12fd9da --- /dev/null +++ b/scripts/run_CI.sh @@ -0,0 +1,8 @@ + +matrices=($matrixfolder1/!(*@(row2id*|id2row*|id2column*|column2id*))) + +for matrix in "${matrices[@]}" +do + python -u alignment/ci_align.py $outfolder1/$(basename "${matrix%.*}")-CI $outfolder2/$(basename "${matrix%.*}")-CI "${matrix%.*}" $matrixfolder2/$(basename "${matrix%.*}") # align matrices by column intersection +done + diff --git a/scripts/run_CNT.sh b/scripts/run_CNT.sh new file mode 100644 index 0000000..d06f135 --- /dev/null +++ b/scripts/run_CNT.sh @@ -0,0 +1,5 @@ + +for windowSize in "${windowSizes[@]}" +do + python -u representations/count.py $windowSize $corpDir 
$outfolder/$(basename "$corpDir")-win$windowSize.count.sm $lowerBound $upperBound # Create count matrix +done diff --git a/scripts/run_CNT_WI.sh b/scripts/run_CNT_WI.sh new file mode 100644 index 0000000..a43feec --- /dev/null +++ b/scripts/run_CNT_WI.sh @@ -0,0 +1,5 @@ + +for windowSize in "${windowSizes[@]}" +do + python -u representations/count.py $windowSize $wiCorpDir $outfolder/$(basename "$wiCorpDir")-win$windowSize.count.sm 0000 9999 # construct count matrix for word-injected corpus +done diff --git a/scripts/run_ENTR.sh b/scripts/run_ENTR.sh new file mode 100644 index 0000000..6dc37eb --- /dev/null +++ b/scripts/run_ENTR.sh @@ -0,0 +1,8 @@ + +matrices=($matrixfolder/!(*@(|row2id*|id2row*|id2column*|column2id*))) + +for matrix in "${matrices[@]}" +do + python -u measures/entropy.py "${matrix%.*}" $outfolder/entropies-$(basename "$matrix") $testset # entropy +done + diff --git a/scripts/run_FREQ.sh b/scripts/run_FREQ.sh new file mode 100644 index 0000000..2741ca2 --- /dev/null +++ b/scripts/run_FREQ.sh @@ -0,0 +1,3 @@ + +python -u measures/freq.py $corpDir $outfolder/freq-$(basename "$corpDir") $lowerBound $upperBound $testset # token frequency + diff --git a/scripts/run_LND.sh b/scripts/run_LND.sh new file mode 100644 index 0000000..9803905 --- /dev/null +++ b/scripts/run_LND.sh @@ -0,0 +1,8 @@ + +matrices=($matrixfolder1/!(*@(|row2id*|id2row*|id2column*|column2id*))) + +for matrix in "${matrices[@]}" +do + python -u measures/lnd.py -s "${matrix%.*}" $matrixfolder2/$(basename "${matrix%.*}") 25 $outfolder/LND-$(basename "$testset")-$(basename "$matrix") $testset # local neighborhood distance +done + diff --git a/scripts/run_NENTR.sh b/scripts/run_NENTR.sh new file mode 100644 index 0000000..d769a11 --- /dev/null +++ b/scripts/run_NENTR.sh @@ -0,0 +1,8 @@ + +matrices=($matrixfolder/!(*@(|row2id*|id2row*|id2column*|column2id*))) + +for matrix in "${matrices[@]}" +do + python -u measures/entropy.py -n "${matrix%.*}" $outfolder/normalized-entropies-$(basename "$matrix") $testset # entropy normalized +done + diff --git a/scripts/run_NFREQ.sh b/scripts/run_NFREQ.sh new file mode 100644 index 0000000..0daaaf2 --- /dev/null +++ b/scripts/run_NFREQ.sh @@ -0,0 +1,3 @@ + +python -u measures/freq.py -n $norm $corpDir $outfolder/normalized-freq-$(basename "$corpDir") $lowerBound $upperBound $testset # token frequency normalized + diff --git a/scripts/run_NTYPE.sh b/scripts/run_NTYPE.sh new file mode 100644 index 0000000..1d0b0a4 --- /dev/null +++ b/scripts/run_NTYPE.sh @@ -0,0 +1,8 @@ + +matrices=($matrixfolder/!(*@(|row2id*|id2row*|id2column*|column2id*))) + +for matrix in "${matrices[@]}" +do + python -u measures/types.py -n $norm "${matrix%.*}" $outfolder/normalized-types-$(basename "$matrix") $testset # number of context types normalized +done + diff --git a/scripts/run_OP+.sh b/scripts/run_OP+.sh new file mode 100644 index 0000000..21f98b7 --- /dev/null +++ b/scripts/run_OP+.sh @@ -0,0 +1,8 @@ + +matrices=($matrixfolder1/!(*@(row2id*|id2row*|id2column*|column2id*))) + +for matrix in "${matrices[@]}" +do + python3 -u alignment/map_embeddings.py --normalize unit center unit --init_identical --whiten --src_reweight=0.5 --trg_reweight=0.5 --src_dewhiten='src' --trg_dewhiten='trg' $matrixfolder2/$(basename "$matrix") $matrix $outfolder2/$(basename "${matrix%.*}")-OP+.w2v $outfolder1/$(basename "${matrix%.*}")-OP+.w2v # align matrices by Orthogonal Procrustes plus additional pre- and post-processing steps +done + diff --git a/scripts/run_OP-.sh b/scripts/run_OP-.sh new file mode 100644 index 
0000000..d958edd --- /dev/null +++ b/scripts/run_OP-.sh @@ -0,0 +1,8 @@ + +matrices=($matrixfolder1/!(*@(row2id*|id2row*|id2column*|column2id*))) + +for matrix in "${matrices[@]}" +do + python3 -u alignment/map_embeddings.py --normalize unit --init_identical --orthogonal $matrixfolder2/$(basename "$matrix") $matrix $outfolder2/$(basename "${matrix%.*}")-OP-.w2v $outfolder1/$(basename "${matrix%.*}")-OP-.w2v # align matrices by Orthogonal Procrustes without centering +done + diff --git a/scripts/run_OP.sh b/scripts/run_OP.sh new file mode 100644 index 0000000..8431d64 --- /dev/null +++ b/scripts/run_OP.sh @@ -0,0 +1,8 @@ + +matrices=($matrixfolder1/!(*@(row2id*|id2row*|id2column*|column2id*))) + +for matrix in "${matrices[@]}" +do + python3 -u alignment/map_embeddings.py --normalize unit center --init_identical --orthogonal $matrixfolder2/$(basename "$matrix") $matrix $outfolder2/$(basename "${matrix%.*}")-OP.w2v $outfolder1/$(basename "${matrix%.*}")-OP.w2v # align matrices by Orthogonal Procrustes +done + diff --git a/scripts/run_PPMI.sh b/scripts/run_PPMI.sh new file mode 100644 index 0000000..9a421f7 --- /dev/null +++ b/scripts/run_PPMI.sh @@ -0,0 +1,10 @@ + +matrices=($matrixfolder/!(*@(row2id*|id2row*|id2column*|column2id*))) + +for matrix in "${matrices[@]}" +do + for k in "${ks[@]}" + do + python -u representations/ppmi.py "${matrix%.*}" $k 0.75 $outfolder/$(basename "${matrix%.*}")-k$k # weight matrix with PPMI + done +done diff --git a/scripts/run_RI.sh b/scripts/run_RI.sh new file mode 100644 index 0000000..25d07c1 --- /dev/null +++ b/scripts/run_RI.sh @@ -0,0 +1,15 @@ + +matrices=($matrixfolder/!(*@(row2id*|id2row*|id2column*|column2id*))) + +for matrix in "${matrices[@]}" +do + for iteration in "${iterations[@]}" + do + for t in "${ts[@]}" + do + python -u representations/ri.py -s 2 $dim $t $outfolder/$(basename "${matrix%.*}")-t$t-iter$iteration $outfolder/$(basename "${matrix%.*}")-t$t-iter$iteration-elemental-space "${matrix%.*}" # reduce matrix by random indexing + done + done +done + +rm $outfolder/*elemental-space* # delete random vectors after constructing the matrix diff --git a/scripts/run_SBTR.sh b/scripts/run_SBTR.sh new file mode 100644 index 0000000..6e49038 --- /dev/null +++ b/scripts/run_SBTR.sh @@ -0,0 +1,8 @@ + +resultfiles=($infolder1/*) + +for resultfile in "${resultfiles[@]}" +do + python -u measures/subtract.py -a $testset $resultfile $infolder2/$(basename "$resultfile") $outfolder/subtract-$(basename "${resultfile%.*}") # subtract values +done + diff --git a/scripts/run_SGNS.sh b/scripts/run_SGNS.sh new file mode 100644 index 0000000..fa61092 --- /dev/null +++ b/scripts/run_SGNS.sh @@ -0,0 +1,14 @@ + +for windowSize in "${windowSizes[@]}" +do + for k in "${ks[@]}" + do + for t in "${ts[@]}" + do + for iteration in "${iterations[@]}" + do + python -u representations/sgns.py $windowSize $dim $k $t 0 5 $corpDir $outfolder/$(basename "$corpDir")-win$windowSize-k$k-t$t-iter$iteration.sgns $lowerBound $upperBound # construct word2vec skip-gram embeddings + done + done + done +done diff --git a/scripts/run_SGNS_VI.sh b/scripts/run_SGNS_VI.sh new file mode 100644 index 0000000..c890f6b --- /dev/null +++ b/scripts/run_SGNS_VI.sh @@ -0,0 +1,15 @@ + +for windowSize in "${windowSizes[@]}" +do + for k in "${ks[@]}" + do + for t in "${ts[@]}" + do + for iteration in "${iterations[@]}" + do + python -u alignment/sgns_vi.py $infolder/$(basename "$corpDir")-win$windowSize-k$k-t$t-iter$iteration.sgns.w2v $windowSize $dim $k $t 0 5 $corpDir $outfolder/$(basename 
"$corpDir")-win$windowSize-k$k-t$t-iter$iteration\_vi.sgns $lowerBound2 $upperBound2 # construct word2vec skip-gram embeddings with vector initialization + scp $infolder/$(basename "$corpDir")-win$windowSize-k$k-t$t-iter$iteration.sgns.w2v $infolder/$(basename "$corpDir")-win$windowSize-k$k-t$t-iter$iteration\_vi.sgns.w2v # copy initialization vectors as matrix for first time period + done + done + done +done diff --git a/scripts/run_SGNS_WI.sh b/scripts/run_SGNS_WI.sh new file mode 100644 index 0000000..9963989 --- /dev/null +++ b/scripts/run_SGNS_WI.sh @@ -0,0 +1,14 @@ + +for windowSize in "${windowSizes[@]}" +do + for k in "${ks[@]}" + do + for t in "${ts[@]}" + do + for iteration in "${iterations[@]}" + do + python -u representations/sgns.py $windowSize $dim $k $t 0 5 $wiCorpDir $outfolder/$(basename "$wiCorpDir")-win$windowSize-k$k-t$t-iter$iteration 0000 9999 # construct word2vec skip-gram embeddings for word-injected corpus + done + done + done +done diff --git a/scripts/run_SPR.sh b/scripts/run_SPR.sh new file mode 100644 index 0000000..d741b27 --- /dev/null +++ b/scripts/run_SPR.sh @@ -0,0 +1,6 @@ + +for resultfile in $resultfolder/*.csv +do + declare -a resultfileshort=${resultfile#$(dirname "$(dirname "$resultfile")")/} + python -u evaluation/spearman.py $goldscorefile $resultfile $(basename "$goldscorefile") $resultfileshort 0 1 >> $outfolder/spearman_$(basename "$resultfolder").csv # evaluate results with Spearman correlation +done diff --git a/scripts/run_SRV.sh b/scripts/run_SRV.sh new file mode 100644 index 0000000..bc49475 --- /dev/null +++ b/scripts/run_SRV.sh @@ -0,0 +1,15 @@ + +matrices=($matrixfolder1/!(*@(row2id*|id2row*|id2column*|column2id*))) + +for matrix in "${matrices[@]}" +do + for iteration in "${iterations[@]}" + do + for t in "${ts[@]}" + do + python -u alignment/srv_align.py -s 2 $dim $t $outfolder1/$(basename "${matrix%.*}")-t$t-iter$iteration-SRV $outfolder2/$(basename "${matrix%.*}")-t$t-iter$iteration-SRV $outfolder1/$(basename "${matrix%.*}")-t$t-iter$iteration-elemental-space "${matrix%.*}" $matrixfolder2/$(basename "${matrix%.*}") # construct random indexing matrices from count matrices with shared random vectors + done + done +done + +rm $outfolder1/*elemental-space* # delete the shared random vectors after constructing the matrices diff --git a/scripts/run_SVD.sh b/scripts/run_SVD.sh new file mode 100644 index 0000000..2e7a2d6 --- /dev/null +++ b/scripts/run_SVD.sh @@ -0,0 +1,10 @@ + +matrices=($matrixfolder/!(*@(row2id*|id2row*|id2column*|column2id*))) + +for matrix in "${matrices[@]}" +do + for iteration in "${iterations[@]}" + do + python -u representations/svd.py "${matrix%.*}" $dim 0.0 $outfolder/$(basename "${matrix%.*}")-iter$iteration # reduce matrix by SVD + done +done diff --git a/scripts/run_TRSF.sh b/scripts/run_TRSF.sh new file mode 100644 index 0000000..9085d7c --- /dev/null +++ b/scripts/run_TRSF.sh @@ -0,0 +1,8 @@ + +resultfiles=($infolder/*) + +for resultfile in "${resultfiles[@]}" +do + python -u measures/transform.py --log2 $testset $resultfile $outfolder/transformed-$(basename "${resultfile%.*}") # log-transform values +done + diff --git a/scripts/run_TYPE.sh b/scripts/run_TYPE.sh new file mode 100644 index 0000000..7528827 --- /dev/null +++ b/scripts/run_TYPE.sh @@ -0,0 +1,8 @@ + +matrices=($matrixfolder/!(*@(|row2id*|id2row*|id2column*|column2id*))) + +for matrix in "${matrices[@]}" +do + python -u measures/types.py "${matrix%.*}" $outfolder/types-$(basename "$matrix") $testset # number of context types +done + diff --git 
a/testsets/durel/durel.tsv b/testsets/durel/durel.tsv new file mode 100644 index 0000000..c8a3433 --- /dev/null +++ b/testsets/durel/durel.tsv @@ -0,0 +1,20 @@ +Lexeme POS LSC frequency Dta18 frequency Dta19 +Vorwort NN -1.5825 85 273 +Donnerwetter NN -1.8375 100 89 +Presse NN -1.8825 193 1519 +Feine NN -1.93 112 84 +Anstalt NN -2.0725 425 911 +Feder NN -2.1403508772 1489 3022 +billig ADJ -2.4316666667 2073 1705 +Motiv NN -2.66 104 2551 +Anstellung NN -2.6789473684 53 499 +packen VV -2.7350877193 279 1057 +locker ADJ -2.84 454 769 +technisch ADJ -2.89 25 2177 +geharnischt ADJ -3 56 117 +Zufall NN -3.1125 2444 1618 +Bilanz NN -3.2 51 58 +englisch ADJ -3.3375 1921 7280 +Reichstag NN -3.4525 609 1781 +Museum NN -3.7325 414 1827 +Abend NN -3.79 4144 4372 diff --git a/testsets/durel/gold.tsv b/testsets/durel/gold.tsv new file mode 100644 index 0000000..2f6bcae --- /dev/null +++ b/testsets/durel/gold.tsv @@ -0,0 +1,19 @@ +-3.79 +-2.0725 +-2.6789473684 +-3.2 +-2.4316666667 +-1.8375 +-3.3375 +-2.1403508772 +-1.93 +-3 +-2.84 +-2.66 +-3.7325 +-2.7350877193 +-1.8825 +-3.4525 +-2.89 +-1.5825 +-3.1125 diff --git a/testsets/durel/targets.tsv b/testsets/durel/targets.tsv new file mode 100644 index 0000000..19803af --- /dev/null +++ b/testsets/durel/targets.tsv @@ -0,0 +1,19 @@ +Abend Abend +Anstalt Anstalt +Anstellung Anstellung +Bilanz Bilanz +billig billig +Donnerwetter Donnerwetter +englisch englisch +Feder Feder +Feine Feine +geharnischt geharnischt +locker locker +Motiv Motiv +Museum Museum +packen packen +Presse Presse +Reichstag Reichstag +technisch technisch +Vorwort Vorwort +Zufall Zufall diff --git a/testsets/durel/targets_wi.tsv b/testsets/durel/targets_wi.tsv new file mode 100644 index 0000000..89f8426 --- /dev/null +++ b/testsets/durel/targets_wi.tsv @@ -0,0 +1,19 @@ +Abend_ Abend +Anstalt_ Anstalt +Anstellung_ Anstellung +Bilanz_ Bilanz +billig_ billig +Donnerwetter_ Donnerwetter +englisch_ englisch +Feder_ Feder +Feine_ Feine +geharnischt_ geharnischt +locker_ locker +Motiv_ Motiv +Museum_ Museum +packen_ packen +Presse_ Presse +Reichstag_ Reichstag +technisch_ technisch +Vorwort_ Vorwort +Zufall_ Zufall diff --git a/testsets/surel/gold.tsv b/testsets/surel/gold.tsv new file mode 100644 index 0000000..ec24d45 --- /dev/null +++ b/testsets/surel/gold.tsv @@ -0,0 +1,21 @@ +-1.75 +-2.95 +-3.75 +-2.25 +-4 +-1.15 +-2.7 +-1.5294117647 +-3.4473684211 +-3.5 +-3.3333333333 +-3.1 +-3.55 +-3.7368421053 +-1.1 +-1.05 +-4 +-3.975 +-1.4166666667 +-1.05 +-2.65 diff --git a/testsets/surel/surel.tsv b/testsets/surel/surel.tsv new file mode 100644 index 0000000..81bf600 --- /dev/null +++ b/testsets/surel/surel.tsv @@ -0,0 +1,22 @@ +Lexeme POS LSC frequency SdeWaC frequency Cook +Schnee NN -1.05 2228 53 +Strudel NN -1.05 232 46 +schlagen VV -1.1 14693 309 +Gericht NN -1.15 13263 1071 +Schuß NN -1.4166666667 2153 117 +Hamburger NN -1.5294117647 5558 46 +abschrecken VV -1.75 730 170 +Form NN -2.25 36639 851 +trennen VV -2.65 5771 170 +Glas NN -2.7 3830 863 +Blech NN -2.95 409 145 +Prise NN -3.1 370 622 +Paprika NN -3.3333333333 377 453 +Mandel NN -3.4473684211 402 274 +Messer NN -3.5 1774 925 +Rum NN -3.55 244 181 +Salz NN -3.7368421053 3087 5806 +Eiweiß NN -3.75 1075 3037 +Schokolade NN -3.975 947 251 +Gemüse NN -4 2696 1224 +Schnittlauch NN -4 156 247 diff --git a/testsets/surel/targets.tsv b/testsets/surel/targets.tsv new file mode 100644 index 0000000..beb4f65 --- /dev/null +++ b/testsets/surel/targets.tsv @@ -0,0 +1,21 @@ +abschrecken abschrecken +Blech Blech +Eiweiß Eiweiß +Form Form +Gemüse Gemüse 
+Gericht Gericht +Glas Glas +Hamburger Hamburger +Mandel Mandel +Messer Messer +Paprika Paprika +Prise Prise +Rum Rum +Salz Salz +schlagen schlagen +Schnee Schnee +Schnittlauch Schnittlauch +Schokolade Schokolade +Schuß Schuß +Strudel Strudel +trennen trennen diff --git a/testsets/surel/targets_wi.tsv b/testsets/surel/targets_wi.tsv new file mode 100644 index 0000000..d58d7d2 --- /dev/null +++ b/testsets/surel/targets_wi.tsv @@ -0,0 +1,21 @@ +abschrecken_ abschrecken +Blech_ Blech +Eiweiß_ Eiweiß +Form_ Form +Gemüse_ Gemüse +Gericht_ Gericht +Glas_ Glas +Hamburger_ Hamburger +Mandel_ Mandel +Messer_ Messer +Paprika_ Paprika +Prise_ Prise +Rum_ Rum +Salz_ Salz +schlagen_ schlagen +Schnee_ Schnee +Schnittlauch_ Schnittlauch +Schokolade_ Schokolade +Schuß_ Schuß +Strudel_ Strudel +trennen_ trennen diff --git a/testsets/test/gold.tsv b/testsets/test/gold.tsv new file mode 100644 index 0000000..57c508f --- /dev/null +++ b/testsets/test/gold.tsv @@ -0,0 +1,4 @@ +1.1 +2.2 +0.5 +3.6 diff --git a/testsets/test/targets.tsv b/testsets/test/targets.tsv new file mode 100644 index 0000000..0ed987f --- /dev/null +++ b/testsets/test/targets.tsv @@ -0,0 +1,4 @@ +Gott Gott +und und +haben haben +göttlich göttlich diff --git a/testsets/test/targets_wi.tsv b/testsets/test/targets_wi.tsv new file mode 100644 index 0000000..48383c0 --- /dev/null +++ b/testsets/test/targets_wi.tsv @@ -0,0 +1,4 @@ +Gott_ Gott +und_ und +haben_ haben +göttlich_ göttlich
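
### Illustrative sketches

The snippets below are standalone sketches of individual steps from the files above; they are illustrations under stated assumptions, not replacements for the repository's scripts.

The ternary random vectors built in `representations/ri.py` (when the seed option `-s <seeds>` is used, as in `scripts/run_RI.sh` with `-s 2`) give each context word a vector with `seeds/2` entries set to +1, `seeds/2` entries set to -1, and all remaining entries 0, randomly permuted per context. A minimal sketch of that construction:

```python
import numpy as np

def ternary_random_vectors(n_contexts, dim, seeds):
    """One ternary random vector per context word, as in representations/ri.py."""
    assert seeds <= dim, "number of seeds must not be greater than the dimensionality"
    base = np.zeros(dim)
    base[:seeds // 2] = 1.0          # first half of the seeds: +1
    base[seeds // 2:seeds] = -1.0    # second half of the seeds: -1
    elemental = np.zeros((n_contexts, dim))
    for i in range(n_contexts):
        np.random.shuffle(base)      # random permutation of the +1/-1 pattern
        elemental[i] = base
    return elemental

# e.g. dim=5 and -s 2, the values used in the test pipeline
print(ternary_random_vectors(n_contexts=3, dim=5, seeds=2))
```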
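The subsampling parameter `<t>` in `representations/ri.py` damps frequent context words: when a context's relative frequency f exceeds t, its co-occurrence counts are weighted by sqrt(t/f), otherwise they are left unchanged. A toy calculation with t=0.001, one of the values in `ts=(0.001 None)`:

```python
import numpy as np

t = 0.001                                      # subsampling threshold
freqs = np.array([0.00005, 0.001, 0.01, 0.1])  # relative context frequencies
weights = np.where(freqs > t, np.sqrt(t / freqs), 1.0)
print(weights)  # [1. 1. ~0.316 ~0.1]: rare contexts untouched, frequent ones damped
```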
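The eigenvalue weighting step in `representations/svd.py` interpolates between keeping only the left singular vectors (`<gamma>` = 0, the setting used in `scripts/run_SVD.sh`) and scaling them by the full singular values (`<gamma>` = 1), following Levy et al. (2015). A minimal standalone sketch of that step:

```python
import numpy as np
from sklearn.utils.extmath import randomized_svd

def svd_reduce(matrix, dim, gamma):
    """Truncated SVD with eigenvalue weighting, mirroring representations/svd.py."""
    u, s, v = randomized_svd(matrix, n_components=dim, n_iter=5, transpose=False)
    if gamma == 0.0:
        return u                       # unweighted left singular vectors
    elif gamma == 1.0:
        return s * u                   # equivalent to np.dot(u, np.diag(s))
    else:
        return np.power(s, gamma) * u  # partial eigenvalue weighting

reduced = svd_reduce(np.random.rand(100, 50), dim=30, gamma=0.0)
print(reduced.shape)  # (100, 30)
```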
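`scripts/run_CD.sh` scores each target by the cosine distance between its vectors in the two aligned spaces. The sketch below illustrates the idea for spaces stored in word2vec text format (e.g., the `.w2v` files written by the Orthogonal Procrustes alignment); it is not the repository's `measures/cd.py`, and the paths in the usage comment are placeholders:

```python
import numpy as np
from gensim.models import KeyedVectors

def cosine_distance_scores(space1_path, space2_path, targets):
    """Cosine distance per target between two aligned word2vec spaces."""
    space1 = KeyedVectors.load_word2vec_format(space1_path)
    space2 = KeyedVectors.load_word2vec_format(space2_path)
    scores = {}
    for target in targets:
        v1, v2 = space1[target], space2[target]
        cos_sim = np.dot(v1, v2) / (np.linalg.norm(v1) * np.linalg.norm(v2))
        scores[target] = 1.0 - cos_sim  # higher score = more change
    return scores

# hypothetical call with the test targets:
# cosine_distance_scores('space1-OP.w2v', 'space2-OP.w2v',
#                        ['Gott', 'und', 'haben', 'göttlich'])
```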
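`scripts/run_LND.sh` passes 25 as the number of neighbors to `measures/lnd.py`. One standard formulation of Local Neighborhood Distance (following Hamilton et al., 2016) compares, for each target, the vector of cosine similarities to its nearest neighbors across the two spaces; the sketch below implements that formulation and may differ in details from `measures/lnd.py`:

```python
import numpy as np
from gensim.models import KeyedVectors

def lnd_score(space1, space2, target, k=25):
    """Second-order (local neighborhood) cosine distance for one target."""
    # union of the k nearest neighbours of the target in both spaces
    neighbours = {w for w, _ in space1.most_similar(target, topn=k)}
    neighbours |= {w for w, _ in space2.most_similar(target, topn=k)}
    neighbours = [w for w in neighbours if w in space1.vocab and w in space2.vocab]
    # second-order vectors: similarity of the target to each shared neighbour
    s1 = np.array([space1.similarity(target, w) for w in neighbours])
    s2 = np.array([space2.similarity(target, w) for w in neighbours])
    cos_sim = np.dot(s1, s2) / (np.linalg.norm(s1) * np.linalg.norm(s2))
    return 1.0 - cos_sim
```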
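Finally, `scripts/run_SPR.sh` evaluates every result file against the gold scores with Spearman's rank correlation (`evaluation/spearman.py`). A minimal sketch of that comparison, using the gold values from `testsets/test/gold.tsv` and hypothetical model predictions in the same target order:

```python
from scipy.stats import spearmanr

gold = [1.1, 2.2, 0.5, 3.6]         # testsets/test/gold.tsv
predictions = [0.9, 2.5, 0.4, 3.0]  # hypothetical model scores, same target order
rho, p = spearmanr(gold, predictions)
print(rho, p)
```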