gh-222: add emult for vector
EgorOrachyov committed Sep 1, 2023
1 parent 6369d1c commit 2e2d856
Showing 15 changed files with 367 additions and 15 deletions.
1 change: 1 addition & 0 deletions CMakeLists.txt
@@ -331,6 +331,7 @@ add_library(spla SHARED
src/cpu/cpu_v_count_mf.hpp
src/cpu/cpu_v_eadd.hpp
src/cpu/cpu_v_eadd_fdb.hpp
src/cpu/cpu_v_emult.hpp
src/cpu/cpu_v_map.hpp
src/cpu/cpu_v_reduce.hpp
src/util/pair_hash.hpp
1 change: 1 addition & 0 deletions include/spla.h
@@ -374,6 +374,7 @@ SPLA_API spla_Status spla_Exec_m_reduce_by_row(spla_Vector r, spla_Matrix M, spl
SPLA_API spla_Status spla_Exec_m_reduce_by_column(spla_Vector r, spla_Matrix M, spla_OpBinary op_reduce, spla_Scalar init, spla_Descriptor desc, spla_ScheduleTask* task);
SPLA_API spla_Status spla_Exec_m_reduce(spla_Scalar r, spla_Scalar s, spla_Matrix M, spla_OpBinary op_reduce, spla_Descriptor desc, spla_ScheduleTask* task);
SPLA_API spla_Status spla_Exec_v_eadd(spla_Vector r, spla_Vector u, spla_Vector v, spla_OpBinary op, spla_Descriptor desc, spla_ScheduleTask* task);
SPLA_API spla_Status spla_Exec_v_emult(spla_Vector r, spla_Vector u, spla_Vector v, spla_OpBinary op, spla_Descriptor desc, spla_ScheduleTask* task);
SPLA_API spla_Status spla_Exec_v_eadd_fdb(spla_Vector r, spla_Vector v, spla_Vector fdb, spla_OpBinary op, spla_Descriptor desc, spla_ScheduleTask* task);
SPLA_API spla_Status spla_Exec_v_assign_masked(spla_Vector r, spla_Vector mask, spla_Scalar value, spla_OpBinary op_assign, spla_OpSelect op_select, spla_Descriptor desc, spla_ScheduleTask* task);
SPLA_API spla_Status spla_Exec_v_map(spla_Vector r, spla_Vector v, spla_OpUnary op, spla_Descriptor desc, spla_ScheduleTask* task);
20 changes: 20 additions & 0 deletions include/spla/exec.hpp
@@ -259,6 +259,26 @@ namespace spla {
ref_ptr<Descriptor> desc = ref_ptr<Descriptor>(),
ref_ptr<ScheduleTask>* task_hnd = nullptr);

/**
 * @brief Execute (schedule) element-wise multiplication by structure of two vectors
 *
 * @param r Vector to store result of the operation
 * @param u Input vector to multiply
 * @param v Input vector to multiply
 * @param op Element-wise binary operator used to multiply elements of the vectors
 * @param desc Scheduled task descriptor; default is null
 * @param task_hnd Optional task handle; pass a non-null pointer to store the task
 *
 * @return Status of task execution, or status of task handle creation
 */
SPLA_API Status exec_v_emult(
ref_ptr<Vector> r,
ref_ptr<Vector> u,
ref_ptr<Vector> v,
ref_ptr<OpBinary> op,
ref_ptr<Descriptor> desc = ref_ptr<Descriptor>(),
ref_ptr<ScheduleTask>* task_hnd = nullptr);

/**
* @brief Execute (schedule) element-wise addition by structure of two vectors with feedback
*
11 changes: 3 additions & 8 deletions python/example.py
@@ -1,10 +1,5 @@
from pyspla import *

M = Matrix.from_lists([0, 1, 2, 2], [1, 2, 0, 4], [1, 2, 3, 4], (3, 5), INT)
print(M)

N = Matrix.from_lists([0, 1, 2, 3], [2, 0, 1, 3], [2, 3, 4, 5], (5, 4), INT)
print(N)

R = M.mxm(N, INT.MULT, INT.PLUS)
print(R)
u = Vector.from_lists([0, 1], [10, 20], 4, INT)
v = Vector.from_lists([1, 3], [-5, 12], 4, INT)
print(u.emult(INT.PLUS, v))
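For reference, only index 1 appears in both u and v, so the structural intersection has a single entry and INT.PLUS combines 20 and -5 into 15. The expected printout (matching the emult docstring added below):

# Expected output:
#  0| .
#  1|15
#  2| .
#  3| .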
3 changes: 3 additions & 0 deletions python/pyspla/bridge.py
@@ -555,6 +555,7 @@ def load_library(lib_path):
_spla.spla_Exec_m_reduce_by_column.restype = _status_t
_spla.spla_Exec_m_reduce.restype = _status_t
_spla.spla_Exec_v_eadd.restype = _status_t
_spla.spla_Exec_v_emult.restype = _status_t
_spla.spla_Exec_v_eadd_fdb.restype = _status_t
_spla.spla_Exec_v_assign_masked.restype = _status_t
_spla.spla_Exec_v_map.restype = _status_t
@@ -577,6 +578,8 @@ def load_library(lib_path):
[_object_t, _object_t, _object_t, _object_t, _object_t, _p_object_t]
_spla.spla_Exec_v_eadd.argtypes = \
[_object_t, _object_t, _object_t, _object_t, _object_t, _p_object_t]
_spla.spla_Exec_v_emult.argtypes = \
[_object_t, _object_t, _object_t, _object_t, _object_t, _p_object_t]
_spla.spla_Exec_v_eadd_fdb.argtypes = \
[_object_t, _object_t, _object_t, _object_t, _object_t, _p_object_t]
_spla.spla_Exec_v_assign_masked.argtypes = \
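For reference, the call shape this registration implies, as a hedged sketch (handle variable names are hypothetical; the six-argument layout follows the argtypes above and the invocation in Vector.emult below):

# Hedged sketch of the raw ctypes call (handle names hypothetical):
#   status = _spla.spla_Exec_v_emult(r_hnd, u_hnd, v_hnd, op_hnd, desc_hnd, task_p)
# Arguments: result vector, left input, right input, binary op, descriptor, and an
# optional pointer that receives the schedule task; restype is the status code.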
44 changes: 44 additions & 0 deletions python/pyspla/vector.py
@@ -593,6 +593,50 @@ def eadd(self, op_add, v, out=None, desc=None):

return out

def emult(self, op_mult, v, out=None, desc=None):
"""
Element-wise multiply one vector with another and return the result.

>>> u = Vector.from_lists([0, 1], [10, 20], 4, INT)
>>> v = Vector.from_lists([1, 3], [-5, 12], 4, INT)
>>> print(u.emult(INT.PLUS, v))
'
 0| .
 1|15
 2| .
 3| .
'

:param op_mult: OpBinary.
Binary operation used to multiply values.
:param v: Vector.
Right-hand vector to multiply with this one.
:param out: optional: Vector. default: None.
Optional vector to store the result.
:param desc: optional: Descriptor. default: None.
Optional descriptor object to configure the execution.
:return: Vector with result.
"""

if out is None:
out = Vector(shape=self.n_rows, dtype=self.dtype)

assert v
assert v.n_rows == self.n_rows
assert out.n_rows == self.n_rows
assert v.dtype == self.dtype
assert out.dtype == self.dtype
assert op_mult

check(backend().spla_Exec_v_emult(out.hnd, self.hnd, v.hnd, op_mult.hnd,
self._get_desc(desc), self._get_task(None)))

return out

def assign(self, mask, value, op_assign, op_select, desc=None):
"""
Assign scalar value to a vector by mask.
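A minimal usage sketch for the optional out= path, assuming only the pyspla API shown in this commit:

from pyspla import Vector, INT

u = Vector.from_lists([0, 1], [10, 20], 4, INT)
v = Vector.from_lists([1, 3], [-5, 12], 4, INT)

r = Vector(shape=4, dtype=INT)  # preallocate the result, as emult does internally
u.emult(INT.PLUS, v, out=r)     # r now holds 15 at index 1; all other entries empty
print(r)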
3 changes: 3 additions & 0 deletions src/binding/c_exec.cpp
@@ -67,6 +67,9 @@ spla_Status spla_Exec_m_reduce(spla_Scalar r, spla_Scalar s, spla_Matrix M, spla
spla_Status spla_Exec_v_eadd(spla_Vector r, spla_Vector u, spla_Vector v, spla_OpBinary op, spla_Descriptor desc, spla_ScheduleTask* task) {
SPLA_WRAP_EXEC(exec_v_eadd, AS_V(r), AS_V(u), AS_V(v), AS_OB(op));
}
spla_Status spla_Exec_v_emult(spla_Vector r, spla_Vector u, spla_Vector v, spla_OpBinary op, spla_Descriptor desc, spla_ScheduleTask* task) {
SPLA_WRAP_EXEC(exec_v_emult, AS_V(r), AS_V(u), AS_V(v), AS_OB(op));
}
spla_Status spla_Exec_v_eadd_fdb(spla_Vector r, spla_Vector v, spla_Vector fdb, spla_OpBinary op, spla_Descriptor desc, spla_ScheduleTask* task) {
SPLA_WRAP_EXEC(exec_v_eadd_fdb, AS_V(r), AS_V(v), AS_V(fdb), AS_OB(op));
}
6 changes: 6 additions & 0 deletions src/cpu/cpu_algo_registry.cpp
@@ -41,6 +41,7 @@
#include <cpu/cpu_v_count_mf.hpp>
#include <cpu/cpu_v_eadd.hpp>
#include <cpu/cpu_v_eadd_fdb.hpp>
#include <cpu/cpu_v_emult.hpp>
#include <cpu/cpu_v_map.hpp>
#include <cpu/cpu_v_reduce.hpp>
#include <cpu/cpu_vxm.hpp>
@@ -71,6 +72,11 @@ namespace spla {
g_registry->add(MAKE_KEY_CPU_0("v_eadd", UINT), std::make_shared<Algo_v_eadd_cpu<T_UINT>>());
g_registry->add(MAKE_KEY_CPU_0("v_eadd", FLOAT), std::make_shared<Algo_v_eadd_cpu<T_FLOAT>>());

// algorithm v_emult
g_registry->add(MAKE_KEY_CPU_0("v_emult", INT), std::make_shared<Algo_v_emult_cpu<T_INT>>());
g_registry->add(MAKE_KEY_CPU_0("v_emult", UINT), std::make_shared<Algo_v_emult_cpu<T_UINT>>());
g_registry->add(MAKE_KEY_CPU_0("v_emult", FLOAT), std::make_shared<Algo_v_emult_cpu<T_FLOAT>>());

// algorithm v_eadd_fdb
g_registry->add(MAKE_KEY_CPU_0("v_eadd_fdb", INT), std::make_shared<Algo_v_eadd_fdb_cpu<T_INT>>());
g_registry->add(MAKE_KEY_CPU_0("v_eadd_fdb", UINT), std::make_shared<Algo_v_eadd_fdb_cpu<T_UINT>>());
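Each registration pairs the algorithm name with a concrete element type; a conceptual sketch of this (name, type)-keyed dispatch, not spla code:

registry = {}

def register(name, dtype, algo):
    # Key algorithms by (name, element type), as MAKE_KEY_CPU_0 does above.
    registry[(name, dtype)] = algo

register("v_emult", "INT", "Algo_v_emult_cpu<T_INT>")
register("v_emult", "UINT", "Algo_v_emult_cpu<T_UINT>")
register("v_emult", "FLOAT", "Algo_v_emult_cpu<T_FLOAT>")

assert registry[("v_emult", "INT")] == "Algo_v_emult_cpu<T_INT>"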
14 changes: 13 additions & 1 deletion src/cpu/cpu_format_dok_vec.hpp
@@ -30,6 +30,8 @@

#include <cpu/cpu_formats.hpp>

#include <algorithm>

namespace spla {

/**
@@ -43,9 +45,19 @@ namespace spla {
assert(out.Ai.size() == in.values);
assert(out.Ax.size() == in.values);

uint k = 0;

std::vector<std::pair<uint, T>> tmp;
tmp.reserve(in.values);

for (const auto& entry : in.Ax) {
tmp.emplace_back(entry.first, entry.second);
}

std::sort(tmp.begin(), tmp.end(), [](const auto& a, const auto& b) { return a.first < b.first; });

uint k = 0;

for (const auto& entry : tmp) {
const uint i = entry.first;
const T x = entry.second;
out.Ai[k] = i;
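The DOK storage appears to be a hash map (entries expose .first/.second), so iteration order over it is arbitrary; the added sort guarantees the resulting COO indices come out ascending, which the emult merge below depends on. A minimal sketch, not spla code:

dok = {3: 12, 0: 10, 1: -5}    # index -> value; hash iteration order is arbitrary
entries = sorted(dok.items())  # order by index, as the std::sort above does
Ai = [i for i, _ in entries]   # [0, 1, 3]
Ax = [x for _, x in entries]   # [10, -5, 12]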
10 changes: 5 additions & 5 deletions src/cpu/cpu_v_eadd.hpp
@@ -54,19 +54,19 @@ namespace spla {

Status execute(const DispatchContext& ctx) override {
auto t = ctx.task.template cast_safe<ScheduleTask_v_eadd>();
ref_ptr<TVector<T>> u = t->r.template cast_safe<TVector<T>>();
ref_ptr<TVector<T>> u = t->u.template cast_safe<TVector<T>>();
ref_ptr<TVector<T>> v = t->v.template cast_safe<TVector<T>>();

if (u->is_valid(FormatVector::CpuDense) && v->is_valid(FormatVector::CpuDense)) {
return execute_dn2dn(ctx);
return execute_dnNdn(ctx);
}

return execute_dn2dn(ctx);
return execute_dnNdn(ctx);
}

private:
Status execute_dn2dn(const DispatchContext& ctx) {
TIME_PROFILE_SCOPE("cpu/vector_eadd_dn2dn");
Status execute_dnNdn(const DispatchContext& ctx) {
TIME_PROFILE_SCOPE("cpu/vector_eadd_dnNdn");

auto t = ctx.task.template cast_safe<ScheduleTask_v_eadd>();
ref_ptr<TVector<T>> r = t->r.template cast_safe<TVector<T>>();
185 changes: 185 additions & 0 deletions src/cpu/cpu_v_emult.hpp
@@ -0,0 +1,185 @@
/**********************************************************************************/
/* This file is part of spla project */
/* https://github.com/JetBrains-Research/spla */
/**********************************************************************************/
/* MIT License */
/* */
/* Copyright (c) 2023 SparseLinearAlgebra */
/* */
/* Permission is hereby granted, free of charge, to any person obtaining a copy */
/* of this software and associated documentation files (the "Software"), to deal */
/* in the Software without restriction, including without limitation the rights */
/* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell */
/* copies of the Software, and to permit persons to whom the Software is */
/* furnished to do so, subject to the following conditions: */
/* */
/* The above copyright notice and this permission notice shall be included in all */
/* copies or substantial portions of the Software. */
/* */
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR */
/* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, */
/* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE */
/* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER */
/* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, */
/* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE */
/* SOFTWARE. */
/**********************************************************************************/

#ifndef SPLA_CPU_V_EMULT_HPP
#define SPLA_CPU_V_EMULT_HPP

#include <schedule/schedule_tasks.hpp>

#include <core/dispatcher.hpp>
#include <core/registry.hpp>
#include <core/top.hpp>
#include <core/tscalar.hpp>
#include <core/ttype.hpp>
#include <core/tvector.hpp>

namespace spla {

template<typename T>
class Algo_v_emult_cpu final : public RegistryAlgo {
public:
~Algo_v_emult_cpu() override = default;

std::string get_name() override {
return "v_emult";
}

std::string get_description() override {
return "sequential element-wise mult vector operation";
}

Status execute(const DispatchContext& ctx) override {
auto t = ctx.task.template cast_safe<ScheduleTask_v_emult>();
ref_ptr<TVector<T>> u = t->u.template cast_safe<TVector<T>>();
ref_ptr<TVector<T>> v = t->v.template cast_safe<TVector<T>>();

if (u->is_valid(FormatVector::CpuCoo) && v->is_valid(FormatVector::CpuCoo)) {
return execute_spNsp(ctx);
}
if (u->is_valid(FormatVector::CpuCoo) && v->is_valid(FormatVector::CpuDense)) {
return execute_spNdn(ctx);
}
if (u->is_valid(FormatVector::CpuDense) && v->is_valid(FormatVector::CpuCoo)) {
return execute_dnNsp(ctx);
}

return execute_spNsp(ctx);
}

private:
Status execute_spNsp(const DispatchContext& ctx) {
TIME_PROFILE_SCOPE("cpu/vector_emult_spNsp");

auto t = ctx.task.template cast_safe<ScheduleTask_v_emult>();
ref_ptr<TVector<T>> r = t->r.template cast_safe<TVector<T>>();
ref_ptr<TVector<T>> u = t->u.template cast_safe<TVector<T>>();
ref_ptr<TVector<T>> v = t->v.template cast_safe<TVector<T>>();
ref_ptr<TOpBinary<T, T, T>> op = t->op.template cast_safe<TOpBinary<T, T, T>>();

r->validate_wd(FormatVector::CpuCoo);
u->validate_rw(FormatVector::CpuCoo);
v->validate_rw(FormatVector::CpuCoo);

CpuCooVec<T>* p_r = r->template get<CpuCooVec<T>>();
const CpuCooVec<T>* p_u = u->template get<CpuCooVec<T>>();
const CpuCooVec<T>* p_v = v->template get<CpuCooVec<T>>();
const auto& function = op->function;

assert(p_r->Ai.empty());
assert(p_r->Ax.empty());

const auto u_count = p_u->values;
const auto v_count = p_v->values;
uint u_iter = 0;
uint v_iter = 0;

while (u_iter < u_count && v_iter < v_count) {
if (p_u->Ai[u_iter] < p_v->Ai[v_iter]) {
u_iter += 1;
} else if (p_v->Ai[v_iter] < p_u->Ai[u_iter]) {
v_iter += 1;
} else {
p_r->Ai.push_back(p_u->Ai[u_iter]);
p_r->Ax.push_back(function(p_u->Ax[u_iter], p_v->Ax[v_iter]));
u_iter += 1;
v_iter += 1;
}
}

return Status::Ok;
}
Status execute_spNdn(const DispatchContext& ctx) {
TIME_PROFILE_SCOPE("cpu/vector_emult_spNdn");

auto t = ctx.task.template cast_safe<ScheduleTask_v_emult>();
ref_ptr<TVector<T>> r = t->r.template cast_safe<TVector<T>>();
ref_ptr<TVector<T>> u = t->u.template cast_safe<TVector<T>>();
ref_ptr<TVector<T>> v = t->v.template cast_safe<TVector<T>>();
ref_ptr<TOpBinary<T, T, T>> op = t->op.template cast_safe<TOpBinary<T, T, T>>();

r->validate_wd(FormatVector::CpuCoo);
u->validate_rw(FormatVector::CpuCoo);
v->validate_rw(FormatVector::CpuDense);

CpuCooVec<T>* p_r = r->template get<CpuCooVec<T>>();
const CpuCooVec<T>* p_u = u->template get<CpuCooVec<T>>();
const CpuDenseVec<T>* p_v = v->template get<CpuDenseVec<T>>();
const auto& function = op->function;
const auto skip = v->get_fill_value();

assert(p_r->Ai.empty());
assert(p_r->Ax.empty());

for (uint k = 0; k < p_u->values; k++) {
const uint i = p_u->Ai[k];

if (p_v->Ax[i] != skip) {
p_r->Ai.push_back(i);
p_r->Ax.push_back(function(p_u->Ax[k], p_v->Ax[i]));
}
}

return Status::Ok;
}
Status execute_dnNsp(const DispatchContext& ctx) {
TIME_PROFILE_SCOPE("cpu/vector_emult_dnNsp");

auto t = ctx.task.template cast_safe<ScheduleTask_v_emult>();
ref_ptr<TVector<T>> r = t->r.template cast_safe<TVector<T>>();
ref_ptr<TVector<T>> u = t->u.template cast_safe<TVector<T>>();
ref_ptr<TVector<T>> v = t->v.template cast_safe<TVector<T>>();
ref_ptr<TOpBinary<T, T, T>> op = t->op.template cast_safe<TOpBinary<T, T, T>>();

r->validate_wd(FormatVector::CpuCoo);
u->validate_rw(FormatVector::CpuDense);
v->validate_rw(FormatVector::CpuCoo);

CpuCooVec<T>* p_r = r->template get<CpuCooVec<T>>();
const CpuDenseVec<T>* p_u = u->template get<CpuDenseVec<T>>();
const CpuCooVec<T>* p_v = v->template get<CpuCooVec<T>>();
const auto& function = op->function;
const auto skip = u->get_fill_value();

assert(p_r->Ai.empty());
assert(p_r->Ax.empty());

for (uint k = 0; k < p_v->values; k++) {
const uint i = p_v->Ai[k];

if (p_u->Ax[i] != skip) {
p_r->Ai.push_back(i);
p_r->Ax.push_back(function(p_u->Ax[i], p_v->Ax[k]));
}
}

return Status::Ok;
}
};

}// namespace spla

#endif//SPLA_CPU_V_EMULT_HPP
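For reference, a minimal Python sketch (not spla code) of the sorted two-pointer intersection in execute_spNsp above; the sparse-dense paths are analogous, walking the sparse side and probing the dense array while skipping the fill value:

def emult_coo(u_idx, u_val, v_idx, v_val, op):
    # Both inputs are COO vectors with indices sorted ascending.
    r_idx, r_val = [], []
    i = j = 0
    while i < len(u_idx) and j < len(v_idx):
        if u_idx[i] < v_idx[j]:
            i += 1                        # index only in u: no output (intersection)
        elif v_idx[j] < u_idx[i]:
            j += 1                        # index only in v: no output
        else:
            r_idx.append(u_idx[i])        # index in both: combine the values
            r_val.append(op(u_val[i], v_val[j]))
            i += 1
            j += 1
    return r_idx, r_val

# emult_coo([0, 1], [10, 20], [1, 3], [-5, 12], lambda a, b: a + b) -> ([1], [15])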