diff --git a/python/heterocl/compute_api.py b/python/heterocl/compute_api.py
index ba779821b..304642d39 100644
--- a/python/heterocl/compute_api.py
+++ b/python/heterocl/compute_api.py
@@ -1,6 +1,7 @@
 """Compute APIs in HeteroCL"""
 #pylint: disable=no-member, redefined-builtin, too-many-arguments, missing-docstring
 import numbers
+import numpy as np
 from collections import OrderedDict
 from .tvm import expr as _expr, stmt as _stmt, make as _make
 from .tvm.api import _IterVar, min_value
@@ -387,12 +388,12 @@ def scalar(init=0, name=None, dtype=None):
     name = get_name("scalar", name)
     return compute((1,), lambda x: init, name, dtype)
 
-def copy(tensor, name=None):
+def copy(tensor, name=None, dtype=None):
     """A syntactic sugar for copying an existing tensor.
 
     Parameters
     ----------
-    tensor : Tensor
+    tensor : Tensor or list or numpy.ndarray
         The tensor to be copied from
 
     name : str, optional
@@ -401,9 +402,71 @@
     Returns
     -------
     Tensor
+
+    Examples
+    --------
+    .. code-block:: python
+
+        # example 1 - copy from a HeteroCL tensor
+        A = hcl.placeholder((10,), "A", hcl.UInt(32))
+        B1 = hcl.copy(A, "B1")
+
+        # example 2 - copy from a Python list
+        pA = [[1, 2, 3], [4, 5, 6]]
+        # The data type is NOT inferred from the list
+        B2 = hcl.copy(pA, "B2", hcl.Int())
+
+        # example 3 - copy from a Numpy array
+        nA = numpy.array(pA)
+        # The data type is determined by using nA.dtype
+        B3 = hcl.copy(nA, "B3")
     """
     name = get_name("copy", name)
-    return compute(tensor.shape, lambda *args: tensor[args], name, tensor.dtype)
+    if isinstance(tensor, Tensor):
+        return compute(
+            tensor.shape,
+            lambda *args: tensor[args],
+            name,
+            tensor.dtype)
+    elif isinstance(tensor, (list, np.ndarray)):
+        if isinstance(tensor, np.ndarray):
+            shape = tensor.shape
+            _tensor = tensor = tensor.tolist()
+        else:
+            _tensor = tensor
+            shape = []
+            while isinstance(_tensor, list):
+                shape.append(len(_tensor))
+                _tensor = _tensor[0]
+            shape = tuple(shape)
+
+
+        def _iter_tensor(_tensor, tensor, indices, buffer_var):
+            if isinstance(tensor, list):
+                for x in range(0, len(tensor)):
+                    indices.append(x)
+                    _iter_tensor(_tensor, tensor[x],
+                                 indices, buffer_var)
+                    indices.pop()
+            else:
+                index, _, _ = get_index(shape, indices, 0)
+                stage.emit(
+                    _make.Store(
+                        buffer_var,
+                        _make.Cast(stage._dtype, tensor),
+                        index))
+
+        with Stage(name, dtype, shape) as stage:
+            _tensor = Tensor(shape, stage._dtype, name, stage._buf)
+            _iter_tensor(_tensor, tensor, [], _tensor._buf.data)
+            stage.lhs_tensors.add(_tensor)
+            for t in stage.lhs_tensors:
+                t.last_update = stage
+            _tensor._tensor = stage._op
+        return _tensor
+    else:
+        raise APIError("Unknown tensor type. Should be either HeteroCL tensor, \
+Python list, or Numpy array.")
 
 def unpack(tensor, axis=0, factor=None, name=None, dtype=None):
     """Unpack a tensor with larger bitwidth to a tensor with smaller bitwidth.
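Note on usage (not part of the patch): the extended hcl.copy can be driven end to end the same way the new test_copy below does. A minimal host-side sketch, assuming the standard HeteroCL flow (hcl.init / create_schedule / build / asarray); the tensor names cp_list and cp_np are illustrative:

    import heterocl as hcl
    import numpy as np

    hcl.init(hcl.Int(32))          # default dtype used by the copies below

    py_A = [[1, 2, 3], [4, 5, 6]]  # list path: shape inferred, dtype is not
    np_A = np.array(py_A)          # ndarray path: shape taken from np_A.shape

    def kernel():
        cp_list = hcl.copy(py_A, "cp_list", hcl.Int(32))
        cp_np = hcl.copy(np_A, "cp_np")
        return hcl.compute(np_A.shape, lambda x, y: cp_list[x, y] + cp_np[x, y])

    s = hcl.create_schedule([], kernel)
    f = hcl.build(s)

    hcl_O = hcl.asarray(np.zeros(np_A.shape), dtype=hcl.Int(32))
    f(hcl_O)
    assert np.array_equal(hcl_O.asnumpy(), np_A * 2)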
diff --git a/python/heterocl/schedule.py b/python/heterocl/schedule.py
index 03af1cf3e..2cf6eeb6e 100644
--- a/python/heterocl/schedule.py
+++ b/python/heterocl/schedule.py
@@ -137,14 +137,14 @@ def reuse_at(self, target, parent, axis, name=None):
 
     def to(self, tensors, dst, src=None,
            stream_type=_expr.StreamExpr.Channel, depth=10, name=None):
-        """Stream a list of Tensors to dst devices 
-        
+        """Stream a list of Tensors to dst devices
+
         Parameters
         ----------
         tensors : list of Tensor
             The tensors to be moved
 
-        dst : device or module 
+        dst : device or module
             The tensors to be moved
 
         stream_type : {FIFO, Channel, Burst}, optional
@@ -155,7 +155,7 @@
         rets = []
         if not isinstance(tensors, list):
            tensors = [tensors]
-        for tensor in tensors: 
+        for tensor in tensors:
             try:
                 target = tensor.tensor
             except (AttributeError, ValueError):
@@ -165,7 +165,7 @@
                 target = tensor
             if name is None:
                 name = target.name + ".stream"
-            ret = self.sch.to(target, dst, src, 
+            ret = self.sch.to(target, dst, src,
                               stream_type, depth, name)
             name = None
             rets.append(ret)
@@ -339,7 +339,7 @@ def __exit__(self, ptype, value, trace):
         # create the output operation
         input_ops = [i._op for i in self.input_stages]
         input_bufs = [i._buf for i in self.input_stages]
-        output_bufs = [self._buf] 
+        output_bufs = [self._buf]
         body = self.pop_stmt()
         Stage._current.pop()
         op = _ExternOp(self.name, "", self.axis_list, input_ops,
diff --git a/tests/test_compute_basic.py b/tests/test_compute_basic.py
index ba0e1ec7a..39e75b4c8 100644
--- a/tests/test_compute_basic.py
+++ b/tests/test_compute_basic.py
@@ -157,6 +157,29 @@
     for i in range(0, 10):
         assert ret_B[i] == np_A[i]+1
 
+def test_copy():
+    hcl.init()
+
+    np_A = numpy.random.randint(10, size=(10, 10, 10))
+    py_A = np_A.tolist()
+
+    def kernel():
+        cp1 = hcl.copy(np_A)
+        cp2 = hcl.copy(py_A)
+        return hcl.compute(np_A.shape, lambda *x: cp1[x] + cp2[x])
+
+    O = hcl.placeholder(np_A.shape)
+    s = hcl.create_schedule([], kernel)
+    f = hcl.build(s)
+
+    np_O = numpy.zeros(np_A.shape)
+    hcl_O = hcl.asarray(np_O, dtype=hcl.Int(32))
+
+    f(hcl_O)
+
+    assert numpy.array_equal(hcl_O.asnumpy(), np_A*2)
+
+
 def test_mutate_basic():
 
     def kernel(A, B):
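Side note on the list path exercised by test_copy (not part of the patch): copy infers the shape of a nested Python list by walking the first element at each nesting level, so the input is assumed to be rectangular. A standalone extraction of that loop for illustration; infer_shape is a hypothetical name:

    def infer_shape(nested):
        # Mirror of the while-loop in the new copy(): descend through the
        # first element at each level and record that level's length.
        shape = []
        t = nested
        while isinstance(t, list):
            shape.append(len(t))
            t = t[0]
        return tuple(shape)

    assert infer_shape([[1, 2, 3], [4, 5, 6]]) == (2, 3)

The new test can be run on its own with: pytest tests/test_compute_basic.py::test_copy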