Q1_Final.json

{"operators":[{"operatorID":"ScalaUDF-operator-62b8ecc5-9699-4c0f-860d-32bc9586c908","operatorType":"ScalaUDF","operatorVersion":"bfcdd448e9d4ff69a973c29ccf008ec9b5baed71","operatorProperties":{"code":"import edu.uci.ics.texera.workflow.common.operators.OperatorExecutor\nimport edu.uci.ics.amber.engine.common.tuple.amber.TupleLike\nimport edu.uci.ics.texera.workflow.common.tuple.Tuple\nimport edu.uci.ics.amber.engine.common.workflow.PortIdentity\nimport edu.uci.ics.amber.engine.common.{CheckpointState, CheckpointSupport}\nimport scala.Function1\n\nclass ScalaUDFOpExec extends OperatorExecutor with CheckpointSupport {\n\n  // Running sum of the `amount` field over all input tuples processed so far\n private var accumulativeAmount: Int = 0\n\n  // Process each tuple, adding its amount to the running sum\n  override def processTuple(tuple: Tuple, port: Int): Iterator[TupleLike] = {\n    val amount:Int = tuple.getField(\"amount\")\n    accumulativeAmount += amount\n    Iterator(TupleLike(amount, accumulativeAmount))  // Emit the amount together with the running sum\n  }\n\n  // Serialize the state, saving the running sum to the checkpoint\n  override def serializeState(\n      currentIteratorState: Iterator[(TupleLike, Option[PortIdentity])],\n      checkpoint: CheckpointState\n  ): Iterator[(TupleLike, Option[PortIdentity])] = {\n    checkpoint.save(\"accumulativeAmount\", accumulativeAmount)\n    currentIteratorState\n  }\n\n  // Deserialize the state, restoring the running sum from the checkpoint\n  override def deserializeState(\n      checkpoint: CheckpointState\n  ): Iterator[(TupleLike, Option[PortIdentity])] = {\n    open()  // Optional: Restore the necessary state\n    accumulativeAmount = checkpoint.load(\"accumulativeAmount\").asInstanceOf[Int]\n    // Optionally, recreate the tuple stream based on the restored state\n    Iterator.empty\n  }\n\n  // Provide an estimated cost for checkpointing (optional)\n  override def getEstimatedCheckpointCost: Long = 0L\n\n  
override def getState:String = s\"accumulativeAmount: $accumulativeAmount\"\n}\n","workers":1,"retainInputColumns":false,"outputColumns":[{"attributeName":"amount","attributeType":"integer"},{"attributeName":"accumulativeAmount","attributeType":"integer"}]},"inputPorts":[{"portID":"input-0","displayName":"","allowMultiInputs":true,"isDynamicPort":false,"dependencies":[]}],"outputPorts":[{"portID":"output-0","displayName":"","allowMultiInputs":false,"isDynamicPort":false}],"showAdvanced":false,"isDisabled":false,"customDisplayName":"Scala UDF","dynamicInputPorts":true,"dynamicOutputPorts":true},{"operatorID":"ScalaUDFSource-operator-2e26f979-cbf8-40ce-8452-fc46b26cea3d","operatorType":"ScalaUDFSource","operatorVersion":"bfcdd448e9d4ff69a973c29ccf008ec9b5baed71","operatorProperties":{"code":"import edu.uci.ics.amber.engine.common.{CheckpointState, CheckpointSupport}\nimport edu.uci.ics.amber.engine.common.SourceOperatorExecutor\nimport edu.uci.ics.amber.engine.common.tuple.amber.TupleLike\nimport edu.uci.ics.amber.engine.common.workflow.PortIdentity\nimport scala.concurrent.duration._\nimport java.time.Instant\nimport scala.util.Random\n\nclass ScalaUDFOpExec extends SourceOperatorExecutor with CheckpointSupport {\n\n  // Variable to maintain the current index of tuple production\n  var currentIndex: Int = 0\n  val totalTuplesToProduce: Int = 1000  // Total number of tuples to produce\n  val errorFrequency: Int = 159        // Introduce an error tuple every 159 tuples\n\n  // Set a specific seed for random number generation to make it deterministic\n  var random = new Random(12345L)  // 12345L is the seed\n\n  // Produce tuples lazily using an iterator, wrapping all computation within the iterator\n  override def produceTuple(): Iterator[TupleLike] = new Iterator[TupleLike] {\n    \n    // Return true if more tuples need to be produced\n    override def hasNext: Boolean = currentIndex < totalTuplesToProduce\n\n    // Generate the next tuple\n    override def next(): 
TupleLike = {\n      val tupleLike = if (currentIndex % errorFrequency == 0 && currentIndex != 0) {\n        // Create an error tuple every `errorFrequency` tuples\n        createErrorTransaction()\n      } else {\n        // Create a valid transaction tuple\n        val validTuple = createValidTransaction()\n        validTuple\n      }\n\n      // Update the current index\n      currentIndex += 1\n\n      // Introduce a 0.5-second (500 ms) delay between each tuple\n      Thread.sleep(500)\n\n      // Return the generated tuple\n      tupleLike\n    }\n  }\n\n  // Method to create a valid transaction and return it as a TupleLike\n  private def createValidTransaction(): TupleLike = {\n    val transactionID = s\"txn-${currentIndex}\"\n    val amount = random.between(3,500)  // Random amount in [3, 500)\n    // Returning the data directly as TupleLike\n    TupleLike(transactionID, amount)\n  }\n\n\n  // Method to create an error transaction and return it as a TupleLike\n  private def createErrorTransaction(): TupleLike = {\n    TupleLike(\"txn\", -4786900)\n  }\n\n  // Serialize the state (saving the current index and the random generator to the checkpoint)\n  override def serializeState(\n      currentIteratorState: Iterator[(TupleLike, Option[PortIdentity])],\n      checkpoint: CheckpointState\n  ): Iterator[(TupleLike, Option[PortIdentity])] = {\n    // Save important states to the checkpoint\n    checkpoint.save(\"currentIndex\", currentIndex)\n    checkpoint.save(\"random\", random)\n\n    currentIteratorState\n  }\n\n  // Deserialize the state (restoring the current index and the random generator from the checkpoint)\n  override def deserializeState(\n      checkpoint: CheckpointState\n  ): Iterator[(TupleLike, Option[PortIdentity])] = {\n    // Restore the saved states from the checkpoint\n    currentIndex = checkpoint.load(\"currentIndex\").asInstanceOf[Int]\n    random = 
checkpoint.load(\"random\").asInstanceOf[Random]\n    \n    // Continue producing tuples starting from the restored index\n    produceTuple().map(tuple => (tuple, Option.empty))\n  }\n\n  // Estimate the cost of checkpointing (you can customize this based on the use case)\n  override def getEstimatedCheckpointCost: Long = 0L\n\n  // This source reports no state summary, so getState returns an empty string\n  override def getState: String = \"\"\n}\n","workers":1,"columns":[{"attributeName":"transactionID","attributeType":"string"},{"attributeName":"amount","attributeType":"integer"}]},"inputPorts":[],"outputPorts":[{"portID":"output-0","displayName":"","allowMultiInputs":false,"isDynamicPort":false}],"showAdvanced":false,"isDisabled":false,"customDisplayName":"Blackbox","dynamicInputPorts":false,"dynamicOutputPorts":false},{"operatorID":"ScalaUDF-operator-55707c07-adfd-4d7e-9f9c-1de3147544c2","operatorType":"ScalaUDF","operatorVersion":"bfcdd448e9d4ff69a973c29ccf008ec9b5baed71","operatorProperties":{"code":"import edu.uci.ics.texera.workflow.common.operators.OperatorExecutor\nimport edu.uci.ics.amber.engine.common.tuple.amber.TupleLike\nimport edu.uci.ics.texera.workflow.common.tuple.Tuple\nimport edu.uci.ics.amber.engine.common.workflow.PortIdentity\nimport edu.uci.ics.amber.engine.common.{CheckpointState, CheckpointSupport}\nimport scala.Function1\n\nclass ScalaUDFOpExec extends OperatorExecutor with CheckpointSupport {\n\n  var numRejected:Int = 0\n  // Process each tuple, counting those whose accumulated amount is negative\n  override def processTuple(tuple: Tuple, port: Int): Iterator[TupleLike] = {\n    val amount:Int = tuple.getField(\"amount\")\n    val accumulatedAmount:Int = tuple.getField(\"accumulativeAmount\")\n    if(accumulatedAmount < 0){\n        numRejected += 1\n    }\n    Iterator(TupleLike(amount, accumulatedAmount < 0))  // Emit the amount and whether it is rejected (accumulated amount < 0)\n  }\n\n  // Serialize the state, saving the rejected-tuple count to the checkpoint\n  
override def serializeState(\n      currentIteratorState: Iterator[(TupleLike, Option[PortIdentity])],\n      checkpoint: CheckpointState\n  ): Iterator[(TupleLike, Option[PortIdentity])] = {\n    checkpoint.save(\"numRejected\", numRejected)\n    currentIteratorState\n  }\n\n  // Deserialize the state, restoring the rejected-tuple count from the checkpoint\n  override def deserializeState(\n      checkpoint: CheckpointState\n  ): Iterator[(TupleLike, Option[PortIdentity])] = {\n    open()  // Optional: Restore the necessary state\n    numRejected = checkpoint.load(\"numRejected\").asInstanceOf[Int]\n    // Optionally, recreate the tuple stream based on the restored state\n    Iterator.empty\n  }\n\n  // Provide an estimated cost for checkpointing (optional)\n  override def getEstimatedCheckpointCost: Long = 0L\n\n  override def getState:String = s\"numRejected:$numRejected\"\n}\n","workers":1,"retainInputColumns":false,"outputColumns":[{"attributeName":"amount","attributeType":"integer"},{"attributeName":"is_rejected","attributeType":"boolean"}]},"inputPorts":[{"portID":"input-0","displayName":"","allowMultiInputs":true,"isDynamicPort":false,"dependencies":[]}],"outputPorts":[{"portID":"output-0","displayName":"","allowMultiInputs":false,"isDynamicPort":false}],"showAdvanced":false,"isDisabled":false,"customDisplayName":"Scala 
UDF","dynamicInputPorts":true,"dynamicOutputPorts":true}],"operatorPositions":{"ScalaUDF-operator-62b8ecc5-9699-4c0f-860d-32bc9586c908":{"x":-91,"y":-249},"ScalaUDFSource-operator-2e26f979-cbf8-40ce-8452-fc46b26cea3d":{"x":-412,"y":-247},"ScalaUDF-operator-55707c07-adfd-4d7e-9f9c-1de3147544c2":{"x":267,"y":-250}},"links":[{"linkID":"link-10c70aeb-13cf-4c46-80d9-482f95b84b30","source":{"operatorID":"ScalaUDFSource-operator-2e26f979-cbf8-40ce-8452-fc46b26cea3d","portID":"output-0"},"target":{"operatorID":"ScalaUDF-operator-62b8ecc5-9699-4c0f-860d-32bc9586c908","portID":"input-0"}},{"linkID":"c84c29e7-4977-42bf-9a6c-761e6d64966f","source":{"operatorID":"ScalaUDF-operator-62b8ecc5-9699-4c0f-860d-32bc9586c908","portID":"output-0"},"target":{"operatorID":"ScalaUDF-operator-55707c07-adfd-4d7e-9f9c-1de3147544c2","portID":"input-0"}}],"groups":[],"commentBoxes":[]}