-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathali_twitter_final_model.json
1 lines (1 loc) · 42.6 KB
/
ali_twitter_final_model.json
1
{"paragraphs":[{"text":"%pyspark\nimport matplotlib.pyplot as plt\nimport numpy as np\nimport pandas as pd\n\nfrom pyspark.sql import SQLContext\nfrom pyspark.sql import functions as F\nfrom pyspark.sql import types as T\nfrom pyspark.sql import Row\nfrom pyspark.sql.window import Window\n\nfrom transformers import BertTokenizer\ntokenizer = BertTokenizer.from_pretrained('bert-base-multilingual-cased', do_lower_case=False)\n\n@F.udf(\"String\")\ndef decode_tokens(tokens):\n return tokenizer.decode(tokens)\n\nsqc = SQLContext(sc)","user":"anonymous","dateUpdated":"2020-06-15T17:20:55+0000","config":{"colWidth":12,"fontSize":9,"enabled":true,"results":{},"editorSetting":{"language":"python","editOnDblClick":false,"completionKey":"TAB","completionSupport":true},"editorMode":"ace/mode/python"},"settings":{"params":{},"forms":{}},"results":{"code":"SUCCESS","msg":[]},"apps":[],"jobName":"paragraph_1592164757410_-839404848","id":"20200614-195917_758422713","dateCreated":"2020-06-14T19:59:17+0000","dateStarted":"2020-06-15T17:20:55+0000","dateFinished":"2020-06-15T17:21:13+0000","status":"FINISHED","progressUpdateIntervalMs":500,"focus":true,"$$hashKey":"object:27121"},{"text":"%pyspark\nroot_file_path = \"/nas_ssd_social_media_analytics/ali_twitter/final_dataset_12062020/temp/\"\ntraining_parquet_path = root_file_path+\"training_df\"\nvalidation_parquet_path = root_file_path+\"val_df\"\ntest_parquet_path = root_file_path+\"test_df\"\n\ntraining_df = sqc.read.parquet(training_parquet_path)\nvalidation_df = sqc.read.parquet(validation_parquet_path)\ntest_df = 
sqc.read.parquet(test_parquet_path)","user":"anonymous","dateUpdated":"2020-06-15T17:20:57+0000","config":{"colWidth":12,"fontSize":9,"enabled":true,"results":{},"editorSetting":{"language":"python","editOnDblClick":false,"completionKey":"TAB","completionSupport":true},"editorMode":"ace/mode/python"},"settings":{"params":{},"forms":{}},"results":{"code":"SUCCESS","msg":[]},"apps":[],"jobName":"paragraph_1592164767995_-1760920210","id":"20200614-195927_1405233305","dateCreated":"2020-06-14T19:59:27+0000","dateStarted":"2020-06-15T17:20:59+0000","dateFinished":"2020-06-15T17:21:15+0000","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:27122"},{"text":"%pyspark\ntraining_data = training_df\\\n.withColumn(\"reply\", F.when(F.col(\"reply_engagement_timestamp\").isNull(), 0).otherwise(1))\\\n.withColumn(\"retweet\", F.when(F.col(\"retweet_engagement_timestamp\").isNull(), 0).otherwise(1))\\\n.withColumn(\"rtWithCmt\", F.when(F.col(\"retweet_with_comment_engagement_timestamp\").isNull(), 0).otherwise(1))\\\n.withColumn(\"like\", F.when(F.col(\"like_engagement_timestamp\").isNull(), 0).otherwise(1))\\\n.select(\"tweet_id\", \"engager_user_id\", \"engagee_user_id\", \"reply\", \"retweet\", \"rtWithCmt\", \"like\")","user":"anonymous","dateUpdated":"2020-06-15T14:46:01+0000","config":{"colWidth":12,"fontSize":9,"enabled":true,"results":{},"editorSetting":{"language":"python","editOnDblClick":false,"completionKey":"TAB","completionSupport":true},"editorMode":"ace/mode/python"},"settings":{"params":{},"forms":{}},"results":{"code":"SUCCESS","msg":[]},"apps":[],"jobName":"paragraph_1592164778810_-1301701324","id":"20200614-195938_329745939","dateCreated":"2020-06-14T19:59:38+0000","dateStarted":"2020-06-15T14:46:10+0000","dateFinished":"2020-06-15T14:46:13+0000","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:27123"},{"text":"%pyspark\n\ntraining_data = training_df\\\n.withColumn(\"reply\", 
F.when(F.col(\"reply_engagement_timestamp\").isNull(), 0).otherwise(1))\\\n.withColumn(\"retweet\", F.when(F.col(\"retweet_engagement_timestamp\").isNull(), 0).otherwise(1))\\\n.withColumn(\"rtWithCmt\", F.when(F.col(\"retweet_with_comment_engagement_timestamp\").isNull(), 0).otherwise(1))\\\n.withColumn(\"like\", F.when(F.col(\"like_engagement_timestamp\").isNull(), 0).otherwise(1))\\\n.select(\"tweet_id\", \"engager_user_id\", \"engagee_user_id\", \"reply\", \"retweet\", \"rtWithCmt\", \"like\")\n\nfrom pyspark.ml import Pipeline, PipelineModel\nfrom pyspark.ml.feature import VectorAssembler\n\ncreate_tweet_features = PipelineModel.load(root_file_path+\"create_tweet_features_model\")\ncreate_engager_user_features = PipelineModel.load(root_file_path+\"create_engager_user_features_model\")\ncreate_engagee_user_features = PipelineModel.load(root_file_path+\"create_engagee_user_features_model\")\n\ntweet_features = sqc.read.parquet(root_file_path+\"training_tweets\")\ntweet_features = create_tweet_features.transform(tweet_features).select(\"tweet_id\", \"tweet_features\")\n\nengager_features = sqc.read.parquet(root_file_path+\"training_engager_user_df.parquet\")\nengager_features = create_engager_user_features.transform(engager_features).select(\"engager_user_id\", \"engager_features\")\n\nengagee_features = sqc.read.parquet(root_file_path+\"training_engagee_user_df.parquet\")\nengagee_features = create_engagee_user_features.transform(engagee_features).select(\"engagee_user_id\", \"engagee_features\")\n\ntraining_data = training_data.join(tweet_features, \"tweet_id\")\ntraining_data = training_data.join(engager_features, \"engager_user_id\")\ntraining_data = training_data.join(engagee_features, \"engagee_user_id\")\n# training_data.show()\n\nassemblerInputs = [\"tweet_features\", \"engager_features\", \"engagee_features\"]\nassembler = VectorAssembler(inputCols=assemblerInputs, outputCol=\"featuresAssembled\")\n\ntraining_data = 
assembler.transform(training_data).drop(*assemblerInputs)\ntraining_data.write.parquet(root_file_path+\"training_data.parquet\")","user":"anonymous","dateUpdated":"2020-06-14T22:03:26+0000","config":{"colWidth":12,"fontSize":9,"enabled":true,"results":{},"editorSetting":{"language":"python","editOnDblClick":false,"completionKey":"TAB","completionSupport":true},"editorMode":"ace/mode/python"},"settings":{"params":{},"forms":{}},"results":{"code":"SUCCESS","msg":[]},"apps":[],"jobName":"paragraph_1592165080509_920821675","id":"20200614-200440_523601547","dateCreated":"2020-06-14T20:04:40+0000","dateStarted":"2020-06-14T22:03:26+0000","dateFinished":"2020-06-14T22:22:33+0000","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:27124"},{"text":"%pyspark\ntraining_data = sqc.read.parquet(root_file_path+\"training_data.parquet\")","user":"anonymous","dateUpdated":"2020-06-15T17:21:12+0000","config":{"colWidth":12,"fontSize":9,"enabled":true,"results":{},"editorSetting":{"language":"python","editOnDblClick":false,"completionKey":"TAB","completionSupport":true},"editorMode":"ace/mode/python"},"settings":{"params":{},"forms":{}},"results":{"code":"SUCCESS","msg":[]},"apps":[],"jobName":"paragraph_1592173491860_-2036024346","id":"20200614-222451_380186450","dateCreated":"2020-06-14T22:24:51+0000","dateStarted":"2020-06-15T17:21:13+0000","dateFinished":"2020-06-15T17:21:17+0000","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:27125"},{"text":"%pyspark\ntraining_data.show()","user":"anonymous","dateUpdated":"2020-06-15T17:21:15+0000","config":{"colWidth":12,"fontSize":9,"enabled":true,"results":{},"editorSetting":{"language":"python","editOnDblClick":false,"completionKey":"TAB","completionSupport":true},"editorMode":"ace/mode/python"},"settings":{"params":{},"forms":{}},"results":{"code":"SUCCESS","msg":[{"type":"TEXT","data":"+--------------------+--------------------+--------------------+-----+-------+---------+----+------------------
--+\n| engagee_user_id| engager_user_id| tweet_id|reply|retweet|rtWithCmt|like| featuresAssembled|\n+--------------------+--------------------+--------------------+-----+-------+---------+----+--------------------+\n|00007622B2D4B1996...|ED2ACF4AF0C6BD884...|5E9F27BD16E612989...| 0| 0| 0| 1|(64,[4,8,15,18,19...|\n|00007622B2D4B1996...|48940449BD112BC9C...|309590775D9145876...| 0| 0| 0| 0|(64,[0,1,5,6,10,1...|\n|00007622B2D4B1996...|36C56A4B9FAB9490F...|B348F1EE8D236AE51...| 0| 0| 0| 1|(64,[6,7,8,9,10,1...|\n|00007622B2D4B1996...|10C1C295EFAE02455...|88A9909A4B5752C2F...| 0| 0| 0| 1|(64,[1,5,6,7,8,20...|\n|00007622B2D4B1996...|4D6DA0D95722ED3B0...|A15F461D6E1AB1057...| 0| 0| 0| 0|(64,[1,2,3,4,5,6,...|\n|00007622B2D4B1996...|F88C104DC8A1A83CC...|874BD54C0DA6FD010...| 0| 0| 0| 1|(64,[0,1,3,4,6,7,...|\n|00008B878F2FE6639...|02021C2F320530DE5...|D302012D0D0BEFF69...| 0| 0| 0| 1|(64,[2,3,4,6,7,10...|\n|0000ADAF7A30CBFC8...|FA4479E37A1CA86A6...|5E688D69D453E5F98...| 0| 0| 0| 1|(64,[0,1,2,6,7,8,...|\n|0000ADAF7A30CBFC8...|92D9ED0D028C10130...|9B479D2111756938A...| 0| 0| 0| 1|(64,[3,5,6,7,8,13...|\n|0000ADAF7A30CBFC8...|EE702AD901D69F8B2...|46D16E05D4DDFC649...| 0| 0| 0| 0|(64,[0,3,5,8,9,10...|\n|00016050DFE263ABD...|57F0738938EED522C...|374ABDBB214142748...| 0| 0| 0| 1|(64,[3,4,5,7,11,1...|\n|00016050DFE263ABD...|8E8D32C17A07174FC...|399FD80CF65B2BE09...| 0| 0| 0| 1|(64,[0,1,3,6,7,10...|\n|00016050DFE263ABD...|5AEE78C40301B1BEB...|5118DD2142B42B13A...| 0| 0| 0| 1|(64,[0,1,2,3,4,6,...|\n|000256375EC67511A...|85340C04C1B01FFBB...|EA90A63BB706B6039...| 0| 0| 0| 0|(64,[1,2,4,5,6,7,...|\n|000256375EC67511A...|4539BC9A29CA451CA...|6203DE01C85BDA99A...| 0| 0| 0| 0|(64,[2,3,14,15,19...|\n|000256375EC67511A...|8D7006E44BDA5301B...|C1E729AE3EB7290FB...| 0| 0| 0| 0|(64,[5,6,7,12,15,...|\n|000256375EC67511A...|F8FE48C85DA537296...|34FC1B3F91B195E4C...| 0| 0| 0| 0|(64,[0,1,2,3,6,7,...|\n|0002BCD4EF7E46834...|97E9089E5EE03AA39...|045B5748449C5CE68...| 0| 0| 0| 
0|(64,[2,3,11,18,20...|\n|0002BCD4EF7E46834...|CFB4E957E69FB4385...|5260D061E66F221B0...| 0| 0| 0| 0|(64,[4,5,17,20,21...|\n|00032AE7A56145EA4...|1C61624BC923D3944...|416D71D336864B77F...| 0| 0| 0| 1|(64,[3,5,6,11,13,...|\n+--------------------+--------------------+--------------------+-----+-------+---------+----+--------------------+\nonly showing top 20 rows\n\n"}]},"apps":[],"jobName":"paragraph_1592171757886_1498532446","id":"20200614-215557_1520516457","dateCreated":"2020-06-14T21:55:57+0000","dateStarted":"2020-06-15T17:21:16+0000","dateFinished":"2020-06-15T17:21:19+0000","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:27126"},{"title":"train for likes","text":"%pyspark\nfrom pyspark.ml.classification import GBTClassifier\n\ngbt_like_model = GBTClassifier(labelCol=\"like\", featuresCol=\"featuresAssembled\", maxDepth=4, maxIter=5).fit(training_data)\ngbt_like_model.save(root_file_path+\"models/gbt_like\")","user":"anonymous","dateUpdated":"2020-06-15T17:21:42+0000","config":{"colWidth":12,"fontSize":9,"enabled":true,"results":{},"editorSetting":{"language":"python","editOnDblClick":false,"completionKey":"TAB","completionSupport":true},"editorMode":"ace/mode/python","title":true},"settings":{"params":{},"forms":{}},"results":{"code":"SUCCESS","msg":[]},"apps":[],"jobName":"paragraph_1592177964289_1128837327","id":"20200614-233924_1451181183","dateCreated":"2020-06-14T23:39:24+0000","dateStarted":"2020-06-14T23:46:36+0000","dateFinished":"2020-06-15T00:01:09+0000","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:27127"},{"text":"%pyspark\nfrom pyspark.ml.classification import GBTClassifier\n\ngbt_like_model = GBTClassifier(labelCol=\"like\", 
featuresCol=\"featuresAssembled\").fit(training_data)\ngbt_like_model.save(root_file_path+\"models/gbt_like2\")","user":"anonymous","dateUpdated":"2020-06-15T17:21:42+0000","config":{"colWidth":12,"fontSize":9,"enabled":true,"results":{},"editorSetting":{"language":"python","editOnDblClick":false,"completionKey":"TAB","completionSupport":true},"editorMode":"ace/mode/python"},"settings":{"params":{},"forms":{}},"results":{"code":"SUCCESS","msg":[]},"apps":[],"jobName":"paragraph_1592241690225_-1597514331","id":"20200615-172130_297478272","dateCreated":"2020-06-15T17:21:30+0000","dateStarted":"2020-06-15T17:21:43+0000","dateFinished":"2020-06-15T18:19:53+0000","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:27128"},{"title":"load trained model for likes","text":"%pyspark\nfrom pyspark.ml.classification import GBTClassificationModel\nlike_model_loaded = GBTClassificationModel.load(root_file_path+\"models/gbt_like\")","user":"anonymous","dateUpdated":"2020-06-15T12:33:56+0000","config":{"colWidth":12,"fontSize":9,"enabled":true,"results":{},"editorSetting":{"language":"python","editOnDblClick":false,"completionKey":"TAB","completionSupport":true},"editorMode":"ace/mode/python","title":true},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1592178341202_1407034894","id":"20200614-234541_27087849","dateCreated":"2020-06-14T23:45:41+0000","status":"READY","progressUpdateIntervalMs":500,"$$hashKey":"object:27129"},{"title":"train for likes","text":"%pyspark\nfrom pyspark.ml.classification import GBTClassifier\n\nfrom pyspark.ml.evaluation import BinaryClassificationEvaluator\nfrom pyspark.ml.tuning import CrossValidator, ParamGridBuilder\n\n\ngbt = GBTClassifier(labelCol=\"like\",\n featuresCol=\"featuresAssembled\")\n\nevaluator = BinaryClassificationEvaluator(labelCol=\"like\")\n\n# parameter grid searched by the cross-validator: 3 maxIter x 3 maxDepth x 2 minInfoGain = 18 combinations\nparamGrid = ParamGridBuilder()\\\n .addGrid(gbt.maxIter, [5, 20, 50]) \\\n .addGrid(gbt.maxDepth, [3, 4, 5]) \\\n 
.addGrid(gbt.minInfoGain, [0.1, 0.01]) \\\n .build()\n\n# cross validation; numFolds is not passed (numFolds=6 is commented out below), so Spark's default of 3 folds applies\ncrossval = CrossValidator(\n estimator=gbt, estimatorParamMaps=paramGrid, evaluator=evaluator) #, numFolds=6\n\n\nmodel = crossval.fit(training_data)","user":"anonymous","dateUpdated":"2020-06-15T17:20:41+0000","config":{"colWidth":12,"fontSize":9,"enabled":true,"results":{},"editorSetting":{"language":"python","editOnDblClick":false,"completionKey":"TAB","completionSupport":true},"editorMode":"ace/mode/python","title":true,"editorHide":true},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1592226049393_129860127","id":"20200615-130049_158516459","dateCreated":"2020-06-15T13:00:49+0000","dateStarted":"2020-06-15T14:50:25+0000","dateFinished":"2020-06-15T17:19:52+0000","status":"ABORT","errorMessage":"","progressUpdateIntervalMs":500,"$$hashKey":"object:27130"},{"title":"train for reply","text":"%pyspark\nfrom pyspark.ml.classification import GBTClassifier\n\ngbt_reply_model = GBTClassifier(labelCol=\"reply\", featuresCol=\"featuresAssembled\").fit(training_data)\ngbt_reply_model.save(root_file_path+\"models/gbt_reply2\")","user":"anonymous","dateUpdated":"2020-06-15T18:44:24+0000","config":{"colWidth":12,"fontSize":9,"enabled":true,"results":{},"editorSetting":{"language":"python","editOnDblClick":false,"completionKey":"TAB","completionSupport":true},"editorMode":"ace/mode/python","title":true},"settings":{"params":{},"forms":{}},"results":{"code":"SUCCESS","msg":[]},"apps":[],"jobName":"paragraph_1592224332217_-1660203972","id":"20200615-123212_927722777","dateCreated":"2020-06-15T12:32:12+0000","dateStarted":"2020-06-15T18:44:24+0000","dateFinished":"2020-06-15T19:37:43+0000","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:27131"},{"text":"%pyspark\nreply_model_loaded = 
GBTClassificationModel.load(root_file_path+\"models/gbt_reply\")","user":"anonymous","dateUpdated":"2020-06-15T12:34:19+0000","config":{"colWidth":12,"fontSize":9,"enabled":true,"results":{},"editorSetting":{"language":"python","editOnDblClick":false,"completionKey":"TAB","completionSupport":true},"editorMode":"ace/mode/python"},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1592224440769_75623894","id":"20200615-123400_2041355630","dateCreated":"2020-06-15T12:34:00+0000","status":"READY","progressUpdateIntervalMs":500,"$$hashKey":"object:27132"},{"title":"train for retweet","text":"%pyspark\nfrom pyspark.ml.classification import GBTClassifier\n\ngbt_retweet_model = GBTClassifier(labelCol=\"retweet\", featuresCol=\"featuresAssembled\").fit(training_data)\ngbt_retweet_model.save(root_file_path+\"models/gbt_retweet2\")","user":"anonymous","dateUpdated":"2020-06-15T19:41:57+0000","config":{"colWidth":12,"fontSize":9,"enabled":true,"results":{},"editorSetting":{"language":"python","editOnDblClick":false,"completionKey":"TAB","completionSupport":true},"editorMode":"ace/mode/python","title":true},"settings":{"params":{},"forms":{}},"results":{"code":"SUCCESS","msg":[]},"apps":[],"jobName":"paragraph_1592224344603_-1278470559","id":"20200615-123224_1530529345","dateCreated":"2020-06-15T12:32:24+0000","dateStarted":"2020-06-15T19:44:41+0000","dateFinished":"2020-06-15T20:41:34+0000","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:27133"},{"text":"%pyspark\nretweet_model_loaded = 
GBTClassificationModel.load(root_file_path+\"models/gbt_retweet\")","user":"anonymous","dateUpdated":"2020-06-15T12:34:29+0000","config":{"colWidth":12,"fontSize":9,"enabled":true,"results":{},"editorSetting":{"language":"python","editOnDblClick":false,"completionKey":"TAB","completionSupport":true},"editorMode":"ace/mode/python"},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1592224450794_590262922","id":"20200615-123410_1970497792","dateCreated":"2020-06-15T12:34:10+0000","status":"READY","progressUpdateIntervalMs":500,"$$hashKey":"object:27134"},{"title":"train for rtwithCmt","text":"%pyspark\nfrom pyspark.ml.classification import GBTClassifier\n\ngbt_rtWithCmt_model = GBTClassifier(labelCol=\"rtWithCmt\", featuresCol=\"featuresAssembled\").fit(training_data)\ngbt_rtWithCmt_model.save(root_file_path+\"models/gbt_rtWithCmt2\")\n","user":"anonymous","dateUpdated":"2020-06-15T20:31:39+0000","config":{"colWidth":12,"fontSize":9,"enabled":true,"results":{},"editorSetting":{"language":"python","editOnDblClick":false,"completionKey":"TAB","completionSupport":true},"editorMode":"ace/mode/python","title":true},"settings":{"params":{},"forms":{}},"results":{"code":"SUCCESS","msg":[]},"apps":[],"jobName":"paragraph_1592224354123_79718398","id":"20200615-123234_716598560","dateCreated":"2020-06-15T12:32:34+0000","dateStarted":"2020-06-15T20:31:39+0000","dateFinished":"2020-06-15T21:34:24+0000","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:27135"},{"text":"%pyspark\nretwithCmt_model_loaded = 
GBTClassificationModel.load(root_file_path+\"models/gbt_rtWithCmt\")","user":"anonymous","dateUpdated":"2020-06-15T12:34:54+0000","config":{"colWidth":12,"fontSize":9,"enabled":true,"results":{},"editorSetting":{"language":"python","editOnDblClick":false,"completionKey":"TAB","completionSupport":true},"editorMode":"ace/mode/python"},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1592224469970_393803543","id":"20200615-123429_431450761","dateCreated":"2020-06-15T12:34:29+0000","status":"READY","progressUpdateIntervalMs":500,"$$hashKey":"object:27136"},{"text":"%pyspark\nvalidation_data = validation_df\\\n.select(\"tweet_id\", \"engager_user_id\", \"engagee_user_id\")\n\nfrom pyspark.ml import Pipeline, PipelineModel\nfrom pyspark.ml.feature import VectorAssembler\n\ncreate_tweet_features = PipelineModel.load(root_file_path+\"create_tweet_features_model\")\ncreate_engager_user_features = PipelineModel.load(root_file_path+\"create_engager_user_features_model\")\ncreate_engagee_user_features = PipelineModel.load(root_file_path+\"create_engagee_user_features_model\")\n\nvalidation_features = sqc.read.parquet(root_file_path+\"validation_tweets\")\nvalidation_features = create_tweet_features.transform(validation_features).select(\"tweet_id\", \"tweet_features\")\n\nengager_features = sqc.read.parquet(root_file_path+\"validation_engager_user_df.parquet\")\nengager_features = create_engager_user_features.transform(engager_features).select(\"engager_user_id\", \"engager_features\")\n\nengagee_features = sqc.read.parquet(root_file_path+\"validation_engagee_user_df.parquet\")\nengagee_features = create_engagee_user_features.transform(engagee_features).select(\"engagee_user_id\", \"engagee_features\")\n\nvalidation_data = validation_data.join(validation_features, \"tweet_id\")\nvalidation_data = validation_data.join(engager_features, \"engager_user_id\")\nvalidation_data = validation_data.join(engagee_features, \"engagee_user_id\")\n# 
validation_data.show()\n\nassemblerInputs = [\"tweet_features\", \"engager_features\", \"engagee_features\"]\nassembler = VectorAssembler(inputCols=assemblerInputs, outputCol=\"featuresAssembled\")\n\nvalidation_data = assembler.transform(validation_data).drop(*assemblerInputs)\nvalidation_data.write.parquet(root_file_path+\"validation_data.parquet\")","user":"anonymous","dateUpdated":"2020-06-15T11:50:16+0000","config":{"colWidth":12,"fontSize":9,"enabled":true,"results":{},"editorSetting":{"language":"python","editOnDblClick":false,"completionKey":"TAB","completionSupport":true},"editorMode":"ace/mode/python"},"settings":{"params":{},"forms":{}},"results":{"code":"SUCCESS","msg":[]},"apps":[],"jobName":"paragraph_1592204953761_2136527199","id":"20200615-070913_98772716","dateCreated":"2020-06-15T07:09:13+0000","dateStarted":"2020-06-15T11:50:06+0000","dateFinished":"2020-06-15T11:53:14+0000","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:27137"},{"text":"%pyspark\nvalidation_data = sqc.read.parquet(root_file_path+\"validation_data.parquet\")","user":"anonymous","dateUpdated":"2020-06-15T08:18:38+0000","config":{"colWidth":12,"fontSize":9,"enabled":true,"results":{},"editorSetting":{"language":"python","editOnDblClick":false,"completionKey":"TAB","completionSupport":true},"editorMode":"ace/mode/python"},"settings":{"params":{},"forms":{}},"results":{"code":"SUCCESS","msg":[]},"apps":[],"jobName":"paragraph_1592209098447_1887438292","id":"20200615-081818_977433198","dateCreated":"2020-06-15T08:18:18+0000","dateStarted":"2020-06-15T08:18:38+0000","dateFinished":"2020-06-15T08:18:38+0000","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:27138"},{"title":"Predict for likes on Validation","text":"%pyspark\n\nvalidation_data = sqc.read.parquet(root_file_path+\"validation_data.parquet\")\n\nprint(\"making predictions ...\")\npredictions = gbt_like_model.transform(validation_data)\n\nprint(\"writring predictions to 
file\")\nsplit1_udf = F.udf(lambda value: value[1].item(), T.DoubleType())\npredictions.select(\"tweet_id\", \"engagee_user_id\", split1_udf(\"probability\").alias(\"probability\"))\\\n.coalesce(1).write.csv(root_file_path+\"validaiton_predictions/like2.csv\")","user":"anonymous","dateUpdated":"2020-06-15T18:32:05+0000","config":{"colWidth":12,"fontSize":9,"enabled":true,"results":{},"editorSetting":{"language":"python","editOnDblClick":false,"completionKey":"TAB","completionSupport":true},"editorMode":"ace/mode/python","title":true},"settings":{"params":{},"forms":{}},"results":{"code":"SUCCESS","msg":[{"type":"TEXT","data":"making predictions ...\nwritring predictions to file\n"}]},"apps":[],"jobName":"paragraph_1592206897373_-1919743643","id":"20200615-074137_1314894223","dateCreated":"2020-06-15T07:41:37+0000","dateStarted":"2020-06-15T18:32:05+0000","dateFinished":"2020-06-15T18:35:28+0000","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:27139"},{"title":"Predict for reply on Validation","text":"%pyspark\n\nprint(\"making predictions ...\")\npredictions = gbt_reply_model.transform(validation_data)\n\nprint(\"writring predictions to file\")\nsplit1_udf = F.udf(lambda value: value[1].item(), T.DoubleType())\npredictions.select(\"tweet_id\", \"engagee_user_id\", split1_udf(\"probability\").alias(\"probability\"))\\\n.coalesce(1).write.csv(root_file_path+\"validaiton_predictions/reply2.csv\")\n","user":"anonymous","dateUpdated":"2020-06-15T19:31:30+0000","config":{"colWidth":12,"fontSize":9,"enabled":true,"results":{},"editorSetting":{"language":"python","editOnDblClick":false,"completionKey":"TAB","completionSupport":true},"editorMode":"ace/mode/python","title":true},"settings":{"params":{},"forms":{}},"results":{"code":"SUCCESS","msg":[{"type":"TEXT","data":"making predictions ...\nwritring predictions to 
file\n"}]},"apps":[],"jobName":"paragraph_1592224236323_-1325611928","id":"20200615-123036_1437069229","dateCreated":"2020-06-15T12:30:36+0000","dateStarted":"2020-06-15T19:31:31+0000","dateFinished":"2020-06-15T19:41:02+0000","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:27140"},{"title":"Predict for retweet on Validation","text":"%pyspark\nprint(\"making predictions ...\")\npredictions = gbt_retweet_model.transform(validation_data)\n\nprint(\"writring predictions to file\")\nsplit1_udf = F.udf(lambda value: value[1].item(), T.DoubleType())\npredictions.select(\"tweet_id\", \"engagee_user_id\", split1_udf(\"probability\").alias(\"probability\"))\\\n.coalesce(1).write.csv(root_file_path+\"validaiton_predictions/retweet2.csv\")","user":"anonymous","dateUpdated":"2020-06-15T21:24:07+0000","config":{"colWidth":12,"fontSize":9,"enabled":true,"results":{},"editorSetting":{"language":"python","editOnDblClick":false,"completionKey":"TAB","completionSupport":true},"editorMode":"ace/mode/python","title":true},"settings":{"params":{},"forms":{}},"results":{"code":"SUCCESS","msg":[{"type":"TEXT","data":"making predictions ...\nwritring predictions to file\n"}]},"apps":[],"jobName":"paragraph_1592224250875_-1388888914","id":"20200615-123050_574329149","dateCreated":"2020-06-15T12:30:50+0000","dateStarted":"2020-06-15T21:24:07+0000","dateFinished":"2020-06-15T21:37:36+0000","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:27141"},{"title":"Predict for rtWithCmt on Validation","text":"%pyspark\nprint(\"making predictions ...\")\npredictions = gbt_rtWithCmt_model.transform(validation_data)\n\nprint(\"writring predictions to file\")\nsplit1_udf = F.udf(lambda value: value[1].item(), T.DoubleType())\npredictions.select(\"tweet_id\", \"engagee_user_id\", 
split1_udf(\"probability\").alias(\"probability\"))\\\n.coalesce(1).write.csv(root_file_path+\"validaiton_predictions/rtWithCmt2.csv\")","user":"anonymous","dateUpdated":"2020-06-15T21:24:09+0000","config":{"colWidth":12,"fontSize":9,"enabled":true,"results":{},"editorSetting":{"language":"python","editOnDblClick":false,"completionKey":"TAB","completionSupport":true},"editorMode":"ace/mode/python","title":true},"settings":{"params":{},"forms":{}},"results":{"code":"SUCCESS","msg":[{"type":"TEXT","data":"making predictions ...\nwritring predictions to file\n"}]},"apps":[],"jobName":"paragraph_1592224258983_835849769","id":"20200615-123058_1895249652","dateCreated":"2020-06-15T12:30:58+0000","dateStarted":"2020-06-15T21:34:25+0000","dateFinished":"2020-06-15T21:40:59+0000","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:27142"},{"text":"%pyspark\ntest_data = test_df\\\n.select(\"tweet_id\", \"engager_user_id\", \"engagee_user_id\")\n\nfrom pyspark.ml import Pipeline, PipelineModel\nfrom pyspark.ml.feature import VectorAssembler\n\ncreate_tweet_features = PipelineModel.load(root_file_path+\"create_tweet_features_model\")\ncreate_engager_user_features = PipelineModel.load(root_file_path+\"create_engager_user_features_model\")\ncreate_engagee_user_features = PipelineModel.load(root_file_path+\"create_engagee_user_features_model\")\n\ntest_features = sqc.read.parquet(root_file_path+\"test_tweets\")\ntest_features = create_tweet_features.transform(test_features).select(\"tweet_id\", \"tweet_features\")\n\nengager_features = sqc.read.parquet(root_file_path+\"test_engager_user_df.parquet\")\nengager_features = create_engager_user_features.transform(engager_features).select(\"engager_user_id\", \"engager_features\")\n\nengagee_features = sqc.read.parquet(root_file_path+\"test_engagee_user_df.parquet\")\nengagee_features = create_engagee_user_features.transform(engagee_features).select(\"engagee_user_id\", \"engagee_features\")\n\ntest_data = 
test_data.join(test_features, \"tweet_id\")\ntest_data = test_data.join(engager_features, \"engager_user_id\")\ntest_data = test_data.join(engagee_features, \"engagee_user_id\")\n# test_data.show()\n\nassemblerInputs = [\"tweet_features\", \"engager_features\", \"engagee_features\"]\nassembler = VectorAssembler(inputCols=assemblerInputs, outputCol=\"featuresAssembled\")\n\ntest_data = assembler.transform(test_data).drop(*assemblerInputs)\ntest_data.write.parquet(root_file_path+\"test_data.parquet\")","user":"anonymous","dateUpdated":"2020-06-15T00:08:06+0000","config":{"colWidth":12,"fontSize":9,"enabled":true,"results":{},"editorSetting":{"language":"python","editOnDblClick":false,"completionKey":"TAB","completionSupport":true},"editorMode":"ace/mode/python"},"settings":{"params":{},"forms":{}},"results":{"code":"SUCCESS","msg":[]},"apps":[],"jobName":"paragraph_1592178421161_-1404523589","id":"20200614-234701_2729318","dateCreated":"2020-06-14T23:47:01+0000","dateStarted":"2020-06-15T00:08:06+0000","dateFinished":"2020-06-15T00:10:53+0000","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:27143"},{"text":"%pyspark\ntest_data = sqc.read.parquet(root_file_path+\"test_data.parquet\")","user":"anonymous","dateUpdated":"2020-06-15T19:41:23+0000","config":{"colWidth":12,"fontSize":9,"enabled":true,"results":{},"editorSetting":{"language":"python","editOnDblClick":false,"completionKey":"TAB","completionSupport":true},"editorMode":"ace/mode/python"},"settings":{"params":{},"forms":{}},"results":{"code":"SUCCESS","msg":[]},"apps":[],"jobName":"paragraph_1592179158813_1442075907","id":"20200614-235918_369292246","dateCreated":"2020-06-14T23:59:18+0000","dateStarted":"2020-06-15T19:41:23+0000","dateFinished":"2020-06-15T19:41:23+0000","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:27144"},{"title":"Predict for likes on test dataset","text":"%pyspark\n\nprint(\"making predictions ...\")\npredictions = 
gbt_like_model.transform(test_data)\n\nprint(\"writring predictions to file\")\nsplit1_udf = F.udf(lambda value: value[1].item(), T.DoubleType())\npredictions.select(\"tweet_id\", \"engagee_user_id\", split1_udf(\"probability\").alias(\"probability\"))\\\n.coalesce(1).write.csv(root_file_path+\"predictions/like2.csv\")","user":"anonymous","dateUpdated":"2020-06-15T19:41:30+0000","config":{"colWidth":12,"fontSize":9,"enabled":true,"results":{},"editorSetting":{"language":"python","editOnDblClick":false,"completionKey":"TAB","completionSupport":true},"editorMode":"ace/mode/python","title":true},"settings":{"params":{},"forms":{}},"results":{"code":"SUCCESS","msg":[{"type":"TEXT","data":"making predictions ...\nwritring predictions to file\n"}]},"apps":[],"jobName":"paragraph_1592178042327_-1271812211","id":"20200614-234042_6269265","dateCreated":"2020-06-14T23:40:42+0000","dateStarted":"2020-06-15T19:41:30+0000","dateFinished":"2020-06-15T19:48:20+0000","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:27145"},{"title":"Predict for reply on test data","text":"%pyspark\nprint(\"making predictions ...\")\npredictions = gbt_reply_model.transform(test_data)\n\nprint(\"writring predictions to file\")\nsplit1_udf = F.udf(lambda value: value[1].item(), T.DoubleType())\npredictions.select(\"tweet_id\", \"engagee_user_id\", split1_udf(\"probability\").alias(\"probability\"))\\\n.coalesce(1).write.csv(root_file_path+\"predictions/reply2.csv\")","user":"anonymous","dateUpdated":"2020-06-15T19:41:27+0000","config":{"colWidth":12,"fontSize":9,"enabled":true,"results":{},"editorSetting":{"language":"python","editOnDblClick":false,"completionKey":"TAB","completionSupport":true},"editorMode":"ace/mode/python","title":true},"settings":{"params":{},"forms":{}},"results":{"code":"SUCCESS","msg":[{"type":"TEXT","data":"making predictions ...\nwritring predictions to 
file\n"}]},"apps":[],"jobName":"paragraph_1592225159151_-1677029639","id":"20200615-124559_1039320408","dateCreated":"2020-06-15T12:45:59+0000","dateStarted":"2020-06-15T19:41:28+0000","dateFinished":"2020-06-15T19:44:41+0000","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:27146"},{"text":"%pyspark\n","user":"anonymous","dateUpdated":"2020-06-15T21:46:03+0000","config":{"colWidth":12,"fontSize":9,"enabled":true,"results":{},"editorSetting":{"language":"python","editOnDblClick":false,"completionKey":"TAB","completionSupport":true},"editorMode":"ace/mode/python"},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1592257563319_108723254","id":"20200615-214603_1809314038","dateCreated":"2020-06-15T21:46:03+0000","status":"READY","progressUpdateIntervalMs":500,"$$hashKey":"object:27147"},{"title":"Predict for rtWithCmt on test data","text":"%pyspark\nprint(\"making predictions ...\")\npredictions = gbt_rtWithCmt_model.transform(test_data)\n\nprint(\"writring predictions to file\")\nsplit1_udf = F.udf(lambda value: value[1].item(), T.DoubleType())\npredictions.select(\"tweet_id\", \"engagee_user_id\", split1_udf(\"probability\").alias(\"probability\"))\\\n.coalesce(1).write.csv(root_file_path+\"predictions/rtWithCmt2.csv\")","user":"anonymous","dateUpdated":"2020-06-15T21:24:15+0000","config":{"colWidth":12,"fontSize":9,"enabled":true,"results":{},"editorSetting":{"language":"python","editOnDblClick":false,"completionKey":"TAB","completionSupport":true},"editorMode":"ace/mode/python","title":true},"settings":{"params":{},"forms":{}},"results":{"code":"SUCCESS","msg":[{"type":"TEXT","data":"making predictions ...\nwritring predictions to 
file\n"}]},"apps":[],"jobName":"paragraph_1592225189200_1551041233","id":"20200615-124629_1043292335","dateCreated":"2020-06-15T12:46:29+0000","dateStarted":"2020-06-15T21:37:36+0000","dateFinished":"2020-06-15T21:44:24+0000","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:27148"},{"title":"Predict for retweet on test data","text":"%pyspark\nprint(\"making predictions ...\")\npredictions = gbt_retweet_model.transform(test_data)\n\nprint(\"writring predictions to file\")\nsplit1_udf = F.udf(lambda value: value[1].item(), T.DoubleType())\npredictions.select(\"tweet_id\", \"engagee_user_id\", split1_udf(\"probability\").alias(\"probability\"))\\\n.coalesce(1).write.csv(root_file_path+\"predictions/retweet2.csv\")","user":"anonymous","dateUpdated":"2020-06-15T21:24:16+0000","config":{"colWidth":12,"fontSize":9,"enabled":true,"results":{},"editorSetting":{"language":"python","editOnDblClick":false,"completionKey":"TAB","completionSupport":true},"editorMode":"ace/mode/python","title":true},"settings":{"params":{},"forms":{}},"results":{"code":"SUCCESS","msg":[{"type":"TEXT","data":"making predictions ...\nwritring predictions to 
file\n"}]},"apps":[],"jobName":"paragraph_1592225220681_620191348","id":"20200615-124700_2016138907","dateCreated":"2020-06-15T12:47:00+0000","dateStarted":"2020-06-15T21:40:59+0000","dateFinished":"2020-06-15T21:47:50+0000","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:27149"},{"text":"%pyspark\n","user":"anonymous","dateUpdated":"2020-06-15T12:47:56+0000","config":{"colWidth":12,"fontSize":9,"enabled":true,"results":{},"editorSetting":{"language":"python","editOnDblClick":false,"completionKey":"TAB","completionSupport":true},"editorMode":"ace/mode/python"},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1592225276697_-928977413","id":"20200615-124756_2044131897","dateCreated":"2020-06-15T12:47:56+0000","status":"READY","progressUpdateIntervalMs":500,"$$hashKey":"object:27150"},{"text":"%pyspark\n","user":"anonymous","dateUpdated":"2020-06-15T12:47:56+0000","config":{"colWidth":12,"fontSize":9,"enabled":true,"results":{},"editorSetting":{"language":"python","editOnDblClick":false,"completionKey":"TAB","completionSupport":true},"editorMode":"ace/mode/python"},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1592225276519_-338235560","id":"20200615-124756_1692330483","dateCreated":"2020-06-15T12:47:56+0000","status":"READY","progressUpdateIntervalMs":500,"$$hashKey":"object:27151"},{"text":"%pyspark\n","user":"anonymous","dateUpdated":"2020-06-15T12:47:56+0000","config":{"colWidth":12,"fontSize":9,"enabled":true,"results":{},"editorSetting":{"language":"python","editOnDblClick":false,"completionKey":"TAB","completionSupport":true},"editorMode":"ace/mode/python"},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1592225276347_185675998","id":"20200615-124756_568082628","dateCreated":"2020-06-15T12:47:56+0000","status":"READY","progressUpdateIntervalMs":500,"$$hashKey":"object:27152"},{"text":"%pyspark\n","user":"anonymous","dateUpdated":"2020-06-15T12:47:54+0000","config":{"colWidth":12,"fo
ntSize":9,"enabled":true,"results":{},"editorSetting":{"language":"python","editOnDblClick":false,"completionKey":"TAB","completionSupport":true},"editorMode":"ace/mode/python"},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1592225274923_1036092810","id":"20200615-124754_100297409","dateCreated":"2020-06-15T12:47:54+0000","status":"READY","progressUpdateIntervalMs":500,"$$hashKey":"object:27153"},{"title":"Reply","text":"%pyspark\nfrom pyspark.ml.classification import GBTClassifier\n\ngbt_reply_model = GBTClassifier(labelCol=\"reply\", featuresCol=\"featuresAssembled\", maxDepth=4, maxIter=5).fit(training_data)\ngbt_reply_model.save(root_file_path+\"models/gbt_reply\")\n\n\nprint(\"making predictions ...\")\npredictions = gbt_reply_model.transform(validation_data)\n\nprint(\"writring predictions to file\")\nsplit1_udf = F.udf(lambda value: value[1].item(), T.DoubleType())\npredictions.select(\"tweet_id\", \"engagee_user_id\", split1_udf(\"probability\").alias(\"probability\"))\\\n.coalesce(1).write.csv(root_file_path+\"validaiton_predictions/reply.csv\")\n\n\nprint(\"making predictions ...\")\npredictions = gbt_reply_model.transform(test_data)\n\nprint(\"writring predictions to file\")\nsplit1_udf = F.udf(lambda value: value[1].item(), T.DoubleType())\npredictions.select(\"tweet_id\", \"engagee_user_id\", split1_udf(\"probability\").alias(\"probability\"))\\\n.coalesce(1).write.csv(root_file_path+\"predictions/reply.csv\")\n","user":"anonymous","dateUpdated":"2020-06-15T08:19:26+0000","config":{"colWidth":12,"fontSize":9,"enabled":true,"results":{},"editorSetting":{"language":"python","editOnDblClick":false,"completionKey":"TAB","completionSupport":true},"editorMode":"ace/mode/python","title":true},"settings":{"params":{},"forms":{}},"results":{"code":"SUCCESS","msg":[{"type":"TEXT","data":"making predictions ...\nwritring predictions to file\nmaking predictions ...\nwritring predictions to 
file\n"}]},"apps":[],"jobName":"paragraph_1592180043401_-833342840","id":"20200615-001403_903361333","dateCreated":"2020-06-15T00:14:03+0000","dateStarted":"2020-06-15T08:19:26+0000","dateFinished":"2020-06-15T08:38:51+0000","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:27154"},{"title":"retweet","text":"%pyspark\nfrom pyspark.ml.classification import GBTClassifier\n\ngbt_retweet_model = GBTClassifier(labelCol=\"retweet\", featuresCol=\"featuresAssembled\", maxDepth=4, maxIter=5).fit(training_data)\ngbt_retweet_model.save(root_file_path+\"models/gbt_retweet\")\n\n\nprint(\"making predictions ...\")\npredictions = gbt_retweet_model.transform(validation_data)\n\nprint(\"writring predictions to file\")\nsplit1_udf = F.udf(lambda value: value[1].item(), T.DoubleType())\npredictions.select(\"tweet_id\", \"engagee_user_id\", split1_udf(\"probability\").alias(\"probability\"))\\\n.coalesce(1).write.csv(root_file_path+\"validaiton_predictions/retweet.csv\")\n\n\nprint(\"making predictions ...\")\npredictions = gbt_retweet_model.transform(test_data)\n\nprint(\"writring predictions to file\")\nsplit1_udf = F.udf(lambda value: value[1].item(), T.DoubleType())\npredictions.select(\"tweet_id\", \"engagee_user_id\", split1_udf(\"probability\").alias(\"probability\"))\\\n.coalesce(1).write.csv(root_file_path+\"predictions/retweet.csv\")","user":"anonymous","dateUpdated":"2020-06-15T08:35:22+0000","config":{"colWidth":12,"fontSize":9,"enabled":true,"results":{},"editorSetting":{"language":"python","editOnDblClick":false,"completionKey":"TAB","completionSupport":true},"editorMode":"ace/mode/python","title":true},"settings":{"params":{},"forms":{}},"results":{"code":"SUCCESS","msg":[{"type":"TEXT","data":"making predictions ...\nwritring predictions to file\nmaking predictions ...\nwritring predictions to 
file\n"}]},"apps":[],"jobName":"paragraph_1592209166343_-1416483481","id":"20200615-081926_1455892884","dateCreated":"2020-06-15T08:19:26+0000","dateStarted":"2020-06-15T08:35:22+0000","dateFinished":"2020-06-15T08:58:45+0000","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:27155"},{"title":"rtWithCmt","text":"%pyspark\nfrom pyspark.ml.classification import GBTClassifier\n\ngbt_rtWithCmt_model = GBTClassifier(labelCol=\"rtWithCmt\", featuresCol=\"featuresAssembled\", maxDepth=4, maxIter=5).fit(training_data)\ngbt_rtWithCmt_model.save(root_file_path+\"models/gbt_rtWithCmt\")\n\n\nprint(\"making predictions ...\")\npredictions = gbt_rtWithCmt_model.transform(validation_data)\n\nprint(\"writring predictions to file\")\nsplit1_udf = F.udf(lambda value: value[1].item(), T.DoubleType())\npredictions.select(\"tweet_id\", \"engagee_user_id\", split1_udf(\"probability\").alias(\"probability\"))\\\n.coalesce(1).write.csv(root_file_path+\"validaiton_predictions/rtWithCmt.csv\")\n\n\nprint(\"making predictions ...\")\npredictions = gbt_rtWithCmt_model.transform(test_data)\n\nprint(\"writring predictions to file\")\nsplit1_udf = F.udf(lambda value: value[1].item(), T.DoubleType())\npredictions.select(\"tweet_id\", \"engagee_user_id\", split1_udf(\"probability\").alias(\"probability\"))\\\n.coalesce(1).write.csv(root_file_path+\"predictions/rtWithCmt.csv\")","user":"anonymous","dateUpdated":"2020-06-15T08:35:30+0000","config":{"colWidth":12,"fontSize":9,"enabled":true,"results":{},"editorSetting":{"language":"python","editOnDblClick":false,"completionKey":"TAB","completionSupport":true},"editorMode":"ace/mode/python","title":true},"settings":{"params":{},"forms":{}},"results":{"code":"SUCCESS","msg":[{"type":"TEXT","data":"making predictions ...\nwritring predictions to file\nmaking predictions ...\nwritring predictions to 
file\n"}]},"apps":[],"jobName":"paragraph_1592209230606_-1541130935","id":"20200615-082030_1107560107","dateCreated":"2020-06-15T08:20:30+0000","dateStarted":"2020-06-15T08:38:52+0000","dateFinished":"2020-06-15T09:17:08+0000","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:27156"},{"text":"%pyspark\n","user":"anonymous","dateUpdated":"2020-06-15T11:59:00+0000","config":{"colWidth":12,"fontSize":9,"enabled":true,"results":{},"editorSetting":{"language":"python","editOnDblClick":false,"completionKey":"TAB","completionSupport":true},"editorMode":"ace/mode/python"},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1592222340607_-1541655371","id":"20200615-115900_1102385453","dateCreated":"2020-06-15T11:59:00+0000","status":"READY","progressUpdateIntervalMs":500,"$$hashKey":"object:27157"}],"name":"ali/twitter/final/model","id":"2F9VCGKF9","noteParams":{},"noteForms":{},"angularObjects":{"md:shared_process":[],"sh:shared_process":[],"spark:shared_process":[]},"config":{"isZeppelinNotebookCronEnable":false,"looknfeel":"default","personalizedMode":"false"},"info":{}}