-
Notifications
You must be signed in to change notification settings - Fork 0
/
JetBrainsRepositoriesAdvanced.kt
72 lines (55 loc) · 2.24 KB
/
JetBrainsRepositoriesAdvanced.kt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import org.jetbrains.kotlinx.dataframe.api.*
import org.jetbrains.kotlinx.dataframe.*
import org.jetbrains.kotlinx.dataframe.annotations.ColumnName
import org.jetbrains.kotlinx.dataframe.annotations.DataSchema
import org.jetbrains.kotlinx.dataframe.annotations.Import
import org.jetbrains.kotlinx.dataframe.io.*
/**
 * Demo entry point: downloads the JetBrains repositories CSV and runs the same report twice,
 * once through the sample-schema cast ([printInfo]) and once through the explicit
 * [Repositories] data schema ([printInfoTyped]).
 *
 * Requires network access, because the CSV is read from a raw.githubusercontent.com URL.
 */
fun main() {
    // How can you create a function when types are implicit?
    // 1: castTo + https://kotlinlang.org/docs/functions.html#single-expression-functions
    val df = DataFrame.readCSV("https://raw.githubusercontent.com/Kotlin/dataframe/master/data/jetbrains_repositories.csv")
    printInfo(df)

    // 2: generate code
    // Prints generated data-class source for this frame; the hand-maintained counterpart is `Repositories`.
    df.generateDataClasses("Repositories").print()
    val repos = df.convertTo<Repositories>()
    printInfoTyped(repos)
}
// Option 1: cast an untyped frame to the schema of a sample frame (castTo + single-expression functions)
// Sample frame whose schema is passed to `castTo` in printInfo and parseTopics.
// It is read from the relative path "jetbrains_repositories.csv", whereas main() downloads the CSV by URL.
// NOTE(review): @Import presumably lets tooling derive this frame's schema ahead of time — confirm.
private val sample = @Import DataFrame.readCSV("jetbrains_repositories.csv")
/**
 * Prints a short report about repository popularity and topics for an untyped frame.
 *
 * [raw] is first cast to the schema of the file-level `sample` frame, which makes the
 * `stargazers_count` accessor available. Output order:
 * 1. the `stargazers_count` column,
 * 2. the rows with more than 50 stargazers,
 * 3. the number of rows with more than 50 stargazers,
 * 4. the number of rows with zero stargazers,
 * 5. the frame with parsed topics sorted by stargazers descending (printed with `rowsLimit = 10`),
 * 6. the frame exploded by topic, grouped by topic and sorted by group row count descending.
 *
 * @param raw frame to report on; it is cast to the schema of `sample`.
 */
fun printInfo(raw: AnyFrame) {
    val repos = raw.castTo(schemaFrom = sample)

    repos.stargazers_count.print()

    val popular = repos.filter { stargazers_count > 50 }
    popular.print()

    val popularCount = repos.count { stargazers_count > 50 }
    println(popularCount)
    val unstarredCount = repos.count { stargazers_count == 0 }
    println(unstarredCount)

    // let's try to parse topics
    val withTopics = parseTopics(repos)

    val byStarsDesc = withTopics.sortByDesc { stargazers_count }
    byStarsDesc.print(rowsLimit = 10)

    val reposPerTopic = withTopics
        .explode { topicsList }
        .groupBy { topicsList }
        .sortByGroupDesc { it.rowsCount() }
    reposPerTopic.print()
}
/**
 * Casts [raw] to the schema of `sample` and adds two columns derived from `topics`:
 * - `topicsList`: the `topics` string with a surrounding `[`...`]` removed, split on ", ",
 *   with empty entries dropped;
 * - `topicsSize`: the number of entries in `topicsList`.
 *
 * Intentionally kept as a single-expression function with an inferred return type:
 * callers access `topicsList` on the result, and the comment in `main` points to
 * single-expression functions as the way to keep implicit types usable across functions.
 */
private fun parseTopics(raw: AnyFrame) =
    raw.castTo(sample)
        .add("topicsList") {
            topics
                .removeSurrounding("[", "]")
                .split(", ")
                .filterNot { it.isEmpty() }
        }
        .add("topicsSize") { topicsList.size }
// Option 2: convert the frame to an explicitly declared @DataSchema data class
/**
 * Explicit data schema for the repositories frame, used with `convertTo<Repositories>()`.
 *
 * Properties annotated with [ColumnName] are bound to the named column; the remaining
 * properties carry no [ColumnName] annotation.
 *
 * @property fullName value of the `full_name` column.
 * @property htmlUrl value of the `html_url` column, as a [java.net.URL].
 * @property stargazersCount value of the `stargazers_count` column.
 * @property topics raw topics string; `parseTopics` strips a surrounding `[`...`]` and splits it on ", ".
 * @property watchers watchers value for the repository.
 */
@DataSchema
data class Repositories(
    @ColumnName("full_name") val fullName: String,
    @ColumnName("html_url") val htmlUrl: java.net.URL,
    @ColumnName("stargazers_count") val stargazersCount: Int,
    val topics: String,
    val watchers: Int,
)
/**
 * Prints the same report as the untyped variant, for a frame typed with [Repositories].
 *
 * Output order:
 * 1. the `stargazersCount` column,
 * 2. the rows with more than 50 stargazers,
 * 3. the number of rows with more than 50 stargazers,
 * 4. the number of rows with zero stargazers,
 * 5. the frame with parsed topics sorted by stargazers descending (printed with `rowsLimit = 10`),
 * 6. the frame exploded by topic, grouped by topic and sorted by group row count descending.
 *
 * @param df repositories frame typed with the [Repositories] schema.
 */
fun printInfoTyped(df: DataFrame<Repositories>) {
    df.stargazersCount.print()

    val popular = df.filter { stargazersCount > 50 }
    popular.print()

    val popularCount = df.count { stargazersCount > 50 }
    println(popularCount)
    val unstarredCount = df.count { stargazersCount == 0 }
    println(unstarredCount)

    // let's try to parse topics
    // parseTopics casts to the schema of `sample`, so from here on the column is reached
    // through the snake_case `stargazers_count` accessor rather than `stargazersCount`.
    val withTopics = parseTopics(df)

    val byStarsDesc = withTopics.sortByDesc { stargazers_count }
    byStarsDesc.print(rowsLimit = 10)

    val reposPerTopic = withTopics
        .explode { topicsList }
        .groupBy { topicsList }
        .sortByGroupDesc { it.rowsCount() }
    reposPerTopic.print()
}