forked from chunjie-sam-liu/useful-scripts
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathplot-access-by-address.R
executable file
·95 lines (79 loc) · 2.35 KB
/
plot-access-by-address.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
# libs --------------------------------------------------------------------
library(magrittr)
library(ggplot2)
# load log ----------------------------------------------------------------
# Resolve the input path: the first command-line argument wins; the
# hard-coded path is only a fallback when the script is run without one
# (previously it was overwritten unconditionally, leaving `filename` as NA).
args <- commandArgs(trailingOnly = TRUE)
filename <- "/home/liucj/tmp/stat-web-access/gscalite-2018-07-09.log"
if (length(args) >= 1L && !is.na(args[1]) && nzchar(args[1])) {
  filename <- args[1]
}

# Read the space-delimited, header-less log. The first two columns are
# renamed to `ip` and `date`; `date` is parsed as day-month-year. Exact
# duplicate rows are dropped and the result is ordered by date.
# NOTE(review): `log` shadows base::log() for the rest of the script; the
# name is kept because later sections refer to it.
log <- filename %>%
  readr::read_delim(delim = " ", col_names = FALSE) %>%
  dplyr::rename(ip = X1, date = X2) %>%
  dplyr::mutate(date = lubridate::dmy(date)) %>%
  dplyr::distinct() %>%
  dplyr::arrange(date)
# loc pie -----------------------------------------------------------------
#' Look up the country reported for an IP address.
#'
#' Appends `ip` to the lookup service URL, parses the JSON reply and
#' extracts the `country` field nested under `data`.
#'
#' @param ip IP address to append to the query URL.
#' @return The `data$country` value of the parsed reply, or `NULL` when the
#'   request or parsing raises an error or a warning (best-effort lookup).
ip2loc <- function(ip) {
  url <- "http://ip.taobao.com/service/getIpInfo.php?ip="
  request <- glue::glue("{url}{ip}")
  tryCatch(
    {
      reply <- rjson::fromJSON(file = request)
      reply$data$country
    },
    # Any failure is deliberately mapped to NULL so callers can filter it out.
    error = function(cond) NULL,
    warning = function(cond) NULL
  )
}
# multidplyr
# Resolve every distinct IP exactly once: the same address can appear on
# several dates, and each ip2loc() call is a web request.
ip_pool <- unique(log$ip)
ip_country <- purrr::map(.x = ip_pool, .f = ip2loc)

# Attach the looked-up country to each log row, drop rows whose lookup
# returned NULL, and flatten the list column into a plain `loc` column.
# The column is named explicitly because calling unnest() without one is
# deprecated in tidyr >= 1.0.0.
log_ip <- log %>%
  dplyr::mutate(loc = ip_country[match(ip, ip_pool)]) %>%
  dplyr::filter(!purrr::map_lgl(.x = loc, .f = is.null)) %>%
  tidyr::unnest(loc)
# Most frequent locations by row count, excluding the "XX" code
# (presumably the lookup service's placeholder for an unknown country;
# verify against the API).
# NOTE(review): the variable is called `top6` but only five locations are
# kept; together with the catch-all bucket added later this yields six pie
# slices. Name and count are left unchanged because other sections use them.
# `dplyr::n()` is namespace-qualified because this script does not attach
# dplyr, so a bare `n()` is not guaranteed to resolve.
top6 <- log_ip %>%
  dplyr::filter(loc != "XX") %>%
  dplyr::group_by(loc) %>%
  dplyr::summarise(m = dplyr::n()) %>%
  dplyr::arrange(dplyr::desc(m)) %>%
  dplyr::pull(loc) %>%
  head(5)
# Per-location row counts for the pie chart: every location outside `top6`
# is folded into a single catch-all bucket labelled "其他" ("other"), then
# the buckets are counted and sorted from largest to smallest.
# `dplyr::n()` is namespace-qualified because this script does not attach
# dplyr, so a bare `n()` is not guaranteed to resolve.
log_ip_p <- log_ip %>%
  dplyr::mutate(loc = ifelse(loc %in% top6, loc, "其他")) %>%
  dplyr::group_by(loc) %>%
  dplyr::summarise(m = dplyr::n()) %>%
  dplyr::arrange(dplyr::desc(m))
# Freeze the slice order to the row order of `log_ip_p` (descending count)
# by using it as the factor level order.
lev <- log_ip_p$loc

# Pie chart of accesses per location: a single stacked bar bent into a
# circle with coord_polar(), labelled with repelled text instead of a legend.
plot_loc <- log_ip_p %>%
  dplyr::mutate(
    loc = factor(loc, levels = lev),
    # Label anchor = midpoint of each slice along the stacked axis, i.e. the
    # running total minus half of the slice's own count. Equivalent to the
    # earlier padded-cumsum form without the `[1:nrow(.)]` trimming.
    pos = cumsum(m) - m / 2
  ) %>%
  ggplot(aes(x = 1, y = m, fill = loc)) +
  # reverse = TRUE stacks the slices in factor-level order so that `pos`
  # (computed in row order) lines up with the slice it labels.
  geom_col(position = position_stack(reverse = TRUE), show.legend = FALSE) +
  coord_polar("y", start = 0) +
  ggrepel::geom_text_repel(
    aes(
      x = 1.45,
      y = pos,
      label = loc
    ),
    nudge_x = 0.3,
    segment.size = .3
  ) +
  scale_fill_brewer(palette = "Set2", name = "Country") +
  theme_minimal() +
  # Strip every axis/grid decoration; only the slices and labels remain.
  theme(
    axis.title = element_blank(),
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    panel.border = element_blank(),
    panel.grid = element_blank()
  ) +
  labs(
    # Counts distinct locations before they are folded into the top buckets.
    title = glue::glue("Unique region: {length(unique(log_ip$loc))}")
  )
# Write the pie chart next to the input log as "<log path without the
# .log extension>-loc-pie.pdf". The pattern is escaped and anchored so that
# only a trailing ".log" is removed; the previous unanchored ".log" regex
# could also match text such as "blog" earlier in the path.
ggsave(
  filename = paste(
    sub(pattern = "\\.log$", replacement = "", x = filename),
    "loc-pie.pdf",
    sep = "-"
  ),
  plot = plot_loc,
  device = "pdf",
  width = 5,
  height = 5
)