Add analytics (#118)
* ELK stack to analyze logs from client and server
* Short-lived container to configure Kibana; imports Kibana dashboards from export.json on startup.
* Client and server logging works through python-logstash.
* Minor polishing of events and dashboards.
* The default Kibana app is Discover; new visualizations added.
* Make comments more readable inside logger.js.
* Added a path for backups.
nmanovic authored Oct 8, 2018
1 parent e463868 commit c096c2a
Showing 31 changed files with 595 additions and 288 deletions.
4 changes: 3 additions & 1 deletion .vscode/launch.json
@@ -9,6 +9,7 @@
"type": "python",
"request": "launch",
"stopOnEntry": false,
"debugStdLib": true,
"pythonPath": "${config:python.pythonPath}",
"program": "${workspaceRoot}/manage.py",
"args": [
@@ -23,7 +24,6 @@
"DjangoDebugging"
],
"cwd": "${workspaceFolder}",
"env": {},
"envFile": "${workspaceFolder}/.env",
},
{
@@ -44,6 +44,7 @@
"type": "python",
"request": "launch",
"stopOnEntry": false,
"debugStdLib": true,
"pythonPath": "${config:python.pythonPath}",
"program": "${workspaceRoot}/manage.py",
"args": [
@@ -65,6 +66,7 @@
"name": "CVAT RQ - low",
"type": "python",
"request": "launch",
"debugStdLib": true,
"stopOnEntry": false,
"pythonPath": "${config:python.pythonPath}",
"program": "${workspaceRoot}/manage.py",
14 changes: 4 additions & 10 deletions README.md
@@ -109,19 +109,13 @@ services:
```
### Annotation logs
-It is possible to proxy annotation logs from client to another server over http. For example you can use Logstash.
-To do that set DJANGO_LOG_SERVER_URL environment variable in cvat section of docker-compose.yml
-file (or add this variable to docker-compose.override.yml).
+It is possible to proxy annotation logs from the client to ELK. To do that, run the command below:
-```yml
-version: "2.3"
-
-services:
-  cvat:
-    environment:
-      DJANGO_LOG_SERVER_URL: https://annotation.example.com:5000
-```
+```bash
+docker-compose -f docker-compose.yml -f analytics/docker-compose.yml up -d --build
+```
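Once the containers are up, Kibana should answer on port 5601 (the mapping added in analytics/docker-compose.yml). A minimal health-check sketch, assuming the standard Kibana 6.x status API and that you are on the docker host:

```python
# Quick health check for the analytics stack (a sketch, not part of this commit).
import requests

resp = requests.get("http://localhost:5601/api/status")  # Kibana status endpoint
resp.raise_for_status()
print(resp.json()["status"]["overall"]["state"])  # "green" once Kibana is ready
```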


### Share path

You can use a share storage for data uploading during you are creating a task. To do that you can mount it to CVAT docker container. Example of docker-compose.override.yml for this purpose:
Empty file added analytics/README.md
Empty file.
64 changes: 64 additions & 0 deletions analytics/docker-compose.yml
@@ -0,0 +1,64 @@
version: '2.3'
services:
cvat_elasticsearch:
container_name: cvat_elasticsearch
image: cvat_elasticsearch
networks:
default:
aliases:
- elasticsearch
build:
context: ./analytics/elasticsearch
args:
ELK_VERSION: 6.4.0
restart: always

cvat_kibana:
container_name: cvat_kibana
image: cvat_kibana
networks:
default:
aliases:
- kibana
build:
context: ./analytics/kibana
args:
ELK_VERSION: 6.4.0
ports:
- "5601:5601"
depends_on: ['cvat_elasticsearch']
restart: always

cvat_kibana_setup:
container_name: cvat_kibana_setup
image: cvat
volumes: ['./analytics/kibana:/home/django/kibana:ro']
depends_on: ['cvat']
working_dir: '/home/django'
entrypoint: ['bash', 'wait-for-it.sh', 'elasticsearch:9200', '-t', '0', '--',
'/bin/bash', 'wait-for-it.sh', 'kibana:5601', '-t', '0', '--',
'/usr/bin/python3', 'kibana/setup.py', 'kibana/export.json']
environment:
no_proxy: elasticsearch,kibana,${no_proxy}

cvat_logstash:
container_name: cvat_logstash
image: cvat_logstash
networks:
default:
aliases:
- logstash
build:
context: ./analytics/logstash
args:
ELK_VERSION: 6.4.0
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
depends_on: ['cvat_elasticsearch']
restart: always

cvat:
environment:
DJANGO_LOG_SERVER_HOST: "logstash"
DJANGO_LOG_SERVER_PORT: 5000
no_proxy: logstash,${no_proxy}
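The cvat service itself only receives the two environment variables above; per the commit message, the Django side ships logs through python-logstash. A hedged sketch of how a LOGGING entry could be wired to these variables — illustrative only, not the actual cvat settings.py (the logger name "cvat.server" is the one logstash.conf filters on; everything else is assumed):

```python
# Illustrative Django LOGGING fragment; handler name and levels are assumptions.
import os

LOGGING = {
    "version": 1,
    "handlers": {
        "logstash": {
            "class": "logstash.TCPLogstashHandler",  # from the python-logstash package
            "host": os.getenv("DJANGO_LOG_SERVER_HOST", "logstash"),
            "port": int(os.getenv("DJANGO_LOG_SERVER_PORT", "5000")),
            "version": 1,  # logstash event schema v1
        },
    },
    "loggers": {
        "cvat.server": {"handlers": ["logstash"], "level": "INFO"},
    },
}
```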
3 changes: 3 additions & 0 deletions analytics/elasticsearch/Dockerfile
@@ -0,0 +1,3 @@
ARG ELK_VERSION
FROM docker.elastic.co/elasticsearch/elasticsearch-oss:${ELK_VERSION}
COPY --chown=elasticsearch:elasticsearch elasticsearch.yml /usr/share/elasticsearch/config/
3 changes: 3 additions & 0 deletions analytics/elasticsearch/elasticsearch.yml
@@ -0,0 +1,3 @@
http.host: 0.0.0.0
script.painless.regex.enabled: true
path.repo: ["/usr/share/elasticsearch/backup"]
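With path.repo set, the backup directory can be registered as a snapshot repository through the standard Elasticsearch 6.x snapshot API. A sketch, assuming elasticsearch:9200 is reachable (for example from another container on the same network); the repository and snapshot names are invented:

```python
# Register the backup path as a snapshot repository and take one snapshot.
# "cvat_backup" and "snapshot_1" are made-up names for illustration.
import requests

es = "http://elasticsearch:9200"
requests.put(
    es + "/_snapshot/cvat_backup",
    json={"type": "fs", "settings": {"location": "/usr/share/elasticsearch/backup"}},
).raise_for_status()
requests.put(
    es + "/_snapshot/cvat_backup/snapshot_1",
    params={"wait_for_completion": "true"},
).raise_for_status()
```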
5 changes: 5 additions & 0 deletions analytics/kibana/Dockerfile
@@ -0,0 +1,5 @@
ARG ELK_VERSION
FROM docker.elastic.co/kibana/kibana-oss:${ELK_VERSION}
COPY kibana.yml /usr/share/kibana/config/


198 changes: 198 additions & 0 deletions analytics/kibana/export.json


4 changes: 4 additions & 0 deletions analytics/kibana/kibana.yml
@@ -0,0 +1,4 @@
server.host: 0.0.0.0
elasticsearch.url: http://elasticsearch:9200
elasticsearch.requestHeadersWhitelist: [ cookie, authorization, x-forwarded-user ]
kibana.defaultAppId: "discover"
40 changes: 40 additions & 0 deletions analytics/kibana/setup.py
@@ -0,0 +1,40 @@
#!/usr/bin/env python

import os
import argparse
import requests
import json

def import_resources(host, port, cfg_file):
with open(cfg_file, 'r') as f:
for saved_object in json.load(f):
_id = saved_object["_id"]
_type = saved_object["_type"]
_doc = saved_object["_source"]
import_saved_object(host, port, _type, _id, _doc)

def import_saved_object(host, port, _type, _id, data):
saved_objects_api = "http://{}:{}/api/saved_objects/{}/{}".format(
host, port, _type, _id)
request = requests.get(saved_objects_api)
if request.status_code == 404:
print("Creating {} as {}".format(_type, _id))
request = requests.post(saved_objects_api, json={"attributes": data},
headers={'kbn-xsrf': 'true'})
else:
print("Updating {} named {}".format(_type, _id))
request = requests.put(saved_objects_api, json={"attributes": data},
headers={'kbn-xsrf': 'true'})
request.raise_for_status()

if __name__ == '__main__':
parser = argparse.ArgumentParser(description='import Kibana 6.x resources',
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('export_file', metavar='FILE',
help='JSON export file with resources')
parser.add_argument('-p', '--port', metavar='PORT', default=5601, type=int,
help='port of Kibana instance')
parser.add_argument('-H', '--host', metavar='HOST', default='kibana',
help='host of Kibana instance')
args = parser.parse_args()
import_resources(args.host, args.port, args.export_file)
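For reference, a minimal sketch of the list structure that import_resources() expects in export.json — the id and title below are invented:

```python
# One entry of the list setup.py iterates over; values are hypothetical.
example_export = [
    {
        "_id": "cvat-dashboard",                  # saved-object id (made up)
        "_type": "dashboard",                     # any Kibana saved-object type
        "_source": {"title": "CVAT analytics"},   # sent as the "attributes" payload
    }
]
```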
7 changes: 7 additions & 0 deletions analytics/logstash/Dockerfile
@@ -0,0 +1,7 @@
ARG ELK_VERSION
FROM docker.elastic.co/logstash/logstash-oss:${ELK_VERSION}
RUN logstash-plugin install logstash-input-http logstash-filter-aggregate \
logstash-filter-prune logstash-output-email

COPY logstash.conf /usr/share/logstash/pipeline/
EXPOSE 5000
85 changes: 85 additions & 0 deletions analytics/logstash/logstash.conf
@@ -0,0 +1,85 @@
input {
tcp {
port => 5000
codec => json
}
}

filter {
if [logger_name] =~ /cvat.client/ {
    # 1. Decode the JSON event embedded in the 'message' field
    # 2. Remove unnecessary fields from it
    # 3. Set its type to 'client'
json {
source => "message"
}

date {
match => ["timestamp", "UNIX", "UNIX_MS"]
remove_field => "timestamp"
}

if [event] == "Send exception" {
aggregate {
task_id => "%{userid}_%{application}_%{message}_%{filename}_%{line}"
code => "
require 'time'

map['userid'] ||= event.get('userid');
map['application'] ||= event.get('application');
map['message'] ||= event.get('message');
map['filename'] ||= event.get('filename');
map['line'] ||= event.get('line');
map['task'] ||= event.get('task');

map['error_count'] ||= 0;
map['error_count'] += 1;

map['aggregated_message'] ||= '';
time = Time.strptime(event.get('timestamp').to_s,'%Q').localtime('+03:00')
map['aggregated_message'] += time.to_s + '\n' + event.get('stack') + '\n\n\n';"

timeout => 3600
timeout_tags => ['send_email_notification']
push_map_as_event_on_timeout => true
}
}

prune {
blacklist_names => ["level", "host", "logger_name", "message", "path",
"port", "stack_info"]
}

mutate {
replace => { "type" => "client" }
}
} else if [logger_name] =~ /cvat.server/ {
    # 1. Remove unnecessary fields from it
    # 2. Set its type to 'server'
prune {
blacklist_names => ["host", "port"]
}

mutate {
replace => { "type" => "server" }
}
}
}

output {
stdout {
codec => rubydebug
}

if [type] == "client" {
elasticsearch {
hosts => ["elasticsearch:9200"]
index => "cvat.client"
}
} else if [type] == "server" {
elasticsearch {
hosts => ["elasticsearch:9200"]
index => "cvat.server"
}
}
}
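Since the tcp input decodes JSON, the client branch above can be exercised without python-logstash. A test sketch — every field value is made up, but logger_name and the embedded JSON in message are exactly what the filter keys on:

```python
# Push one fake "cvat.client" event into the pipeline (test sketch only).
import json
import socket
import time

event = {
    "logger_name": "cvat.client",  # routes the event into the client branch
    # the json filter above expands this field into top-level fields:
    "message": json.dumps({
        "event": "Send exception",  # triggers the aggregate filter
        "userid": 1, "application": "cvat", "task": 42,  # invented values
        "message": "boom", "filename": "app.js", "line": 10,
        "stack": "Error: boom",
        "timestamp": int(time.time() * 1000),  # UNIX_MS, as the date filter expects
    }),
}
with socket.create_connection(("localhost", 5000)) as sock:  # logstash tcp input
    sock.sendall((json.dumps(event) + "\n").encode())
```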