Analytics #118

Merged (22 commits, Oct 8, 2018)

4 changes: 3 additions & 1 deletion .vscode/launch.json
@@ -9,6 +9,7 @@
"type": "python",
"request": "launch",
"stopOnEntry": false,
"debugStdLib": true,
"pythonPath": "${config:python.pythonPath}",
"program": "${workspaceRoot}/manage.py",
"args": [
@@ -23,7 +24,6 @@
"DjangoDebugging"
],
"cwd": "${workspaceFolder}",
"env": {},
"envFile": "${workspaceFolder}/.env",
},
{
@@ -44,6 +44,7 @@
"type": "python",
"request": "launch",
"stopOnEntry": false,
"debugStdLib": true,
"pythonPath": "${config:python.pythonPath}",
"program": "${workspaceRoot}/manage.py",
"args": [
@@ -65,6 +66,7 @@
"name": "CVAT RQ - low",
"type": "python",
"request": "launch",
"debugStdLib": true,
"stopOnEntry": false,
"pythonPath": "${config:python.pythonPath}",
"program": "${workspaceRoot}/manage.py",
14 changes: 4 additions & 10 deletions README.md
@@ -109,19 +109,13 @@ services:
```
### Annotation logs

It is possible to proxy annotation logs from the client to another server over HTTP. For example, you can use Logstash.
To do that, set the DJANGO_LOG_SERVER_URL environment variable in the cvat section of the docker-compose.yml
file (or add this variable to docker-compose.override.yml).
It is possible to proxy annotation logs from the client to ELK. To do that, run the command below:

```yml
version: "2.3"

services:
  cvat:
    environment:
      DJANGO_LOG_SERVER_URL: https://annotation.example.com:5000
```
```bash
docker-compose -f docker-compose.yml -f analytics/docker-compose.yml up -d --build
```


### Share path

You can use a shared storage for uploading data while creating a task. To do that, mount it into the CVAT docker container. Example of docker-compose.override.yml for this purpose:
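The override example itself is not expanded in this diff. A minimal sketch of such an override, assuming an illustrative host directory /path/to/share mounted read-only at /home/django/share inside the container (both paths are placeholders, not taken from this PR), could look like:

```yml
version: "2.3"

services:
  cvat:
    volumes:
      # Host directory with the data to share, mounted read-only into the
      # container at the path CVAT is expected to scan for shared files.
      - /path/to/share:/home/django/share:ro
```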
Empty file added analytics/README.md
Empty file.
64 changes: 64 additions & 0 deletions analytics/docker-compose.yml
@@ -0,0 +1,64 @@
version: '2.3'
services:
  cvat_elasticsearch:
    container_name: cvat_elasticsearch
    image: cvat_elasticsearch
    networks:
      default:
        aliases:
          - elasticsearch
    build:
      context: ./analytics/elasticsearch
      args:
        ELK_VERSION: 6.4.0
    restart: always

  cvat_kibana:
    container_name: cvat_kibana
    image: cvat_kibana
    networks:
      default:
        aliases:
          - kibana
    build:
      context: ./analytics/kibana
      args:
        ELK_VERSION: 6.4.0
    ports:
      - "5601:5601"
    depends_on: ['cvat_elasticsearch']
    restart: always

  cvat_kibana_setup:
    container_name: cvat_kibana_setup
    image: cvat
    volumes: ['./analytics/kibana:/home/django/kibana:ro']
    depends_on: ['cvat']
    working_dir: '/home/django'
    entrypoint: ['bash', 'wait-for-it.sh', 'elasticsearch:9200', '-t', '0', '--',
                 '/bin/bash', 'wait-for-it.sh', 'kibana:5601', '-t', '0', '--',
                 '/usr/bin/python3', 'kibana/setup.py', 'kibana/export.json']
    environment:
      no_proxy: elasticsearch,kibana,${no_proxy}

  cvat_logstash:
    container_name: cvat_logstash
    image: cvat_logstash
    networks:
      default:
        aliases:
          - logstash
    build:
      context: ./analytics/logstash
      args:
        ELK_VERSION: 6.4.0
        http_proxy: ${http_proxy}
        https_proxy: ${https_proxy}
    depends_on: ['cvat_elasticsearch']
    restart: always

  cvat:
    environment:
      DJANGO_LOG_SERVER_HOST: "logstash"
      DJANGO_LOG_SERVER_PORT: 5000
      no_proxy: logstash,${no_proxy}
3 changes: 3 additions & 0 deletions analytics/elasticsearch/Dockerfile
@@ -0,0 +1,3 @@
ARG ELK_VERSION
FROM docker.elastic.co/elasticsearch/elasticsearch-oss:${ELK_VERSION}
COPY --chown=elasticsearch:elasticsearch elasticsearch.yml /usr/share/elasticsearch/config/
2 changes: 2 additions & 0 deletions analytics/elasticsearch/elasticsearch.yml
@@ -0,0 +1,2 @@
http.host: 0.0.0.0
script.painless.regex.enabled: true
5 changes: 5 additions & 0 deletions analytics/kibana/Dockerfile
@@ -0,0 +1,5 @@
ARG ELK_VERSION
FROM docker.elastic.co/kibana/kibana-oss:${ELK_VERSION}
COPY kibana.yml /usr/share/kibana/config/


198 changes: 198 additions & 0 deletions analytics/kibana/export.json

Large diffs are not rendered by default.

4 changes: 4 additions & 0 deletions analytics/kibana/kibana.yml
@@ -0,0 +1,4 @@
server.host: 0.0.0.0
elasticsearch.url: http://elasticsearch:9200
elasticsearch.requestHeadersWhitelist: [ cookie, authorization, x-forwarded-user ]
kibana.defaultAppId: "discover"
40 changes: 40 additions & 0 deletions analytics/kibana/setup.py
@@ -0,0 +1,40 @@
#!/usr/bin/env python

import os
import argparse
import requests
import json

def import_resources(host, port, cfg_file):
    with open(cfg_file, 'r') as f:
        for saved_object in json.load(f):
            _id = saved_object["_id"]
            _type = saved_object["_type"]
            _doc = saved_object["_source"]
            import_saved_object(host, port, _type, _id, _doc)

def import_saved_object(host, port, _type, _id, data):
    saved_objects_api = "http://{}:{}/api/saved_objects/{}/{}".format(
        host, port, _type, _id)
    request = requests.get(saved_objects_api)
    if request.status_code == 404:
        print("Creating {} as {}".format(_type, _id))
        request = requests.post(saved_objects_api, json={"attributes": data},
            headers={'kbn-xsrf': 'true'})
    else:
        print("Updating {} named {}".format(_type, _id))
        request = requests.put(saved_objects_api, json={"attributes": data},
            headers={'kbn-xsrf': 'true'})
    request.raise_for_status()

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='import Kibana 6.x resources',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('export_file', metavar='FILE',
        help='JSON export file with resources')
    parser.add_argument('-p', '--port', metavar='PORT', default=5601, type=int,
        help='port of Kibana instance')
    parser.add_argument('-H', '--host', metavar='HOST', default='kibana',
        help='host of Kibana instance')
    args = parser.parse_args()
    import_resources(args.host, args.port, args.export_file)
7 changes: 7 additions & 0 deletions analytics/logstash/Dockerfile
@@ -0,0 +1,7 @@
ARG ELK_VERSION
FROM docker.elastic.co/logstash/logstash-oss:${ELK_VERSION}
RUN logstash-plugin install logstash-input-http logstash-filter-aggregate \
logstash-filter-prune logstash-output-email

COPY logstash.conf /usr/share/logstash/pipeline/
EXPOSE 5000
85 changes: 85 additions & 0 deletions analytics/logstash/logstash.conf
@@ -0,0 +1,85 @@
input {
  tcp {
    port => 5000
    codec => json
  }
}

filter {
  if [logger_name] =~ /cvat.client/ {
    # 1. Decode the event from json in 'message' field
    # 2. Remove unnecessary field from it
    # 3. Type it as client
    json {
      source => "message"
    }

    date {
      match => ["timestamp", "UNIX", "UNIX_MS"]
      remove_field => "timestamp"
    }

    if [event] == "Send exception" {
      aggregate {
        task_id => "%{userid}_%{application}_%{message}_%{filename}_%{line}"
        code => "
          require 'time'

          map['userid'] ||= event.get('userid');
          map['application'] ||= event.get('application');
          map['message'] ||= event.get('message');
          map['filename'] ||= event.get('filename');
          map['line'] ||= event.get('line');
          map['task'] ||= event.get('task');

          map['error_count'] ||= 0;
          map['error_count'] += 1;

          map['aggregated_message'] ||= '';
          time = Time.strptime(event.get('timestamp').to_s,'%Q').localtime('+03:00')
          map['aggregated_message'] += time.to_s + '\n' + event.get('stack') + '\n\n\n';"

        timeout => 3600
        timeout_tags => ['send_email_notification']
        push_map_as_event_on_timeout => true
      }
    }

    prune {
      blacklist_names => ["level", "host", "logger_name", "message", "path",
                          "port", "stack_info"]
    }

    mutate {
      replace => { "type" => "client" }
    }
  } else if [logger_name] =~ /cvat.server/ {
    # 1. Remove unnecessary field from it
    # 2. Type it as server
    prune {
      blacklist_names => ["host", "port"]
    }

    mutate {
      replace => { "type" => "server" }
    }
  }
}

output {
  stdout {
    codec => rubydebug
  }

  if [type] == "client" {
    elasticsearch {
      hosts => ["elasticsearch:9200"]
      index => "cvat.client"
    }
  } else if [type] == "server" {
    elasticsearch {
      hosts => ["elasticsearch:9200"]
      index => "cvat.server"
    }
  }
}
93 changes: 53 additions & 40 deletions cvat/apps/engine/logging.py
@@ -1,53 +1,42 @@

# Copyright (C) 2018 Intel Corporation
#
# SPDX-License-Identifier: MIT

import os
import inspect
import logging
from . import models
from cvat.settings.base import LOGGING
from cvat.apps.engine.models import Job, Task

def _get_task(tid):
    try:
        return Task.objects.get(pk=tid)
    except Exception:
        raise Exception('{} key must be a task identifier'.format(tid))

def _get_job(jid):
    try:
        return models.Job.objects.select_related("segment__task").get(id=jid)
    except Exception:
        raise Exception('{} key must be a job identifier'.format(jid))

class TaskLoggerStorage:
    def __init__(self):
        self._storage = dict()
        self._formatter = logging.getLogger('task')

    def __getitem__(self, tid):
        if tid not in self._storage:
            self._storage[tid] = self._create_task_logger(tid)
        return self._storage[tid]

    def _create_task_logger(self, tid):
        task = self._get_task(tid)
        if task is not None:
            configuration = LOGGING.copy()
            handler_configuration = configuration['handlers']['file']
            handler_configuration['filename'] = task.get_log_path()
            configuration['handlers'] = {
                'file_{}'.format(tid): handler_configuration
            }
            configuration['loggers'] = {
                'task_{}'.format(tid): {
                    'handlers': ['file_{}'.format(tid)],
                    'level': os.getenv('DJANGO_LOG_LEVEL', 'DEBUG'),
                }
            }

            logging.config.dictConfig(configuration)
            logger = logging.getLogger('task_{}'.format(tid))
            return logger
        else:
            raise Exception('Key must be task indentificator')

    def _get_task(self, tid):
        try:
            return models.Task.objects.get(pk=tid)
        except Exception:
            return None
        task = _get_task(tid)

        logger = logging.getLogger('cvat.server.task_{}'.format(tid))
        server_file = logging.FileHandler(filename=task.get_log_path())
        logger.addHandler(server_file)

        return logger

class JobLoggerStorage:
    def __init__(self):
@@ -59,17 +48,41 @@ def __getitem__(self, jid):
        return self._storage[jid]

    def _get_task_logger(self, jid):
        job = self._get_job(jid)
        if job is not None:
            return task_logger[job.segment.task.id]
        else:
            raise Exception('Key must be job identificator')

    def _get_job(self, jid):
        try:
            return models.Job.objects.select_related("segment__task").get(id=jid)
        except Exception:
            return None
        job = _get_job(jid)
        return task_logger[job.segment.task.id]

class TaskClientLoggerStorage:
    def __init__(self):
        self._storage = dict()

    def __getitem__(self, tid):
        if tid not in self._storage:
            self._storage[tid] = self._create_client_logger(tid)
        return self._storage[tid]

    def _create_client_logger(self, tid):
        task = _get_task(tid)
        logger = logging.getLogger('cvat.client.task_{}'.format(tid))
        client_file = logging.FileHandler(filename=task.get_client_log_path())
        logger.addHandler(client_file)

        return logger

class JobClientLoggerStorage:
    def __init__(self):
        self._storage = dict()

    def __getitem__(self, jid):
        if jid not in self._storage:
            self._storage[jid] = self._get_task_logger(jid)
        return self._storage[jid]

    def _get_task_logger(self, jid):
        job = _get_job(jid)
        return task_client_logger[job.segment.task.id]

task_logger = TaskLoggerStorage()
job_logger = JobLoggerStorage()
global_logger = logging.getLogger('cvat.server')
job_client_logger = JobClientLoggerStorage()
task_client_logger = TaskClientLoggerStorage()
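As a usage illustration (not part of this diff), the new client-side storages are meant to be indexed by task or job id from server-side code. The save_client_events helper below is hypothetical; only the imported name and the dict-like lookup come from the module above.

```python
# Hypothetical sketch: appending client-submitted events to the per-task
# client log through the storages defined above. Whether a record is actually
# emitted also depends on the level configured for 'cvat.client' loggers in
# the Django logging settings.
from cvat.apps.engine.logging import job_client_logger

def save_client_events(jid, messages):
    logger = job_client_logger[jid]  # resolves the job's parent task logger
    for message in messages:
        # Each record goes through the FileHandler attached in
        # TaskClientLoggerStorage._create_client_logger, i.e. to
        # task.get_client_log_path().
        logger.info(message)
```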