Skip to content

Commit

Permalink
Add port caching to improve startup performance.
Browse files Browse the repository at this point in the history
Fix test.
  • Loading branch information
rtibbles committed Apr 29, 2021
1 parent f7d2eb8 commit f07ddf3
Show file tree
Hide file tree
Showing 3 changed files with 140 additions and 37 deletions.
143 changes: 116 additions & 27 deletions kolibri/utils/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,9 @@
# File used to activate profiling middleware and get profiler PID
PROFILE_LOCK = os.path.join(conf.KOLIBRI_HOME, "server_profile.lock")

# File used to store previously available ports, one port number per line,
# so that a later start with port 0 can try to bind to them again
PORT_CACHE = os.path.join(conf.KOLIBRI_HOME, "port_cache")

# This is a special file with daemon activity. It logs ALL stderr output, some
# might not have made it to the log file!
DAEMON_LOG = os.path.join(conf.LOG_ROOT, "daemon.txt")
Expand Down Expand Up @@ -109,7 +112,78 @@ def error_log(self, msg="", level=20, traceback=False):
return logger.log(level, msg)


def check_port_availability(host, port):
    """
    Report whether the server can start on the given host and port.

    :param host: address the server intends to bind to
    :param port: port number to check; a falsy value (0) skips the check
    :returns: False when waiting for the port to become free raises OSError
        within PORT_AVAILABILITY_CHECK_TIMEOUT, True otherwise
    """
    if not port:
        # Port 0 means a port will be chosen for us, so there is nothing to check
        return True
    try:
        wait_for_free_port(host, port, timeout=PORT_AVAILABILITY_CHECK_TIMEOUT)
    except OSError:
        return False
    return True


class PortCache:
    """
    Remember the ports the server has bound to, so they can be reused.

    ``values`` maps each known port number to a flag: False while the port is
    only known from the PORT_CACHE file, True once it has been claimed or
    registered during this process.
    """

    def __init__(self):
        self.values = {}
        self.load()

    def register_port(self, port):
        """
        Mark ``port`` as in use and write the updated cache to PORT_CACHE.
        """
        self.values[port] = True
        self.save()

    def get_port(self, host):
        """
        Claim and return a cached port that is currently free on ``host``.

        Every cached port not yet claimed in this process is tried in turn, so
        one occupied entry does not hide the remaining ones. Each occupied
        candidate may cost up to PORT_AVAILABILITY_CHECK_TIMEOUT.

        :returns: the claimed port number, or None if no cached port is usable
        """
        # Iterate over a snapshot, as the mapping is updated when a port is claimed
        for port in list(self.values):
            # Skip ports already claimed, and 0, which is not a concrete port
            if self.values[port] or not port:
                continue
            if check_port_availability(host, port):
                self.values[port] = True
                return port
        return None

    def save(self):
        """
        Write every known port to PORT_CACHE, one per line.
        """
        with open(PORT_CACHE, "w") as f:
            f.write("\n".join(str(p) for p in self.values))

    def load(self):
        """
        Read PORT_CACHE and record each valid port it lists as unclaimed.

        A missing or unreadable file leaves the cache empty; lines that are
        not valid port numbers are ignored rather than aborting the load.
        """
        try:
            with open(PORT_CACHE, "r") as f:
                lines = f.readlines()
        except IOError:
            return
        for port in self._parse_ports(lines):
            self.values[port] = False

    @staticmethod
    def _parse_ports(lines):
        """
        Return the valid port numbers (1-65535) found in ``lines``, in order.
        """
        ports = []
        for line in lines:
            try:
                port = int(line)
            except ValueError:
                # Blank or corrupt line: the cache is best effort, so skip it
                continue
            if 0 < port < 65536:
                ports.append(port)
        return ports


# Shared module-level instance; constructing it reads any ports stored in the
# PORT_CACHE file
port_cache = PortCache()


class ServerPlugin(BaseServerPlugin):
def subscribe(self):
    """
    Subscribe as the base plugin does, then also listen on the "ENTER" channel.
    """
    super(ServerPlugin, self).subscribe()
    self.bus.subscribe("ENTER", self.ENTER)

def unsubscribe(self):
    """
    Unsubscribe as the base plugin does, then stop listening on the "ENTER" channel.
    """
    super(ServerPlugin, self).unsubscribe()
    self.bus.unsubscribe("ENTER", self.ENTER)

def ENTER(self):
    """
    Swap an unspecified port (0) for a previously used port, if one is free.

    When the configured bind port is 0 and the port cache offers a port, both
    this plugin and its HTTP server are pointed at that port. Otherwise the
    bind address is left untouched.
    """
    host, bind_port = self.bind_addr
    if bind_port != 0:
        # An explicit port was requested, so the cache is not consulted
        return
    cached_port = port_cache.get_port(host)
    if not cached_port:
        return
    self.bind_addr = (host, cached_port)
    self.httpserver.bind_addr = (host, cached_port)

def START(self):
    """
    Start through the base plugin, then record the HTTP server's port in the cache.
    """
    super(ServerPlugin, self).START()
    _host, bound_port = self.httpserver.bind_addr
    port_cache.register_port(bound_port)

@property
def interface(self):
if self.httpserver.bind_addr is None:
Expand All @@ -123,6 +197,7 @@ def interface(self):

class KolibriServerPlugin(ServerPlugin):
def ENTER(self):
super(KolibriServerPlugin, self).ENTER()
# Clear old sessions up
call_command("clearsessions")

Expand Down Expand Up @@ -379,24 +454,47 @@ def configure_http_server(port, zip_port, bus):
alt_port_server.subscribe()


def check_port_availability(host, port):
"""
Make sure the port is available for the server to start.
"""
def background_port_check(port, zip_port):
# Do this before daemonization, otherwise just let the server processes handle this
# In case that something other than Kolibri occupies the port,
# check the port's availability.
# Bypass check when socket activation is used
# https://manpages.debian.org/testing/libsystemd-dev/sd_listen_fds.3.en.html#ENVIRONMENT
# Also bypass when the port is 0, as that will choose a port
if not os.environ.get("LISTEN_PID", None) and port:
try:
wait_for_free_port(host, port, timeout=PORT_AVAILABILITY_CHECK_TIMEOUT)
except OSError:
# Port is occupied
logger.error(
"Port {} is occupied.\n"
"Please check that you do not have other processes "
"running on this port and try again.\n".format(port)
)
sys.exit(1)
port = int(port)
zip_port = int(zip_port)
if (
not os.environ.get("LISTEN_PID", None)
and port
and not check_port_availability(LISTEN_ADDRESS, port)
):
# Port is occupied
logger.error(
"Port {} is occupied.\n"
"Please check that you do not have other processes "
"running on this port and try again.\n".format(port)
)
sys.exit(1)
if (
not os.environ.get("LISTEN_PID", None)
and zip_port
and not check_port_availability(LISTEN_ADDRESS, zip_port)
):
# Port is occupied
logger.error(
"Port {} is occupied.\n"
"Please check that you do not have other processes "
"running on this port and try again.\n".format(zip_port)
)
sys.exit(1)
if port:
__, urls = get_urls(listen_port=port)
for url in urls:
logger.info("Kolibri running on: {}".format(url))
else:
logger.info(
"No port specified, for information about accessing the server, run kolibri status"
)


def start(port=0, zip_port=0, serve_http=True, background=False):
Expand All @@ -405,6 +503,8 @@ def start(port=0, zip_port=0, serve_http=True, background=False):
:param: port: Port number (default: 0) - assigned by free port
"""
port = int(port)
zip_port = int(zip_port)
# On Mac, Python crashes when forking the process, so prevent daemonization until we can figure out
# a better fix. See https://github.com/learningequality/kolibri/issues/4821
if sys.platform == "darwin":
Expand All @@ -430,18 +530,7 @@ def start(port=0, zip_port=0, serve_http=True, background=False):
pid_plugin.subscribe()

if background and serve_http:
# Do this before daemonization, otherwise just let the server processes handle this
# In case that something other than Kolibri occupies the port,
# check the port's availability.
check_port_availability(LISTEN_ADDRESS, port)
if port:
__, urls = get_urls(listen_port=port)
for url in urls:
logger.info("Kolibri running on: {}".format(url))
else:
logger.info(
"No port specified, for information about accessing the server, run kolibri status"
)
background_port_check(port, zip_port)

logger.info("Starting Kolibri {version}".format(version=kolibri.__version__))

Expand Down
4 changes: 3 additions & 1 deletion kolibri/utils/tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,14 +166,16 @@ def start_mock(port, *args, **kwargs):

test_port = 1234

test_zip_port = 5432

os.environ["KOLIBRI_HTTP_PORT"] = str(test_port)

# force a reload of plugins.OPTIONS so the environment variable will be read in
from kolibri.utils import conf

conf.OPTIONS.update(options.read_options_file(conf.KOLIBRI_HOME))

cli.start.callback(test_port, False)
cli.start.callback(test_port, test_zip_port, False)
with pytest.raises(SystemExit) as excinfo:
cli.stop.callback()
assert excinfo.code == 0
Expand Down
30 changes: 21 additions & 9 deletions kolibri/utils/tests/test_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,12 @@
from __future__ import print_function
from __future__ import unicode_literals

import mock
import pytest
import os
from unittest import TestCase

import mock
import pytest

from kolibri.core.tasks.scheduler import Scheduler
from kolibri.core.tasks.test.base import connection
from kolibri.utils import server
Expand Down Expand Up @@ -84,14 +85,18 @@ def test_required_services_initiate_on_start(

# Start server services
services_plugin = server.ServicesPlugin(mock.MagicMock(name="bus"), 1234)
services_plugin.start()
services_plugin.START()

# Do we initialize workers when services start?
initialize_workers.assert_called_once()

# Do we start scheduler when services start?
server.scheduler.start_scheduler.assert_called_once()

register_zeroconf_service.assert_not_called()

services_plugin.SERVING(1234)

# Do we register ourselves on zeroconf?
register_zeroconf_service.assert_called_once_with(port=1234)

Expand Down Expand Up @@ -129,7 +134,7 @@ def test_scheduled_jobs_persist_on_restart(

# Now, start services plugin
service_plugin = server.ServicesPlugin(mock.MagicMock(name="bus"), 1234)
service_plugin.start()
service_plugin.START()

# Currently, we must have exactly four scheduled jobs
# two userdefined and two server defined (pingback and vacuum)
Expand All @@ -140,8 +145,8 @@ def test_scheduled_jobs_persist_on_restart(
assert scheduler.get_job(server.SCH_VACUUM_JOB_ID) is not None

# Restart services
service_plugin.stop()
service_plugin.start()
service_plugin.STOP()
service_plugin.START()

# Make sure all scheduled jobs persist after restart
assert scheduler.count() == 4
Expand All @@ -168,7 +173,7 @@ def test_services_shutdown_on_stop(self, unregister_zeroconf_service, scheduler)
]

# Now, let us stop services plugin
services_plugin.stop()
services_plugin.STOP()

# Do we shutdown scheduler?
server.scheduler.shutdown_scheduler.assert_called_once()
Expand All @@ -191,7 +196,7 @@ class ServerInitializationTestCase(TestCase):
def test_port_occupied(self, wait_for_port_mock, logging_mock):
wait_for_port_mock.side_effect = OSError
with self.assertRaises(SystemExit):
server.check_port_availability("0.0.0.0", "8080")
server.background_port_check("8080", "8081")
logging_mock.assert_called()

@mock.patch("kolibri.utils.server.logging.error")
Expand All @@ -200,5 +205,12 @@ def test_port_occupied_socket_activation(self, wait_for_port_mock, logging_mock)
wait_for_port_mock.side_effect = OSError
# LISTEN_PID environment variable would be set if using socket activation
with mock.patch.dict(os.environ, {"LISTEN_PID": "1234"}):
server.check_port_availability("0.0.0.0", "8080")
server.background_port_check("8080", "8081")
logging_mock.assert_not_called()

@mock.patch("kolibri.utils.server.logging.error")
@mock.patch("kolibri.utils.server.wait_for_free_port")
def test_port_zero_zip_port_zero(self, wait_for_port_mock, logging_mock):
wait_for_port_mock.side_effect = OSError
server.background_port_check("0", "0")
logging_mock.assert_not_called()

0 comments on commit f07ddf3

Please sign in to comment.