Skip to content

HAProxy as a load balancer layer in front of N Docker containers running Splash instances.

Notifications You must be signed in to change notification settings

sbhusal123/HAProxy-Docker-Load-Balancing

Folders and files

NameName
Last commit message
Last commit date

Latest commit

 

History

1 Commit
 
 
 
 
 
 
 
 
 
 
 
 
 
 

Repository files navigation

Splash container Load Balancing With Haproxy and Basic Authentication

  • HAProxy as a load balancer layer in front of N Docker containers running Splash instances.
  • Requests are distributed across all containers in round-robin fashion.
  • Each container has a memory usage limit; a container that exceeds it is killed and recreated.

Setup:

  • Spin up containers: docker-compose up

  • Remove containers: docker-compose down

  • Splash: http://localhost:8050, username: splashUser, password: splashPassword

  • Status of Containers: http://localhost:8036, username: splashadmin, password: splashadminPassword

Screenshots

Stats Page

1. Docker Compose Configuration

# Docker Compose (file format v2) stack: one HAProxy container that load
# balances across several identical Splash containers.
version: '2'

services:
    # HAProxy service acting as the load balancer in front of the Splash instances
    haproxy:
        image: haproxy:1.7
        ports:
            # stats dashboard for the running cluster of Splash instances
            - "8036:8036"

            # Splash
            - "8050:8050"
        links:
            # every name listed here must be defined as a service below,
            # otherwise Compose refuses to start the stack
            - splash0
            - splash1
            # - ....
            # - ....
            - splashN
        volumes:
            # mounts the local haproxy.cfg read-only as the container's config
            - ./haproxy.cfg:/usr/local/etc/haproxy/haproxy.cfg:ro

    splash0:
        image: scrapinghub/splash:3.0
        command: --max-timeout 3600 --slots 5 --maxrss 3000 --verbosity 1
        expose:
            - 8050
        mem_limit: 4200m
        memswap_limit: 5400m
        restart: always

    # second instance; referenced by the haproxy "links" list above
    splash1:
        image: scrapinghub/splash:3.0
        command: --max-timeout 3600 --slots 5 --maxrss 3000 --verbosity 1
        expose:
            - 8050
        mem_limit: 4200m
        memswap_limit: 5400m
        restart: always

    # ...
    # If more instances are needed, duplicate a splash service, rename it,
    # and add the new name to the haproxy "links" list and to haproxy.cfg
    # ...


    # Nth instance
    splashN:
        image: scrapinghub/splash:3.0
        command: --max-timeout 3600 --slots 5 --maxrss 3000 --verbosity 1
        expose:
            - 8050
        mem_limit: 4200m
        memswap_limit: 5400m
        restart: always

i. Haproxy Container configuration

  • Used as the load-balancing layer in front of the Splash containers.
    haproxy:
        # HAProxy server version
        image: haproxy:1.7
        volumes:
            # local haproxy.cfg mounted read-only as the container's config file
            - ./haproxy.cfg:/usr/local/etc/haproxy/haproxy.cfg:ro
        links:
            # Splash containers that HAProxy forwards requests to
            - splash0
            - splash1
            # .....
            # .....
            - splashN
        ports:
            # stats page for the running cluster of Splash instances
            - "8036:8036"

            # Splash
            - "8050:8050"

ii. haproxy.cfg

# HAProxy 1.7 configuration for Splash.
# Assumption: the Splash instances run on the same machine as HAProxy and are
# reachable through Docker links.
global
    # process-wide connection cap; raise it if necessary
    maxconn 512
    # UNIX stats socket, required for the stats page
    stats socket /tmp/haproxy

# Credentials checked by the HTTP basic auth rule of the Splash frontend.
# Replace both placeholders before use; "insecure-password" means the password
# is written here in plain text.
userlist users
    user <SplashUsername> insecure-password <SplashPassword>

# Settings inherited by every frontend/backend/listen section below.
defaults
    mode http
    log global

    # drop queued requests whose client has already disconnected;
    # see https://cbonte.github.io/haproxy-dconv/1.7/configuration.html#4.2-option%20abortonclose
    option abortonclose

    # gzip saves a lot of traffic for JSON, HTML or base64 payloads
    compression algo gzip
    compression type text/html text/plain application/json

    # generous timeouts (1 hour each) so that long request queues can build up
    # in HAProxy; increase these values if even longer queues are wanted
    timeout client 3600s
    timeout server 3600s
    timeout connect 3600s


# HAProxy stats admin page, served at the root path of port 8036
# (e.g. http://0.0.0.0:8036/).
listen stats
    bind *:8036
    mode http

    # turn the page on and serve it at "/"
    stats enable
    stats uri /

    # access control: basic auth with the admin placeholders below
    stats realm Haproxy\ Statistics
    stats auth <StatsAdminUser>:<StatsAdminPassword>

    # presentation: auto-refresh every 10 seconds, hide the HAProxy version,
    # show legends and a description of the cluster
    stats refresh 10s
    stats hide-version
    stats show-legends
    stats show-desc Splash Cluster


# Splash Cluster configuration
# Public entry point: listens on port 8050, enforces HTTP basic auth against
# the "users" userlist and routes each request to one of two backends by path.
frontend http-in
    bind *:8050

    # http basic auth
    # Rule order matters: a request without valid credentials gets an auth
    # challenge, an authenticated request is allowed, anything else is denied.
    acl auth_ok http_auth(users)
    http-request auth realm Splash if !auth_ok
    http-request allow if auth_ok
    http-request deny

    # don't apply the same limits for non-render endpoints:
    # the HAR viewer static files and the /, /info, /_debug, /debug paths
    # are sent to the splash-misc backend
    acl staticfiles path_beg /_harviewer/
    acl misc path / /info /_debug /debug

    # every other authenticated request goes to the splash-cluster backend
    use_backend splash-cluster if auth_ok !staticfiles !misc
    use_backend splash-misc if auth_ok staticfiles
    use_backend splash-misc if auth_ok misc

# Backend for all requests the frontend does not classify as static/misc.
# Uses least-connections balancing with a per-server connection cap.
backend splash-cluster
    # health check: HTTP GET / against each server
    option httpchk GET /
    balance leastconn

    # retry a dropped connection up to 2 times; redispatch lets the retry
    # go to another instance instead of the one that failed
    retries 2
    option redispatch

    # maxconn 5 corresponds to the "--slots 5" argument of the Splash
    # containers: a single container must not accept more than 5 connections
    # at a time.
    server splash-0 splash0:8050 check maxconn 5 inter 2s fall 10 observe layer4
    server splash-1 splash1:8050 check maxconn 5 inter 2s fall 10 observe layer4
    # ...
    # add one "server splash-<i> splash<i>:8050 ..." line per extra container
    # ...
    server splash-N splashN:8050 check maxconn 5 inter 2s fall 10 observe layer4

# Backend for the static-file and misc (non-render) paths:
# round robin load balancing over the N Splash instances.
backend splash-misc
    # round robin: requests are handed to the containers in turn, so each
    # container receives a uniform share
    balance roundrobin
    server splash-0 splash0:8050 check fall 15
    server splash-1 splash1:8050 check fall 15
    # ...
    # add one "server splash-<i> splash<i>:8050 check fall 15" line per extra container
    # ...
    server splash-N splashN:8050 check fall 15

Splash Container Configuration:

    splash0: # container name
        image: scrapinghub/splash:3.0 # version of Splash

        # --max-timeout -> NOTE(review): per the Splash docs this is the maximum
        # value (in seconds) a request may ask for as its render timeout — confirm.

        # --maxrss -> monitor; checks peak memory usage every minute (i.e. much less frequently) and
        # shuts down Splash if peak memory usage was larger than the given value
        # (value presumably in MB — confirm against the Splash docs).

        # --slots -> how many render jobs to run in parallel in a single Splash process.

        command: --max-timeout 3600 --slots 5 --maxrss 3000 --verbosity 1
        expose:
            - 8050

        # Resource Management within Container:  https://docs.docker.com/compose/compose-file/compose-file-v2/#cpu-and-other-resources

        # memory limit for the container
        mem_limit: 4200m

        # swap limit; NOTE(review): per the Docker docs this is the combined
        # memory + swap total, not swap alone — confirm
        memswap_limit: 5400m

        # Without "restart: always", a container killed for exceeding its memory
        # limits (or shut down by --maxrss) simply stays down.
        # With it, the container is brought back up every time that happens.
        restart: always

Additional Resource To Explore:

About

HAProxy as a load balancer layer in front of N Docker containers running Splash instances.

Resources

Stars

Watchers

Forks

Releases

No releases published

Packages

No packages published

Languages